In [1]:
import os 
import time 
import json,math,sys,re
import pandas as pd 

In [2]:
# --- Configuration: base directory, API credentials, DeepSeek client ---
from dotenv import load_dotenv
from openai import OpenAI

# Base directory from which every data path in this notebook is built
# (the env file, the QP/ folder and the Early-Modern-Sermons/ folder).
folder = "../.."

# The key is read from a local env file so it never appears in the notebook itself.
env_path = f'{folder}/deepseek.env'
load_dotenv(dotenv_path=env_path)
DEEPSEEK_KEY = os.getenv('DEEPSEEK_KEY')
if not DEEPSEEK_KEY:
    # Fail here with a clear message. With api_key=None the OpenAI client falls
    # back to the OPENAI_API_KEY environment variable (which would then be sent
    # to the DeepSeek base_url) or raises an error that never mentions DeepSeek.
    raise RuntimeError(
        f"DEEPSEEK_KEY is not set; expected it in {env_path} or in the environment"
    )

# The OpenAI client is pointed at DeepSeek's endpoint through base_url.
client = OpenAI(api_key=DEEPSEEK_KEY, base_url="https://api.deepseek.com")

In [None]:
# Load the "SAMPLE - FALSE" predictions sheet as a list of row dicts,
# with empty cells replaced by ''.
fname = "parallel_predictions_SAMPLE - FALSE"
parallel = (
    pd.read_csv(f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/{fname}.csv")
    .fillna('')
    .to_dict(orient='records')
)
# Show the first record and the row count as a quick sanity check.
(parallel[0], len(parallel))

In [3]:
# Load the "UNCONFIDENT - HNDPR" verse-pair sheet as a list of row dicts,
# with empty cells replaced by ''.
fname = "parallel_verses_UNCONFIDENT - HNDPR"
parallel = (
    pd.read_csv(f"{folder}/QP/{fname}.csv")
    .fillna('')
    .to_dict(orient='records')
)

# Prefix every ESV text with its own citation, e.g. "Genesis 5.14 Thus all ...".
for row in parallel:
    row['text'] = " ".join([row['ESV'], row['text']])

# Show the first record and the row count as a quick sanity check.
(parallel[0], len(parallel))

({'ESV': 'Genesis 5.14',
  'text': 'Genesis 5.14 Thus all the days of Kenan were 910 years, and he died.',
  'Parallel': 'Genesis 5.27',
  'version': 'AKJV',
  'verse_text': 'Genesis 5.27 And all the dayes of Methuselah were nine hundred, sixtie and nine yeeres, and he died.',
  'rank': 0.0},
 3291)

In [26]:
# Instruction text that the request loop places in front of every verse pair.
# The continuation lines inside the literal keep their leading indentation, so
# that whitespace is part of the string that is sent.
# NOTE(review): replies are cached on disk; editing this text (or its whitespace)
# means new replies come from a different prompt than the cached ones - confirm
# before changing it.
prompt = """You are given pairs of Bible verses from different Bible versions. 
            Verse 1 is always from the English Standard Version. Verse 2 comes from a different version. 
            As an Early Modern English and Latin expert, you determine whether they have similar meanings or very similar language. 
            If Verse 2 is in Latin, compare its translation with Verse 1 to see if it refers to the same idea, even if the numbering might differ. 
            For example, "Psalms 113.2 facta est Judaea sanctificatio ejus; Israel potestas ejus." from the Vulgate is equivalent to "Psalms 114.2 Judah became his sanctuary, Israel his dominion." from the ESV.
            Return a True/False answer without giving any of your reasoning. 
        """

You are given pairs of Bible verses from different Bible versions. 
Verse 1 is always from the English Standard Version. Verse 2 comes from a different version. 
As an Early Modern English and Latin expert, you determine whether they have similar meanings or very similar language. 
If Verse 2 is in Latin, compare its translation with Verse 1 to see if it refers to the same idea, even if the numbering might differ. 
For example, "Psalms 113.2 facta est Judaea sanctificatio ejus; Israel potestas ejus." from the Vulgate is equivalent to "Psalms 114.2 Judah became his sanctuary, Israel his dominion." from the ESV.
Return a True/False answer without giving any of your reasoning. 

Input Verse 1 from the ESV: "Genesis 4.21 His brother's name was Jubal; he was the father of all those who play the lyre and pipe."
Input Verse 2 from the Wycliffe version: 'Genesis 4.21 and the name of his brother was Tubal, he was the fadir of syngeris in harpe and orgun.'

Output:


In [4]:
# Resume from previously saved model replies so finished verse pairs are not
# queried again. `model` is set first so the cache is read from the same
# f"...{fname}_DS{model}.json" path that the request loop writes to, instead of
# a separately hard-coded "DSV3" suffix.
model = "V3"
responses_path = f"{folder}/QP/predictions/{fname}_DS{model}.json"
if os.path.exists(responses_path):
    with open(responses_path, 'r') as f:
        responses = json.load(f)
else:
    # First run for this dataset: there is no cache file yet, so start empty
    # rather than failing with FileNotFoundError.
    responses = {}

In [None]:
from tqdm import tqdm

# Ask the model for a verdict on every verse pair that has no cached reply yet.
# Progress is written to disk every CHECKPOINT_EVERY new replies and once more
# whenever the loop exits, including on an API error or a manual interrupt.
predictions_path = f"{folder}/QP/predictions/{fname}_DS{model}.json"
CHECKPOINT_EVERY = 500  # number of NEW replies between intermediate saves


def _save_responses():
    """Write `responses` to `predictions_path` through a temp file.

    The temp-file + os.replace step keeps the previous cache intact if the
    write itself is interrupted.
    """
    tmp_path = predictions_path + ".tmp"
    with open(tmp_path, 'w') as f:
        json.dump(responses, f)
    os.replace(tmp_path, predictions_path)


new_replies = 0
try:
    for item in tqdm(parallel):
        # Cache key: "<ESV ref>; <parallel ref> (<version>)"
        key = "{}; {} ({})".format(item['ESV'], item['Parallel'], item['version'])
        if key in responses:
            continue  # already answered in an earlier run

        response = client.chat.completions.create(
            model='deepseek-chat',
            max_tokens=2,  # the prompt asks for a bare True/False answer
            messages=[
                {"role": "user",
                 "content": f"{prompt}\n\nInput Verse 1 from the ESV: {item['text']}\nInput Verse 2 from the {item['version']}: {item['verse_text']}\n\nOutput:"
                }
            ],
            stream=False
        )

        responses[key] = response.choices[0].message.content
        new_replies += 1
        # Count new replies (not row position) so rows skipped via the cache
        # cannot delay or skip a checkpoint.
        if new_replies % CHECKPOINT_EVERY == 0:
            _save_responses()
finally:
    # Runs on normal completion as well as on exceptions, so replies obtained
    # since the last checkpoint are not lost.
    _save_responses()
    # Progress bars recorded from earlier full runs:
    # 100%|██████████| 4380/4380 [4:56:13<00:00,  4.06s/it] 
    # 100%|██████████| 3291/3291 [3:09:16<00:00,  3.45s/it]

100%|██████████| 3291/3291 [3:09:16<00:00,  3.45s/it]   


In [31]:
# Attach each cached model reply to its verse pair, flag the pairs that still
# need a manual check, and export the labelled table.
confirmed = set()  # (ESV reference, version) pairs with at least one 'True' reply
for row in parallel:
  pair_key = "{}; {} ({})".format(row['ESV'], row['Parallel'], row['version'])
  # Rows without a cached reply get prediction None.
  row['prediction'] = responses[pair_key].capitalize() if pair_key in responses else None
  if row['prediction'] == 'True':
    confirmed.add((row['ESV'], row['version']))

# A row needs checking when no candidate for its (ESV, version) was predicted 'True'.
for row in parallel:
  row['toCheck'] = (row['ESV'], row['version']) not in confirmed

output = (
  pd.DataFrame(parallel)
  .sort_values(by='ESV')
  .assign(correction='')  # empty column to be filled in by hand
)
# Number of rows still flagged for manual review.
print(len(output[output['toCheck']]))
output.to_csv(f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_verses_UNCONFIDENT - LABELED.csv",index=False)
output

1111


Unnamed: 0,ESV,text,Parallel,version,verse_text,rank,prediction,toCheck,correction
63,1 Chronicles 25.22,"1 Chronicles 25.22 to the fifteenth, to Jeremo...",1 Chronicles 25.23,AKJV,"1 Chronicles 25.23 The sixteenth to Hananiah, ...",0.0,False,False,
65,1 Chronicles 25.22,"1 Chronicles 25.22 to the fifteenth, to Jeremo...",1 Chronicles 25.19,AKJV,"1 Chronicles 25.19 The twelfth to Hashabiah, h...",2.0,False,False,
66,1 Chronicles 25.22,"1 Chronicles 25.22 to the fifteenth, to Jeremo...",1 Chronicles 25.21,AKJV,1 Chronicles 25.21 The fourteenth to Mattithia...,3.0,False,False,
67,1 Chronicles 25.22,"1 Chronicles 25.22 to the fifteenth, to Jeremo...",1 Chronicles 25.26,AKJV,"1 Chronicles 25.26 The nineteenth to Mallothi,...",4.0,False,False,
68,1 Chronicles 25.22,"1 Chronicles 25.22 to the fifteenth, to Jeremo...",1 Chronicles 25.22,AKJV,"1 Chronicles 25.22 The fifteenth to Ierimoth, ...",,True,False,
...,...,...,...,...,...,...,...,...,...
2625,Revelation 20.9,Revelation 20.9 And they marched up over the b...,Revelation 20.9,Vulgate,Revelation 20.9 Et descendit ignis a Deo de cæ...,,False,False,
2623,Revelation 20.9,Revelation 20.9 And they marched up over the b...,Revelation 20.11,Vulgate,Revelation 20.11 Et vidi thronum magnum candid...,1.0,False,False,
1223,Revelation 21.26,Revelation 21.26 They will bring into it the g...,Revelation 21.24,Tyndale,Revelation 21.24 And the people which are save...,0.0,False,True,
1224,Revelation 21.26,Revelation 21.26 They will bring into it the g...,Revelation 21.26,Tyndale,Revelation 21.26 And there shall entre into it...,,False,True,


In [None]:
# Attach cached predictions to the rows in `parallel`, append the rows already
# stored in the "SAMPLE - FALSE" sheet, and write the combined table back to it.
# NOTE(review): the sheet read below is also the sheet overwritten below, so
# running this cell twice appends `parallel` twice - confirm that is intended.
output = []
preds = responses
for idx, entry in enumerate(parallel):
  # `responses` comes from json.load, whose object keys are always strings, so
  # a bare int index never matches. Try the positional key (int, then its
  # string form) and then the composite key written by the request loop.
  composite = "{}; {} ({})".format(entry.get('ESV'), entry.get('Parallel'), entry.get('version'))
  for candidate in (idx, str(idx), composite):
    if candidate in preds:
      entry['prediction'] = preds[candidate].capitalize()
      break
  output.append(entry)

items = pd.read_csv(f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_predictions_SAMPLE - FALSE.csv").fillna('').to_dict(orient='records')
output.extend(items) 
output = pd.DataFrame(output)
# NOTE(review): requires an "index" column in the combined rows - verify against the sheet.
output = output.sort_values(by="index")
output.to_csv(f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_predictions_SAMPLE - FALSE.csv",index=False)
output

In [23]:
# Write the current `output` table to the "SAMPLE - FALSE" sheet, omitting the row index.
output.to_csv(
    f"{folder}/Early-Modern-Sermons/assets/QP_Datasets/parallel_predictions_SAMPLE - FALSE.csv",
    index=False,
)


In [22]:
# Delete the "SAMPLE - NEW" sheet. The existence check makes the cell safe to
# re-run: a bare os.remove raises FileNotFoundError once the file is gone.
# Note that this path is relative to the working directory, not built from `folder`.
stale_csv = "../assets/QP_Datasets/parallel_predictions_SAMPLE - NEW.csv"
if os.path.exists(stale_csv):
    os.remove(stale_csv)