## Create embeddings for KJV passages from both Train and Test, then build a Test dataframe for Gemma in which each Test KJV-BBE pair is matched with its most similar Train KJV-BBE pair (by cosine similarity of the KJV embeddings) to serve as the example for in-context learning

In [1]:
pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)
Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-2.7.0
Note: you may need to restart the kernel to use updated packages.


### Import packages

In [13]:
import numpy as np
import pandas as pd
import random
import torch
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
# Show full cell text and every row when frames are displayed (None removes the limit)
pd.options.display.max_colwidth = None
pd.options.display.max_rows = None

In [4]:
# Read the train file first, then the test file, each into its own dataframe
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train_data.csv', 'test_data.csv'))

### Instantiate the Embedding Model

In [8]:
# Sentence-embedding model shared by every encoding step in this notebook
model = SentenceTransformer(model_name_or_path='sentence-transformers/all-distilroberta-v1')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.3k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Compute embeddings for KJV for both train & test

In [9]:
# Function to compute embeddings for a column in the dataframe
def compute_embeddings(dataframe, column_name, encoder=None):
    """Encode every value of one dataframe column into an embedding.

    Args:
        dataframe: pandas DataFrame that contains ``column_name``.
        column_name: Name of the column whose values are passed to the encoder.
        encoder: Optional object exposing ``encode(list_of_sentences)``.
            When omitted, the notebook-level ``model`` is used, so existing
            two-argument calls behave exactly as before.

    Returns:
        Whatever ``encoder.encode`` returns for the list of column values,
        in the same order as the dataframe rows.
    """
    if encoder is None:
        # Fall back to the globally instantiated model (must be defined before calling)
        encoder = model
    sentences = dataframe[column_name].tolist()
    return encoder.encode(sentences)

In [10]:
# Store one embedding per row in a new 'KJV_embedding' column of each dataframe
train_df['KJV_embedding'] = [vector for vector in compute_embeddings(train_df, 'KJV')]
test_df['KJV_embedding'] = [vector for vector in compute_embeddings(test_df, 'KJV')]

### For each row in test_df, add the KJV-BBE pair from train_df whose KJV embedding has the highest cosine similarity to that row's KJV embedding

In [14]:
# Stack the per-row embedding vectors into 2-D matrices (one row per passage)
train_embeddings = np.vstack([np.array(vector) for vector in train_df['KJV_embedding']])
test_embeddings = np.vstack([np.array(vector) for vector in test_df['KJV_embedding']])

# Similarity matrix: rows follow test_df order, columns follow train_df order
cos_similarities = cosine_similarity(test_embeddings, train_embeddings)

# For every test row, the position and score of its best-matching train row
max_sim_indices = cos_similarities.argmax(axis=1)
max_sim_scores = cos_similarities.max(axis=1)

# Pull the matched train values by position (equivalent to positional .iloc row selection)
train_KJV_nearest = train_df['KJV'].to_numpy()[max_sim_indices]
train_BBE_nearest = train_df['BBE'].to_numpy()[max_sim_indices]
train_KJV_embedding_nearest = train_df['KJV_embedding'].to_numpy()[max_sim_indices]

In [15]:
# Copy of test_df extended with the matched train pair, its embedding and the similarity score
test_with_nearest_neigh_from_train_df = test_df.copy().assign(
    train_KJV=train_KJV_nearest,
    train_BBE=train_BBE_nearest,
    train_KJV_embedding=list(train_KJV_embedding_nearest),
    cosine_similarity=max_sim_scores,
)

In [21]:
# Fixed seed so the two previewed rows are the same on every Restart & Run All
test_with_nearest_neigh_from_train_df.sample(2, random_state=42)

Unnamed: 0,chapter,KJV,BBE,KJV_embedding,train_KJV,train_BBE,train_KJV_embedding,cosine_similarity
51,2Samuel12,"And the Lord sent Nathan unto David. And he came unto him, and said unto him, There were two men in one city; the one rich, and the other poor.","And the Lord sent Nathan to David. And Nathan came to him and said, There were two men in the same town: one a man of great wealth, and the other a poor man.","[0.0019456781, -0.0664589, 0.00020515449, -0.043806337, 0.042802762, 0.019549757, 0.007813624, 0.07654441, -0.02678678, -0.016572053, -0.00031892778, 0.084124774, -0.0014815987, 0.017194562, -0.0926432, 0.067365885, 0.010655339, 0.0074987654, -0.062914275, 0.007568671, 0.05054381, -0.049029566, 0.048862416, -0.011695857, -0.061570685, -0.037357718, -0.0073103267, 0.023696257, -0.03342989, -0.093847916, 0.03635113, -0.081188776, -0.0251645, -0.013169154, -0.005613785, -0.07132958, 0.008278797, -0.017125558, 0.08406128, 0.014549608, -0.034983102, 0.02933466, 0.057265177, 0.023257999, -0.003463733, -0.0018349476, 0.007426777, 0.07096132, 0.012567014, 0.03321697, -0.007759191, 0.009104291, -0.04882992, 0.038205262, 0.0014662221, 0.0017616815, 0.0051250993, 0.100668155, -0.011461984, -0.018603362, 0.011653269, 0.0046729273, -0.050066043, 0.079522416, 0.018111954, -0.015419813, -0.0059340945, -0.0007243474, -0.041703288, -0.021494377, 0.037996843, -0.07972471, 0.004527173, 0.0026382771, -0.025485286, 0.0637218, -0.007726328, -0.013545507, 0.017075919, 0.050411146, 0.013232987, 0.007072926, -0.015268463, -0.004849903, 0.0054924088, 0.055944495, -0.03764179, -0.041724313, 0.023159957, -0.025898667, 0.037764765, 0.05737158, -0.04361379, 0.03327848, 0.0132760955, 0.016934529, -0.020655682, 0.14357357, 0.009647435, -0.026397018, ...]","Then came all the tribes of Israel to David unto Hebron, and spake, saying, Behold, we are thy bone and thy flesh. 
Also in time past, when Saul was king over us, thou wast he that leddest out and broughtest in Israel: and the Lord said to thee, Thou shalt feed my people Israel, and thou shalt be a captain over Israel.","Then all the tribes of Israel came to David in Hebron and said, Truly, we are your bone and your flesh. In the past when Saul was king over us, it was you who went at the head of Israel when they went out or came in: and the Lord said to you, You are to be the keeper of my people Israel and their ruler.","[0.006571864, -0.05975793, 0.019358555, -0.023215389, 0.07000693, 0.016842127, -0.004783842, 0.06595206, 0.006171807, -0.022626774, -0.029991822, -0.0172439, -0.022506101, 0.0029152008, -0.030179556, 0.08140785, 0.013505134, -0.017631097, -0.02806181, -0.034348924, 0.010026969, -0.049590997, 0.021092758, -0.028717257, 0.034602154, -0.021887805, -0.07521154, 0.010695565, -0.04555428, -0.07236196, -0.006743338, -0.067069404, -0.032423977, -0.008195665, 0.022932949, 0.0011658097, -0.005362318, -0.019001441, 0.101935364, 0.015055844, -0.041378096, 5.0163006e-05, 0.003188094, -0.0174386, -0.041428808, 0.010243367, 0.022319952, 0.03525829, 0.023762845, -0.03899895, 0.0008595437, 0.062342744, -0.020758891, -0.014873612, 0.026265444, 0.021908177, 0.006529504, 0.06271395, 0.015592075, -0.0066196905, 0.042794, -0.031554986, -0.016327877, 0.0270553, -0.049949832, -0.002293736, 0.015283969, -0.064813375, -0.025129158, -0.01908601, 0.05872009, -0.039557472, 0.0063412273, -0.013100029, -0.0075966604, 0.04196706, 0.026816398, -0.031114917, -0.018941037, 0.039244346, 0.060640413, 0.015641917, -0.006994928, -0.019287568, 0.041515615, 0.05448225, -0.037175763, -0.056750115, -0.04376233, 0.014525626, 0.01933515, 0.03910303, -0.018820865, 0.028649297, -0.021085927, 0.04291153, -0.011898216, 0.08997161, -0.03335855, -0.03616893, ...]",0.582918
18,Jeremiah4,"If thou wilt return, O Israel, saith the Lord, return unto me: and if thou wilt put away thine abominations out of my sight, then shalt thou not remove. And thou shalt swear, The Lord liveth, in truth, in judgment, and in righteousness; and the nations shall bless themselves in him, and in him shall they glory.","If you will come back, O Israel, says the Lord, you will come back to me: and if you will put away your disgusting ways, you will not be sent away from before me. And you will take your oath, By the living Lord, in good faith and wisdom and righteousness; and the nations will make use of you as a blessing, and in you will they take a pride.","[0.033499505, 0.03224739, 0.02993704, -0.045747764, -0.008554368, 0.002032328, 0.0058730715, 0.037570875, 0.0012151466, -0.02546228, -0.048199825, -0.045070376, -0.04716944, -0.05186355, -0.0125430375, 0.030290684, 0.023809772, -0.066026464, -0.035167493, -0.011185327, -0.092480496, -0.014433477, -0.005653597, -0.0030494684, 0.0364757, -0.04718685, -0.004891968, -0.023348246, -0.028960105, -0.053056028, 0.0028030903, -0.04576637, 0.0036668265, 0.011638264, -0.006456983, 0.0073603312, -0.048755944, -0.022284197, 0.09806984, 0.0050672316, 0.027472824, 0.02154915, -0.05266859, 0.012553264, -0.017555133, 0.041322667, -0.034814958, 0.016690794, 0.021983134, -0.02430174, 0.012696091, 0.010409749, -0.045590974, -0.0139485, 0.025237858, 0.057657417, 0.0039071767, 0.08178025, -0.039982542, 0.043223567, 0.047346383, -0.006146665, -0.028989406, 0.014204054, 0.016110256, 0.007276098, -0.0013137981, -0.03787296, -0.01271676, -0.0458865, 0.056955047, 0.010500686, -0.004333557, 0.036262408, -0.046160217, 0.012524243, 0.051253434, -0.027495887, 0.003231306, 0.008396083, -0.0162972, 0.0050857225, -0.046835177, 0.023193153, 0.050631694, 0.023019008, -0.033247527, 4.164711e-05, -0.018846337, -0.0063838623, 0.0071894354, 0.02487141, -0.009953691, -0.031251483, -0.017019844, -0.0058962028, -0.0039999415, 0.00081470143, 
-0.034101352, -0.032892924, ...]","Now these are the commandments, the statutes, and the judgments, which the Lord your God commanded to teach you, that ye might do them in the land whither ye go to possess it: That thou mightest fear the Lord thy God, to keep all his statutes and his commandments, which I command thee, thou, and thy son, and thy son's son, all the days of thy life; and that thy days may be prolonged. Hear therefore, O Israel, and observe to do it; that it may be well with thee, and that ye may increase mightily, as the Lord God of thy fathers hath promised thee, in the land that floweth with milk and honey.","Now these are the orders and the laws and the decisions which the Lord your God gave me for your teaching, so that you might do them in the land of your heritage to which you are going: So that living in the fear of the Lord your God, you may keep all his laws and his orders, which I give you: you and your son and your son's son, all the days of your life; and so that your life may be long. 
So give ear, O Israel, and take care to do this; so that it may be well for you, and you may be greatly increased, as the Lord the God of your fathers has given you his word, in a land flowing with milk and honey.","[0.009956132, -0.02395142, 0.009917692, -0.017560972, 0.014799379, -0.005754645, -0.023466647, 0.066843316, 0.00541348, -0.013865623, 0.008927025, -0.021920674, 0.011982732, -0.030180002, -0.036914658, 0.04501858, 0.017299574, -0.07247242, -0.0406308, -0.038254853, -0.08145704, -0.033893272, 0.03338551, -0.009786466, 0.052477587, -0.030617932, -0.04817081, -0.014024674, -0.02332592, -0.025636327, 0.0024195232, -0.02279931, -0.04330985, 0.0071706884, 0.009276807, 0.0070602237, -0.011004561, -0.016881585, 0.07529448, -0.009292373, 0.025359742, 0.05401231, -0.069126464, 0.021822734, -0.00980713, 0.021419458, 0.030616349, 0.036681972, 0.006577283, 0.003221637, 0.015075095, 0.026021555, -0.045887623, 0.01078764, -0.0003731363, 0.0089153545, -0.015886214, 0.054993898, -0.029146612, 0.02712371, -0.022451993, 0.008744475, 0.0021559729, 0.08999621, 0.002022634, -0.016675666, -0.0014281424, -0.058921494, -0.026314357, 0.0040612947, 0.030835418, -0.04586493, 0.00017128515, -0.009182395, -0.022510702, -0.0013860214, 0.075583875, -0.04311526, -0.0057451352, -0.011896973, 0.070557594, -0.0032259014, -0.042183302, -0.004014094, 0.07918959, 0.0025306232, -0.016560534, -0.026028588, -0.026601711, -0.06824537, -0.028004482, 0.034662776, -0.036449403, 0.017237732, -0.0073941196, 0.043943524, -0.0027091957, 0.06310558, -0.016800493, -0.010345569, ...]",0.598588


In [22]:
# Persist the enriched test frame without the index column.
# NOTE(review): the embedding columns hold array objects, which CSV stores as their
# text representation — confirm downstream readers do not need them as numbers.
test_with_nearest_neigh_from_train_df.to_csv(
    path_or_buf='test_with_nearest_neigh_from_train_df.csv',
    index=False,
)