# Analysis of the Gospels
We will analyze _parallelism_ in two of the Gospels.

In [None]:
import spacy
import numpy as np
import pandas as pd
import re
import openai
from matplotlib import pyplot as plt
import config

# Hand the API key from the local ``config`` module to the openai client.
openai.api_key = config.OPENAI_API_KEY

# spaCy pipeline used below to split the texts into sentences.
nlp = spacy.load("en_core_web_md") 
# Default size for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = [8, 4]

## Split into sentences

In [None]:
def process_file(filename, encoding="utf-8"):
    """Read a text file and split its content into sentences.

    Line breaks are replaced by spaces and runs of spaces are collapsed to a
    single space before the text is passed to the module-level ``nlp``
    pipeline.

    Args:
        filename: Path of the text file to read.
        encoding: Encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's locale settings.

    Returns:
        A list with the items yielded by ``doc.sents``, in document order.
    """
    with open(filename, encoding=encoding) as f:
        text = f.read()
    # Flatten the file to one line so hard line wraps do not reach the parser.
    text = text.replace("\n", " ")
    text = re.sub(" +", " ", text)
    return list(nlp(text).sents)

In [None]:
# Sentence lists for the two texts, in the order (mat, mar).
mat, mar = map(process_file, ("mateo.txt", "marcos.txt"))

In [None]:
# Number of sentences found in each text.
len(mat), len(mar)

## Sentences into vectors
We will use OpenAI to transform sentences into vectors.

In [None]:
def get_embedding(sentence):
    """Request an embedding for ``sentence`` from the OpenAI API.

    Sends ``sentence`` to the ``text-embedding-ada-002`` model and returns
    the ``embedding`` field of the first entry in the response's ``data``
    list.
    """
    # NOTE(review): ``openai.Embedding`` is the interface of the pre-1.0
    # openai client — confirm the installed version still provides it.
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=sentence,
    )
    first_entry = response['data'][0]
    return first_entry['embedding']

In [None]:
# One embedding request per sentence; each sentence is sent as plain text.
emb_mat = [get_embedding(str(sentence)) for sentence in mat]
emb_mar = [get_embedding(str(sentence)) for sentence in mar]

In [None]:
# Turn the lists of embeddings into NumPy arrays (one row per sentence).
emb_mat, emb_mar = np.array(emb_mat), np.array(emb_mar)

In [None]:
# Dot product of every ``mat`` embedding with every ``mar`` embedding:
# entry [i, j] compares mat[i] with mar[j].
# NOTE(review): a dot product equals cosine similarity only for unit-length
# vectors — confirm the embedding model returns normalized vectors.
similitudes = emb_mat @ emb_mar.T
similitudes.shape

In [None]:
# Heatmap of the full similarity matrix; the trailing semicolon hides the
# returned object from the cell output.
plt.imshow(similitudes, aspect='auto', interpolation='nearest');

In [None]:
# Long-form table with one row per (mat sentence, mar sentence) pair:
# x = row index in ``similitudes``, y = column index, d = similarity value.
x_indices, y_indices = np.indices(similitudes.shape)
dat = pd.DataFrame(
    dict(
        x=x_indices.flatten(),
        y=y_indices.flatten(),
        d=similitudes.flatten(),
    )
)

In [None]:
# The five pairs with the largest similarity, in ascending order.
dat.sort_values(by='d').tail(5)

In [None]:
# Distribution of the similarity values over all sentence pairs.
dat['d'].hist()

In [None]:
# Attach the text of the ``mat`` sentence to every pair. Each sentence is
# converted to a string once and then looked up by index, instead of calling
# str() again for every row of ``dat``.
mat_text = np.array([str(sentence) for sentence in mat], dtype=object)
dat['mat'] = mat_text[dat.x.to_numpy()]

In [None]:
# Attach the text of the ``mar`` sentence to every pair. Each sentence is
# converted to a string once and then looked up by index, instead of calling
# str() again for every row of ``dat``.
mar_text = np.array([str(sentence) for sentence in mar], dtype=object)
dat['mar'] = mar_text[dat.y.to_numpy()]

In [None]:
# The 50 most similar sentence pairs, best match first.
dat.sort_values(by='d', ascending=False).head(50)