In [1]:
import difflib

In [2]:
# Directory (relative to this notebook) holding the text files to compare.
# The trailing slash matters: file names are appended by plain string concatenation.
doc_path = "../data/analysis_data/bibles-txt-ft-cleaned/"

# The two files to diff, in order: index 0 is the "before" side, index 1 the "after" side.
docs = ["RSV-1946-1Timothy.txt", "RSV-1971-1Timothy.txt"]

In [3]:
def _read_text(path):
    """Return the entire contents of the text file at `path` as one string."""
    # NOTE(review): opened with the platform default encoding, exactly as before.
    with open(path) as handle:
        return handle.read()


# One string per entry of `docs`, in the same order.
texts = [_read_text(doc_path + doc) for doc in docs]

In [4]:
# Number of characters read for the first entry of `docs`.
len(texts[0])

13700

In [5]:
# Number of characters read for the second entry of `docs`.
len(texts[1])

13600

In [6]:
# TODO: report the two character counts above together in one formatted string, e.g.:

#print("Book A({}) has {} characters, Book B({}) has {} characters".format(docs[0], len(texts[0]), docs[1], len(texts[1])))

In [7]:
# Split each document into a list of lines.
#
# `texts` is rebound here from a list of strings to a list of line-lists.
# The isinstance guard keeps this cell idempotent: re-running it without
# re-running the loading cell leaves already-split documents untouched
# instead of raising AttributeError ('list' object has no attribute 'split').
texts = [text.split('\n') if isinstance(text, str) else text for text in texts]
print(len(texts))
texts

2


[['THE FIRST LETTER OF PAUL TO TIMOTHY',
  '',
  '',
  '1 Paul, an apostle of Christ Jesus by command of God our Savior and of Christ Jesus our hope,',
  '2 To Timothy, my true child in the faith: Grace, mercy, and peace from God the Father and Christ Jesus our Lord.',
  "3 As I urged you when I was going to Mac-e-do'ni-a, remain at Ephesus that you may charge certain persons not to teach any different doctrine, ",
  '4 nor to occupy them-selves with myths and endless genealogies which promote speculations rather than the divine training (1) that is in faith;',
  '5 whereas the aim of our charge is love that issues from a pure heart and a good conscience and sincere faith. ',
  '6 Cer-tain persons by swerving from these have wandered away into vain discussion, ',
  '7 desiring to be teachers of the law, without understanding either what they are saying or the things about which they make assertions.',
  '8 Now we know that the law is good, if anyone uses it lawfully, ',
  '9 understand

In [8]:
# Filter so only numbered verses remain

import re

# `verses[i]` holds the verse lines of `texts[i]`, in their original order.
verses = []

for text in texts:
    by_verse = []
    for line in text:
        # A verse line starts with ONE OR MORE digits followed by a space.
        # The quantifier must be `+`: with `*` the digits are optional, so any
        # line that merely begins with a space would be kept as a "verse".
        if re.match(r'^[0-9]+ ', line):
            by_verse.append(line)
    verses.append(by_verse)

verses

[['1 Paul, an apostle of Christ Jesus by command of God our Savior and of Christ Jesus our hope,',
  '2 To Timothy, my true child in the faith: Grace, mercy, and peace from God the Father and Christ Jesus our Lord.',
  "3 As I urged you when I was going to Mac-e-do'ni-a, remain at Ephesus that you may charge certain persons not to teach any different doctrine, ",
  '4 nor to occupy them-selves with myths and endless genealogies which promote speculations rather than the divine training (1) that is in faith;',
  '5 whereas the aim of our charge is love that issues from a pure heart and a good conscience and sincere faith. ',
  '6 Cer-tain persons by swerving from these have wandered away into vain discussion, ',
  '7 desiring to be teachers of the law, without understanding either what they are saying or the things about which they make assertions.',
  '8 Now we know that the law is good, if anyone uses it lawfully, ',
  '9 understanding this, that the law is not laid down for the just 

In [9]:
# Sanity check: one list of verse lines per input document.
print(len(verses))

2


In [10]:
# Differ compares two sequences of lines and produces a human-readable delta.
d = difflib.Differ()

In [11]:
# Diff the raw verse lines of the first document against the second.
# compare() yields lazily, so materialise it into a list for repeated inspection.
result = list(d.compare(verses[0], verses[1]))

In [12]:
# pprint is imported here and reused by later cells.
from pprint import pprint

# Each entry is prefixed '  ' (unchanged), '- ' (only in the first document),
# '+ ' (only in the second) or '? ' (intraline hint markers).
pprint(result)

['  1 Paul, an apostle of Christ Jesus by command of God our Savior and of '
 'Christ Jesus our hope,',
 '  2 To Timothy, my true child in the faith: Grace, mercy, and peace from God '
 'the Father and Christ Jesus our Lord.',
 "- 3 As I urged you when I was going to Mac-e-do'ni-a, remain at Ephesus that "
 'you may charge certain persons not to teach any different doctrine, ',
 '?                                                                                                                                               '
 '-\n',
 "+ 3 As I urged you when I was going to Mac-e-do'ni-a, remain at Eph'e-sus "
 'that you may charge certain persons not to teach any different doctrine,',
 '?                                                                  + +\n',
 '- 4 nor to occupy them-selves with myths and endless genealogies which '
 'promote speculations rather than the divine training (1) that is in faith;',
 '?                     -\n',
 '+ 4 nor to occupy themselves with myths and e

# cleaned_verses

In [13]:
# What if we remove all the footnote keys?
#
# Normalise every verse so the diff only reports wording changes:
# footnote keys, punctuation, hyphenation marks, letter case and spacing
# differences are all removed before comparison.

cleaned_verses = []

for text in verses:
    by_verse = []
    for line in text:
        # Drop footnote keys such as "(1)". Raw strings are used for every
        # pattern: '\(' in a normal string literal is an invalid escape sequence.
        line = re.sub(r'\([0-9]*\)', '', line)
        # Keep only ASCII letters, digits and spaces. This removes punctuation,
        # syllable hyphens/apostrophes and any newline or tab characters.
        line = re.sub(r'[^A-Za-z0-9 ]+', '', line)
        # Collapse whitespace LAST: removing footnotes and punctuation can
        # leave runs of spaces (e.g. "training  that") or spaces at the ends,
        # and a single '  ' -> ' ' substitution does not collapse longer runs.
        line = re.sub(r' +', ' ', line).strip()
        line = line.lower()

        by_verse.append(line)
    cleaned_verses.append(by_verse)

cleaned_verses

[['1 paul an apostle of christ jesus by command of god our savior and of christ jesus our hope',
  '2 to timothy my true child in the faith grace mercy and peace from god the father and christ jesus our lord',
  '3 as i urged you when i was going to macedonia remain at ephesus that you may charge certain persons not to teach any different doctrine',
  '4 nor to occupy themselves with myths and endless genealogies which promote speculations rather than the divine training that is in faith',
  '5 whereas the aim of our charge is love that issues from a pure heart and a good conscience and sincere faith',
  '6 certain persons by swerving from these have wandered away into vain discussion',
  '7 desiring to be teachers of the law without understanding either what they are saying or the things about which they make assertions',
  '8 now we know that the law is good if anyone uses it lawfully',
  '9 understanding this that the law is not laid down for the just but for the lawless and disobed

In [14]:
# Fresh Differ for the normalised verses.
d2 = difflib.Differ()

# NOTE: this rebinds `result`, replacing the diff of the raw verses computed earlier.
result = list(d2.compare(cleaned_verses[0], cleaned_verses[1]))

In [15]:
# Show the diff of the normalised verses (`result` was rebound in the previous cell).
pprint(result)

['  1 paul an apostle of christ jesus by command of god our savior and of '
 'christ jesus our hope',
 '  2 to timothy my true child in the faith grace mercy and peace from god the '
 'father and christ jesus our lord',
 '  3 as i urged you when i was going to macedonia remain at ephesus that you '
 'may charge certain persons not to teach any different doctrine',
 '  4 nor to occupy themselves with myths and endless genealogies which '
 'promote speculations rather than the divine training that is in faith',
 '  5 whereas the aim of our charge is love that issues from a pure heart and '
 'a good conscience and sincere faith',
 '  6 certain persons by swerving from these have wandered away into vain '
 'discussion',
 '  7 desiring to be teachers of the law without understanding either what '
 'they are saying or the things about which they make assertions',
 '  8 now we know that the law is good if anyone uses it lawfully',
 '  9 understanding this that the law is not laid down for the

# trouble_verses

In [16]:
# Keep every diff line that is not marked as common to both documents.
# Differ prefixes unchanged lines with two spaces; everything else
# ('- ', '+ ', '? ') is part of a reported difference.
trouble_verses = [entry for entry in result if not re.match('^  ', entry)]

In [17]:
# Show only the differing lines, with Differ's '-', '+' and '?' markers.
pprint(trouble_verses)

['- 2 now a bishop must be above reproach married only once temperate sensible '
 'dignified hospitable an apt teacher',
 '?                                       ^ ^^^^   ^^^   ^\n',
 '+ 2 now a bishop must be above reproach the husband of one wife temperate '
 'sensible dignified hospitable an apt teacher',
 '?                                       ^^^^^^^^ ^   ^   ^^^^^\n',
 '- 12 let deacons be married only once and let them manage their children and '
 'their households well',
 '?                   ^ ^^^^   ^^^   ^\n',
 '+ 12 let deacons be the husband of one wife and let them manage their '
 'children and their households well',
 '?                   ^^^^^^^^ ^   ^   ^^^^^\n',
 '- 14 do not neglect the gift you have which was given you by prophetic '
 'utterance when the elders laid their hands upon you',
 '+ 14 do not neglect the gift you have which was given you by prophetic '
 'utterance when the council of elders laid their hands upon you',
 '?                                

# Results

## Insignificant:

In [19]:
# "NASB-1971-1Timothy.txt", "NASB-1977-1Timothy.txt"

## Significant

In [21]:
#"RSV-1946-1Timothy.txt", "RSV-1971-1Timothy.txt"
    # Verse 12: changed from the gender-neutral "deacons be married only once" to "Let deacons be the husband of one wife"