# Imports

In [1]:
import pandas as pd
import numpy as np

# Read in data

In [2]:
df_summaries_texts = pd.read_csv('./data/working_data/summaries_and_full_texts.csv')

In [3]:
df_summaries_texts.head()

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,full_text
0,Candide: Summary: Chapters 1-2,Chapter 1\n Voltaire begins his picaresque tal...,Candide,Candide: Summary: Chapters 1-2,"9 CandidebyVoltaire\r\n\r\neditions, all of..."
1,Candide: Novel Summary: Chapters 1-2,Chapter 1: Voltaire begins his picaresque tale...,Candide,Chapters 1-2,"9 CandidebyVoltaire\r\n\r\neditions, all of..."
2,Candide: Novel Summary: Chapters 3-4,Chapter 3\nChapter 3 takes place in the midst ...,Candide,Chapters 3-4,"9 CandidebyVoltaire\r\n\r\neditions, all of..."
3,Candide: Novel Summary: Chapters 5-6,Chapter 5\nTraveling to Lisbon on a business t...,Candide,Chapters 5-6,"9 CandidebyVoltaire\r\n\r\neditions, all of..."
4,Candide: Novel Summary: Chapters 7-8,Chapter 7\nThe strange old woman takes Candide...,Candide,Chapters 7-8,"9 CandidebyVoltaire\r\n\r\neditions, all of..."


# Goal: to divide full texts of books into chapters corresponding with summaries to create dataset to be used in models

##### Through testing of code for this section, it has become clear that the full texts of books cannot actually be accessed through the 'full_text' column of the previously created 'summaries_and_full_texts.csv', likely due to limits on the amount of data stored and saved in a csv file in this environment. Full texts will be accessed through 'full_texts_filtered.csv'

## Save full texts of all books to text files

### Read in from df_full_texts_filtered and write to txt

In [3]:
#Read in filtered texts data

df_full_texts_filtered = pd.read_csv('./data/working_data/book_texts/full_texts_filtered.csv')

In [4]:
df_full_texts_filtered.head()

Unnamed: 0,full_text,book_title
0,"CandidebyVoltaire\r\n\r\neditions, all of whic...",Candide
1,The Age of InnocencebyEdith Wharton\n\n\n\n\n\...,The Age of Innocence
2,Pride and PrejudicebyJane Austen\n\n\n\n\r\nPr...,Pride and Prejudice
3,The Complete Works of William Shakespeare The ...,King Lear
4,Hard TimesbyCharles Dickens*\n\n\n\n\n\r\nHard...,Hard Times


In [5]:
df_full_texts_filtered.shape

(30, 2)

In [6]:
#List of unique book titles in df_summaries_texts

df_summaries_texts['book_title'].unique()

array(['Candide', 'The Age of Innocence', 'Pride and Prejudice',
       'King Lear', 'Hard Times', 'Merry Wives of Windsor', 'Cymbeline',
       'The Call of the Wild', 'The Inferno', 'Henry VIII',
       'Wuthering Heights', 'Divine Comedy',
       'The Adventures of Huckleberry Finn', 'Twelfth Night',
       'The Count of Monte Cristo', 'The Hound of the Baskervilles',
       'The Canterbury Tales', 'Crime and Punishment', 'My Antonia',
       'Adam Bede', 'Oliver Twist', 'David Copperfield', 'Ivanhoe',
       'As You Like It', 'The Jungle', 'The Red Badge of Courage',
       'Notes from the Underground', 'A Tale of Two Cities',
       'Winesburg, Ohio', 'Middlemarch'], dtype=object)

In [10]:
#Test saving of full text of one book to txt file

with open('./data/book_full_texts/Candide.txt', 'a') as fp:
    fp.write(df_full_texts_filtered.iloc[0]['full_text'])

In [11]:
#Save rest of full texts to txt files

for i in range(1,len(df_full_texts_filtered)):
    file_name = str(df_full_texts_filtered.iloc[i]['book_title']).replace(' ', '_')
    with open(f'./data/book_full_texts/{file_name}.txt', 'a') as fp:
        fp.write(df_full_texts_filtered.iloc[i]['full_text'])

### Delete tables of contents from full text txt files
Manually deleted tables of contents/lists of chapter or section names from books that had them at the beginning of the txt file to make it easier to locate correct chapter titles and partition texts into chapters 

## Test reading in and partitioning one text

### Open and read text

In [12]:
Adam_Bede = open('./data/book_full_texts/Adam_Bede.txt', 'r').read()

In [13]:
Adam_Bede[:200]

'Adam BedebyGeorge Eliot  [pseudonym of Mary Anne Evans]\n\n\n\n\n\nAdam Bede\nby George Eliot\n\n\n\n\n\nBook One\n\n\nChapter I\n\n\nThe Workshop\n\n\nWith a single drop of ink for a mirror, the Egyptian sorcerer\nundertak'

In [14]:
df_summaries_texts[df_summaries_texts['book_title']=='Adam Bede'][['chapters']].sort_values(by='chapters')

Unnamed: 0,chapters
266,Adam Bede: Summary


### Test partitioning

In [16]:
parititions = Adam_Bede.partition('Chapter II')

In [17]:
parititions[0][:500]

'Adam BedebyGeorge Eliot  [pseudonym of Mary Anne Evans]\n\n\n\n\n\nAdam Bede\nby George Eliot\n\n\n\n\n\nBook One\n\n\nChapter I\n\n\nThe Workshop\n\n\nWith a single drop of ink for a mirror, the Egyptian sorcerer\nundertakes to reveal to any chance comer far-reaching visions of\nthe past.  This is what I undertake to do for you, reader.  With\nthis drop of ink at the end of my pen, I will show you the roomy\nworkshop of Mr. Jonathan Burge, carpenter and builder, in the\nvillage of Hayslope, as it appeared on the eighteen'

In [18]:
parititions[1]

'Chapter II'

In [19]:
parititions[2][:500]

'\n\nThe Preaching\n\n\nAbout a quarter to seven there was an unusual appearance of\nexcitement in the village of Hayslope, and through the whole\nlength of its little street, from the Donnithorne Arms to the\nchurchyard gate, the inhabitants had evidently been drawn out of\ntheir houses by something more than the pleasure of lounging in\nthe evening sunshine.  The Donnithorne Arms stood at the entrance\nof the village, and a small farmyard and stackyard which flanked\nit, indicating that there was a pretty '

## Divide full texts into chapters corresponding to chapter summary titles

### List of book titles

In [4]:
list(df_summaries_texts['book_title'].unique())

['Candide',
 'The Age of Innocence',
 'Pride and Prejudice',
 'King Lear',
 'Hard Times',
 'Merry Wives of Windsor',
 'Cymbeline',
 'The Call of the Wild',
 'The Inferno',
 'Henry VIII',
 'Wuthering Heights',
 'Divine Comedy',
 'The Adventures of Huckleberry Finn',
 'Twelfth Night',
 'The Count of Monte Cristo',
 'The Hound of the Baskervilles',
 'The Canterbury Tales',
 'Crime and Punishment',
 'My Antonia',
 'Adam Bede',
 'Oliver Twist',
 'David Copperfield',
 'Ivanhoe',
 'As You Like It',
 'The Jungle',
 'The Red Badge of Courage',
 'Notes from the Underground',
 'A Tale of Two Cities',
 'Winesburg, Ohio',
 'Middlemarch']

### Function to return list of chapters for book

In [5]:
def chapters(title):
    return df_summaries_texts[df_summaries_texts['book_title']==title]['chapters']

### Function to split chapters

In [6]:
def split_chapters(title, chapter_names):

    text_name = title.replace(' ', '_')
    text = open(f'./data/book_full_texts/{text_name}.txt', 'r').read()
    num_chapters = len(chapter_names)
    chapter_texts = []

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    chapter_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        chapter_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    chapter_texts.append(chapter_last)

    return chapter_texts

### Candide

In [7]:
chapters('Candide')

0     Candide: Summary: Chapters 1-2
1                       Chapters 1-2
2                       Chapters 3-4
3                       Chapters 5-6
4                       Chapters 7-8
5                      Chapters 9-10
6                     Chapters 11-12
7                     Chapters 13-14
8                     Chapters 15-16
9                     Chapters 17-18
10                    Chapters 19-20
11                    Chapters 21-22
12                    Chapters 23-24
13                    Chapters 25-26
14                    Chapters 27-28
Name: chapters, dtype: object

Upon examination of the txt file, found that this text is written in French. The models to be built will be limited to English texts for now, so this book will not be included.

### The Age of Innocence - use to test function

#### Split full text into chapters

In [8]:
chapters('The Age of Innocence')

15    The Age of Innocence: Novel Summary
16                           Chapters 1-3
17                           Chapters 4-6
18                           Chapters 7-9
19                         Chapters 10-12
20                         Chapters 13-15
21                         Chapters 16-18
22                         Chapters 19-21
23                         Chapters 22-24
24                         Chapters 25-27
25                         Chapters 28-30
26                         Chapters 31-33
27                             Chapter 34
Name: chapters, dtype: object

In [9]:
ai_title = 'The Age of Innocence'

In [10]:
ai_chapter_names = [
'I.', 'II.', 'III.', 'IV.', 'V,', 'VI.', 'VII.', 'VIII.', 'IX.', 'X.',
'XI.', 'XII.', 'XIII.', 'XIV.', 'XV.', 'XVI.', 'XVII.', 'XVIII.', 'XIX.', 'XX.',
'XXI.', 'XXII.', 'XXIII.', 'XXIV.', 'XXV.', 'XXVI.', 'XXVII.', 'XXVIII.', 'XXIX.', 'XXX.',
'XXXI.', 'XXXII.', 'XXXIII.', 'XXXIV.' 
]

In [11]:
ai_chapter_texts = split_chapters(title=ai_title, chapter_names=ai_chapter_names)

In [12]:
len(ai_chapter_texts)

34

In [13]:
ai_chapter_texts[0][:300]

'The Age of InnocencebyEdith Wharton\n\n\n\n\n\nEtext prepared by  JudithBoss, proofed by Charles Keller.\n\n\n\n\n\n\nThe Age of Innocence\nby Edith Wharton\n\n\n\n\n\n\nBook I\n\n\n\nI.\n\nOn a January evening of the early seventies, Christine\nNilsson was singing in Faust at the Academy of\nMusic in New York.\n\nThough there wa'

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [14]:
df_summaries_texts[df_summaries_texts['book_title']==ai_title].drop(columns='full_text')

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
15,The Age of Innocence: Novel Summary,Chapters 1-3\nSummary\n\tThe story opens at th...,The Age of Innocence,The Age of Innocence: Novel Summary
16,The Age of Innocence: Novel Summary: Chapters 1-3,Chapters 1-3\n\n\t \nSummary\n\tThe story open...,The Age of Innocence,Chapters 1-3
17,The Age of Innocence: Novel Summary: Chapters 4-6,Chapters 4-6\nSummary\n\tArcher and May begin ...,The Age of Innocence,Chapters 4-6
18,The Age of Innocence: Novel Summary: Chapters 7-9,Chapters 7-9\nSummary\n\tMrs Archer and her so...,The Age of Innocence,Chapters 7-9
19,The Age of Innocence: Novel Summary: Chapters ...,Chapters 10-12\nSummary\n\tArcher tells May ab...,The Age of Innocence,Chapters 10-12
20,The Age of Innocence: Novel Summary: Chapters ...,"Chapters 13-15\nSummary\n\tAt the theatre, Arc...",The Age of Innocence,Chapters 13-15
21,The Age of Innocence: Novel Summary: Chapters ...,Chapters 16-18\nSummary\n\tArcher arrives at S...,The Age of Innocence,Chapters 16-18
22,The Age of Innocence: Novel Summary: Chapters ...,Chapters 19-21\nSummary\n\tUnder the eyes of N...,The Age of Innocence,Chapters 19-21
23,The Age of Innocence: Novel Summary: Chapters ...,Chapters 22-24\nSummary\n\tMr and Mrs Emerson ...,The Age of Innocence,Chapters 22-24
24,The Age of Innocence: Novel Summary: Chapters ...,Chapters 25-27\nSummary\n\tAs he leaves Boston...,The Age of Innocence,Chapters 25-27


In [15]:
df_ai = df_summaries_texts[df_summaries_texts['book_title']==ai_title].drop(columns='full_text')

In [16]:
df_ai.drop(index=15, inplace=True)

In [17]:
df_ai['chapter_text'] = [
    ' '.join(ai_chapter_texts[0:2]),
    ' '.join(ai_chapter_texts[3:5]),
    ' '.join(ai_chapter_texts[6:8]),
    ' '.join(ai_chapter_texts[9:11]),
    ' '.join(ai_chapter_texts[12:14]),
    ' '.join(ai_chapter_texts[15:17]),
    ' '.join(ai_chapter_texts[18:20]),
    ' '.join(ai_chapter_texts[21:23]),
    ' '.join(ai_chapter_texts[24:26]),
    ' '.join(ai_chapter_texts[27:29]),
    ' '.join(ai_chapter_texts[30:32]),
    ai_chapter_texts[33]
    ]

In [18]:
df_ai

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
16,The Age of Innocence: Novel Summary: Chapters 1-3,Chapters 1-3\n\n\t \nSummary\n\tThe story open...,The Age of Innocence,Chapters 1-3,The Age of InnocencebyEdith Wharton\n\n\n\n\n\...
17,The Age of Innocence: Novel Summary: Chapters 4-6,Chapters 4-6\nSummary\n\tArcher and May begin ...,The Age of Innocence,Chapters 4-6,\n\nIn the course of the next day the first of...
18,The Age of Innocence: Novel Summary: Chapters 7-9,Chapters 7-9\nSummary\n\tMrs Archer and her so...,The Age of Innocence,Chapters 7-9,\n\nMrs. Henry van der Luyden listened in sile...
19,The Age of Innocence: Novel Summary: Chapters ...,Chapters 10-12\nSummary\n\tArcher tells May ab...,The Age of Innocence,Chapters 10-12,"\n\nThe Countess Olenska had said ""after five""..."
20,The Age of Innocence: Novel Summary: Chapters ...,"Chapters 13-15\nSummary\n\tAt the theatre, Arc...",The Age of Innocence,Chapters 13-15,\n\nIt was a crowded night at Wallack's theatr...
21,The Age of Innocence: Novel Summary: Chapters ...,Chapters 16-18\nSummary\n\tArcher arrives at S...,The Age of Innocence,Chapters 16-18,\n\nWhen Archer walked down the sandy main str...
22,The Age of Innocence: Novel Summary: Chapters ...,Chapters 19-21\nSummary\n\tUnder the eyes of N...,The Age of Innocence,Chapters 19-21,"\n\nThe day was fresh, with a lively spring wi..."
23,The Age of Innocence: Novel Summary: Chapters ...,Chapters 22-24\nSummary\n\tMr and Mrs Emerson ...,The Age of Innocence,Chapters 22-24,"\n\nA party for the Blenkers--the Blenkers?""\n..."
24,The Age of Innocence: Novel Summary: Chapters ...,Chapters 25-27\nSummary\n\tAs he leaves Boston...,The Age of Innocence,Chapters 25-27,"\n\nOnce more on the boat, and in the presence..."
25,The Age of Innocence: Novel Summary: Chapters ...,Chapters 28-30\nSummary\n\tArcher sends a tele...,The Age of Innocence,Chapters 28-30,"\n\nOl-ol--howjer spell it, anyhow?"" asked the..."


### Function to get partial dataframe (chapter_title, chapter_summary, book_title, & chapters cols) for each book

In [19]:
def start_df(title):
    return df_summaries_texts[df_summaries_texts['book_title']==title].drop(columns='full_text')

### Pride and Prejudice

#### Split full text into chapters

In [20]:
chapters('Pride and Prejudice')

28      Chapters 1-4
29      Chapters 5-8
30     Chapters 9-12
31    Chapters 13-16
32    Chapters 17-20
33    Chapters 21-24
34    Chapters 25-28
35    Chapters 29-32
36    Chapters 33-36
37    Chapters 37-40
38    Chapters 41-44
39    Chapters 45-48
40    Chapters 49-52
41    Chapters 53-56
42    Chapters 57-60
43        Chapter 61
Name: chapters, dtype: object

In [21]:
pp_ch_names = []
for i in range(1, 62):
    pp_ch_names.append(f'Chapter {i}')

In [22]:
pp_ch_texts = split_chapters('Pride and Prejudice', pp_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [23]:
pp_df = start_df('Pride and Prejudice')
pp_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
28,Pride and Prejudice: Novel Summary: Chapters 1-4,Chapter 1: Chapter one introduces Mr. and Mrs....,Pride and Prejudice,Chapters 1-4
29,Pride and Prejudice: Novel Summary: Chapters 5-8,Chapter 5: Sir William and Lady Lucas are furt...,Pride and Prejudice,Chapters 5-8
30,Pride and Prejudice: Novel Summary: Chapters 9-12,Chapter 9: Mrs. Bingley and her two youngest d...,Pride and Prejudice,Chapters 9-12
31,Pride and Prejudice: Novel Summary: Chapters 1...,Chapter 13: Mr. Bennet reveals that he has rec...,Pride and Prejudice,Chapters 13-16
32,Pride and Prejudice: Novel Summary: Chapters 1...,Chapter 17: The next day Elizabeth tells Jane ...,Pride and Prejudice,Chapters 17-20
33,Pride and Prejudice: Novel Summary: Chapters 2...,Chapter 21: Mr. Collins begins to ignore Eliza...,Pride and Prejudice,Chapters 21-24
34,Pride and Prejudice: Novel Summary: Chapters 2...,"Chapter 25: Mr. Collins returns to his parish,...",Pride and Prejudice,Chapters 25-28
35,Pride and Prejudice: Novel Summary: Chapters 2...,Chapter 29: The next day Collins talks about o...,Pride and Prejudice,Chapters 29-32
36,Pride and Prejudice: Novel Summary: Chapters 3...,Chapter 33: More than once Elizabeth has unexp...,Pride and Prejudice,Chapters 33-36
37,Pride and Prejudice: Novel Summary: Chapters 3...,Chapter 37: Darcy and Fitzwilliam leave Rosing...,Pride and Prejudice,Chapters 37-40


In [24]:
pp_df['chapter_text'] = [
    ' '.join(pp_ch_texts[0:3]),
    ' '.join(pp_ch_texts[4:7]),
    ' '.join(pp_ch_texts[8:11]),
    ' '.join(pp_ch_texts[12:15]),
    ' '.join(pp_ch_texts[16:19]),
    ' '.join(pp_ch_texts[20:23]),
    ' '.join(pp_ch_texts[24:27]),
    ' '.join(pp_ch_texts[28:31]),
    ' '.join(pp_ch_texts[32:35]),
    ' '.join(pp_ch_texts[36:39]),
    ' '.join(pp_ch_texts[40:43]),
    ' '.join(pp_ch_texts[44:47]),
    ' '.join(pp_ch_texts[48:51]),
    ' '.join(pp_ch_texts[52:55]),
    ' '.join(pp_ch_texts[56:59]),
    pp_ch_texts[60]
]

In [25]:
pp_df.head(3)

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
28,Pride and Prejudice: Novel Summary: Chapters 1-4,Chapter 1: Chapter one introduces Mr. and Mrs....,Pride and Prejudice,Chapters 1-4,Pride and PrejudicebyJane Austen\n\n\n\n\nPrid...
29,Pride and Prejudice: Novel Summary: Chapters 5-8,Chapter 5: Sir William and Lady Lucas are furt...,Pride and Prejudice,Chapters 5-8,\n\n\nWithin a short walk of Longbourn lived a...
30,Pride and Prejudice: Novel Summary: Chapters 9-12,Chapter 9: Mrs. Bingley and her two youngest d...,Pride and Prejudice,Chapters 9-12,\n\n\nElizabeth passed the chief of the night ...


### Start construction of main dataframe from first two books

In [26]:
main_df = pd.concat([df_ai, pp_df])

In [27]:
df_ai.shape

(12, 5)

In [28]:
pp_df.shape

(16, 5)

In [29]:
main_df.shape

(28, 5)

### King Lear

#### Split full text into chapters

In [30]:
chapters('King Lear')

44    Act 1, Scene 1-Act 1, Scene 2
45    Act 1, Scene 3-Act 1, Scene 4
46    Act 1, Scene 5-Act 2, Scene 1
47    Act 2, Scene 2-Act 2, Scene 3
48    Act 2, Scene 4-Act 3, Scene 1
49    Act 3, Scene 2-Act 3, Scene 3
50    Act 3, Scene 4-Act 3, Scene 5
51    Act 3, Scene 6-Act 3, Scene 7
52    Act 4, Scene 1-Act 4, Scene 2
53    Act 4, Scene 3-Act 4, Scene 4
54    Act 4, Scene 5-Act 4, Scene 6
55    Act 4, Scene 7-Act 5, Scene 1
56    Act 5, Scene 2-Act 5, Scene 3
Name: chapters, dtype: object

In [31]:
kl_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT III.',
    'ACT IV',
    'ACT V'
]

In [32]:
kl_ch_names_list = []

In [33]:
kl_acts = split_chapters('King Lear', kl_act_names)

In [34]:
kl_act1_names =[
    'ACT I. Scene I.',
    'Scene II',
    'Scene III',
    'Scene IV',
    'Scene V'
]

kl_ch_names_list.append(kl_act1_names)

In [35]:
kl_act2_names = [ 
    'ACT II. Scene I.',
    'Scene II',
    'Scene III',
    'Scene IV',
]

kl_ch_names_list.append(kl_act2_names)

In [36]:
kl_act3_names = [
    'ACT III. Scene I',
    'Scene II',
    'Scene III',
    'Scene IV',
    'Scene V',
    'Scene VI',
    'Scene VII'
]

kl_ch_names_list.append(kl_act3_names)

In [37]:
kl_act4_names = [
    'ACT IV. Scene I',
    'Scene II',
    'Scene III',
    'Scene IV',
    'Scene V',
    'Scene VI',
    'Scene VII'
]

kl_ch_names_list.append(kl_act4_names)

In [38]:
kl_act5_names = [
    'ACT V. Scene I',
    'Scene II',
    'Scene III',
]

kl_ch_names_list.append(kl_act5_names)

In [39]:
len(kl_ch_names_list)

5

In [40]:
kl_texts = []
for i in range(5): 
    text = kl_acts[i]
    chapter_names = kl_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    kl_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        kl_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    kl_texts.append(chapter_last)

In [41]:
len(kl_texts)

26

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [42]:
kl_df = start_df('King Lear')
kl_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
44,"King Lear: Novel Summary: Act 1, Scene 1-Act 1...","Act 1, Scene 1: King Lear in his old age decid...",King Lear,"Act 1, Scene 1-Act 1, Scene 2"
45,"King Lear: Novel Summary: Act 1, Scene 3-Act 1...","\n\t \nAct 1, Scene 3: Goneril meets with the ...",King Lear,"Act 1, Scene 3-Act 1, Scene 4"
46,"King Lear: Novel Summary: Act 1, Scene 5-Act 2...","\n\t \nAct 1, Scene 5: Lear sends Kent, still ...",King Lear,"Act 1, Scene 5-Act 2, Scene 1"
47,"King Lear: Novel Summary: Act 2, Scene 2-Act 2...","\n\t \nAct 2, Scene 2: Kent and Oswald, messen...",King Lear,"Act 2, Scene 2-Act 2, Scene 3"
48,"King Lear: Novel Summary: Act 2, Scene 4-Act 3...","\n\t \nAct 2, Scene 4: Lear arrives at Glouces...",King Lear,"Act 2, Scene 4-Act 3, Scene 1"
49,"King Lear: Novel Summary: Act 3, Scene 2-Act 3...","\n\t \nAct 3, Scene 2: Lear and the Fool are c...",King Lear,"Act 3, Scene 2-Act 3, Scene 3"
50,"King Lear: Novel Summary: Act 3, Scene 4-Act 3...","\n\t \nAct 3, Scene 4: Lear and his men reach ...",King Lear,"Act 3, Scene 4-Act 3, Scene 5"
51,"King Lear: Novel Summary: Act 3, Scene 6-Act 3...","\n\t \nAct 3, Scene 6: When the men in the sto...",King Lear,"Act 3, Scene 6-Act 3, Scene 7"
52,"King Lear: Novel Summary: Act 4, Scene 1-Act 4...","\n\t \nAct 4, Scene 1: Gloucester is brought o...",King Lear,"Act 4, Scene 1-Act 4, Scene 2"
53,"King Lear: Novel Summary: Act 4, Scene 3-Act 4...","Act 4, Scene 3: Kent and the Gentleman meet in...",King Lear,"Act 4, Scene 3-Act 4, Scene 4"


In [43]:
kl_df['chapter_text'] =[
    ' '.join(kl_texts[0:1]),
    ' '.join(kl_texts[2:3]),
    ' '.join(kl_texts[4:5]),
    ' '.join(kl_texts[6:7]),
    ' '.join(kl_texts[8:9]),
    ' '.join(kl_texts[10:11]),
    ' '.join(kl_texts[12:13]),
    ' '.join(kl_texts[14:15]),
    ' '.join(kl_texts[16:17]),
    ' '.join(kl_texts[18:19]),
    ' '.join(kl_texts[20:21]),
    ' '.join(kl_texts[22:23]),
    ' '.join(kl_texts[24:25]),
]

#### Add book to main dataframe

In [44]:
main_df = pd.concat([main_df, kl_df])

In [45]:
len(main_df)

41

### Hard Times

#### Split full text into chapters

In [46]:
chapters('Hard Times')

57    Hard Times: Summary: Book1Chapters 1-4
58                         Book1Chapters 1-4
59                         Book1Chapters 5-8
60                        Book1Chapters 9-12
61                       Book1Chapters 13-16
62                         Book2Chapters 1-4
63                         Book2Chapters 5-8
64                        Book2Chapters 9-12
65                         Book3Chapters 1-4
66                         Book3Chapters 5-9
Name: chapters, dtype: object

In [47]:
ht_chap_names =[
    'CHAPTER I - THE ONE THING NEEDFUL',
    'CHAPTER II - MURDERING THE INNOCENTS',
    'CHAPTER III - A LOOPHOLE',
    'CHAPTER IV - MR. BOUNDERBY',
    'CHAPTER V - THE KEYNOTE',
    'CHAPTER VI - SLEARY\'S HORSEMANSHIP',
    'CHAPTER VII - MRS. SPARSIT',
    'CHAPTER VIII - NEVER WONDER',
    'CHAPTER IX - SISSY\'S PROGRESS',
    'CHAPTER X - STEPHEN BLACKPOOL',
    'CHAPTER XI - NO WAY OUT',
    'CHAPTER XII - THE OLD WOMAN',
    'CHAPTER XIII - RACHAEL',
    'CHAPTER XIV - THE GREAT MANUFACTURER',
    'CHAPTER XV - FATHER AND DAUGHTER',
    'CHAPTER XVI - HUSBAND AND WIFE',
    'CHAPTER I - EFFECTS IN THE BANK',
    'CHAPTER II - MR. JAMES HARTHOUSE',
    'CHAPTER III - THE WHELP',
    'CHAPTER IV - MEN AND BROTHERS',
    'CHAPTER V - MEN AND MASTERS',
    'CHAPTER VI - FADING AWAY',
    'CHAPTER VII - GUNPOWDER',
    'CHAPTER VIII - EXPLOSION',
    'CHAPTER IX - HEARING THE LAST OF IT',
    'CHAPTER X - MRS. SPARSIT\'S STAIRCASE',
    'CHAPTER XI - LOWER AND LOWER',
    'CHAPTER XII - DOWN',
    'CHAPTER I - ANOTHER THING NEEDFUL',
    'CHAPTER II - VERY RIDICULOUS',
    'CHAPTER III - VERY DECIDED',
    'CHAPTER IV - LOST',
    'CHAPTER V - FOUND',
    'CHAPTER VI - THE STARLIGHT',
    'CHAPTER VII - WHELP-HUNTING',
    'CHAPTER VIII - PHILOSOPHICAL',
    'CHAPTER IX - FINAL'
]

In [48]:
ht_texts = split_chapters('Hard Times', ht_chap_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [49]:
chapters('Hard Times')

57    Hard Times: Summary: Book1Chapters 1-4
58                         Book1Chapters 1-4
59                         Book1Chapters 5-8
60                        Book1Chapters 9-12
61                       Book1Chapters 13-16
62                         Book2Chapters 1-4
63                         Book2Chapters 5-8
64                        Book2Chapters 9-12
65                         Book3Chapters 1-4
66                         Book3Chapters 5-9
Name: chapters, dtype: object

In [50]:
len(ht_texts)

37

In [51]:
ht_chapters = [
    ' '.join(ht_texts[0:3]),
    ' '.join(ht_texts[0:3]),
    ' '.join(ht_texts[4:7]),
    ' '.join(ht_texts[8:11]),
    ' '.join(ht_texts[12:15]),
    ' '.join(ht_texts[16:19]),
    ' '.join(ht_texts[20:23]),
    ' '.join(ht_texts[24:27]),
    ' '.join(ht_texts[28:31]),
    ' '.join(ht_texts[32:36]),
]

In [52]:
df_ht = start_df('Hard Times')
df_ht

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
57,Hard Times: Summary: Book1Chapters 1-4,Book the First: Sowing Chapters 1-4\n\tThe fir...,Hard Times,Hard Times: Summary: Book1Chapters 1-4
58,Hard Times: Novel Summary: Book1Chapters 1-4,Book1 Chapters 1-4\n\n\t \nBook the First: Sow...,Hard Times,Book1Chapters 1-4
59,Hard Times: Novel Summary: Book1Chapters 5-8,\n\t \nBook the First: Sowing\n\tChapters 5-8\...,Hard Times,Book1Chapters 5-8
60,Hard Times: Novel Summary: Book1Chapters 9-12,Book1 Chapters 9-12\n\n\t \nBook the First: So...,Hard Times,Book1Chapters 9-12
61,Hard Times: Novel Summary: Book1Chapters 13-16,\n\t \nBook the First: Sowing\n\tChapters 13-1...,Hard Times,Book1Chapters 13-16
62,Hard Times: Novel Summary: Book2Chapters 1-4,Book2 Chapters 1-4\n\n\t \nBook the Second: Re...,Hard Times,Book2Chapters 1-4
63,Hard Times: Novel Summary: Book2Chapters 5-8,\n\t \nBook the Second: Reaping\n\tChapters 5-...,Hard Times,Book2Chapters 5-8
64,Hard Times: Novel Summary: Book2Chapters 9-12,Book2 Chapters 9-12\n\n\t \nBook the Second: R...,Hard Times,Book2Chapters 9-12
65,Hard Times: Novel Summary: Book3Chapters 1-4,\n\t \nBook the Third: Garnering\n\tChapters 1...,Hard Times,Book3Chapters 1-4
66,Hard Times: Novel Summary: Book3Chapters 5-9,Book3 Chapters 5-9\n\n\t \nBook the Third: Gar...,Hard Times,Book3Chapters 5-9


In [53]:
df_ht['chapter_text'] = ht_chapters

In [54]:
df_ht.iloc[0]

chapter_title                 Hard Times: Summary: Book1Chapters 1-4
chapter_summary    Book the First: Sowing Chapters 1-4\n\tThe fir...
book_title                                                Hard Times
chapters                      Hard Times: Summary: Book1Chapters 1-4
chapter_text       Hard TimesbyCharles Dickens*\n\n\n\n\n\nHard T...
Name: 57, dtype: object

In [55]:
df_ht.index

Int64Index([57, 58, 59, 60, 61, 62, 63, 64, 65, 66], dtype='int64')

In [56]:
df_ht.drop(index=57, inplace=True)

#### Add book to main dataframe

In [57]:
main_df = pd.concat([main_df, df_ht])

In [58]:
len(main_df)

50

### Merry Wives of Windsor

#### Split full text into chapters

In [59]:
chapters('Merry Wives of Windsor')

67    Merry Wives of Windsor: Novel Summary
68                          Act I Scene 1-2
69                            Act I Scene 3
70                            Act I Scene 4
71                           Act II Scene 1
72                           Act II Scene 2
73         Act II Scene 3 - Act III Scene 1
74                          Act III Scene 2
75                          Act III Scene 3
76                          Act III Scene 4
77                          Act III Scene 5
78                         Act IV Scene 1-2
79                         Act IV Scene 3-4
80                         Act IV Scene 5-6
81                          Act V Scene 1-5
Name: chapters, dtype: object

In [60]:
mw_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT III.',
    'ACT IV',
    'ACT V'
]

In [61]:
mw_ch_names_list = []

In [62]:
mw_acts = split_chapters('Merry Wives of Windsor', mw_act_names)

In [63]:
mw_act1_names =[
    'ACT I. Scene 1.',
    'Scene 2.',
    'Scene 3.',
    'Scene 4.',
]
mw_ch_names_list.append(mw_act1_names)


mw_act2_names = [ 
    'ACT II. Scene 1.',
    'Scene 2.',
    'Scene 3.',
]
mw_ch_names_list.append(mw_act2_names)


mw_act3_names = [
    'ACT III. Scene 1.',
    'Scene 2.',
    'Scene 3.',
    'Scene 4.',
    'Scene 5.',
    'Scene 6.',
]
mw_ch_names_list.append(mw_act3_names)


mw_act4_names = [
    'ACT IV. Scene I.',
    'Scene 2.',
    'Scene 3.',
    'Scene 4.',
    'Scene 5.',
    'Scene 6.',
]
mw_ch_names_list.append(mw_act4_names)


mw_act5_names = [
    'ACT V. Scene 1.',
    'Scene 2.',
    'Scene 3.',
    'Scene 4.',
    'Scene 5.',
]
mw_ch_names_list.append(mw_act5_names)

In [64]:
len(mw_ch_names_list)

5

In [65]:
mw_texts = []
for i in range(5): 
    text = mw_acts[i]
    chapter_names = mw_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    mw_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        mw_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    mw_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [66]:
mw_df = start_df('Merry Wives of Windsor')
mw_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
67,Merry Wives of Windsor: Novel Summary,Act I Scene 1-2\n\tThe Merry Wives of Windsor ...,Merry Wives of Windsor,Merry Wives of Windsor: Novel Summary
68,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 1-2\n\tThe Merry Wives of W...,Merry Wives of Windsor,Act I Scene 1-2
69,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3
70,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4
71,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 1\n\tMrs. Page reads the l...,Merry Wives of Windsor,Act II Scene 1
72,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2
73,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1
74,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2
75,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3
76,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4


In [67]:
mw_df.drop(index=67, inplace=True)

In [68]:
len(mw_df)

14

In [69]:
mw_df['chapter_text'] = [
    ' '.join(mw_texts[0:1]),
    (mw_texts[2]),
    (mw_texts[3]),
    (mw_texts[4]),
    (mw_texts[5]),
    ' '.join(mw_texts[6:7]),
    (mw_texts[8]),
    (mw_texts[9]),
    (mw_texts[10]),
    (mw_texts[11]),
    ' '.join(mw_texts[12:13]),
    ' '.join(mw_texts[14:15]),
    ' '.join(mw_texts[16:17]),
    ' '.join(mw_texts[18:22]),
]

#### Add book to main dataframe

In [70]:
main_df = pd.concat([main_df, mw_df])

In [71]:
len(main_df)

64

### Cymbeline

#### Split full text into chapters

In [72]:
chapters('Cymbeline')

82     Act 1 Scene 1
83     Act 1 Scene 2
84     Act 1 Scene 3
85     Act 1 Scene 4
86     Act 1 Scene 5
87     Act 1 Scene 6
88     Act 1 Scene 7
89     Act 2 Scene 1
90     Act 2 Scene 2
91     Act 2 Scene 3
92     Act 2 Scene 4
93     Act 3 Scene 1
94     Act 3 Scene 2
95     Act 3 Scene 3
96     Act 3 Scene 4
97     Act 3 Scene 5
98     Act 3 Scene 6
99     Act 3 Scene 7
100    Act 3 Scene 8
101    Act 4 Scene 1
102    Act 4 Scene 2
103    Act 4 Scene 3
104    Act 4 Scene 4
105    Act 5 Scene 1
106    Act 5 Scene 2
107    Act 5 Scene 3
108    Act 5 Scene 4
109    Act 5 Scene 5
Name: chapters, dtype: object

In [73]:
c_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT III.',
    'ACT IV',
    'ACT V'
]

In [74]:
c_ch_names_list = []

In [75]:
c_acts = split_chapters('Cymbeline', c_act_names)

In [76]:
c_act1_names =[
    'ACT I. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
    'SCENE VI.',
    'SCENE VII.'
]
c_ch_names_list.append(c_act1_names)


c_act2_names = [ 
    'ACT II. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.'
]
c_ch_names_list.append(c_act2_names)


c_act3_names = [
    'ACT III. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
    'SCENE VI.',
    'SCENE VII.',
    'SCENE VIII'
]
c_ch_names_list.append(c_act3_names)


c_act4_names = [
    'ACT IV. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.'
]
c_ch_names_list.append(c_act4_names)


c_act5_names = [
    'ACT V. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.'
]
c_ch_names_list.append(c_act5_names)

In [77]:
c_texts = []
for i in range(5): 
    text = c_acts[i]
    chapter_names = c_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    c_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        c_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    c_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [78]:
df_c = start_df('Cymbeline')
df_c

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
82,Cymbeline: Novel Summary: Act 1 Scene 1,"Summary\n\tAct 1, scene 1\n\tTwo gentlemen are...",Cymbeline,Act 1 Scene 1
83,Cymbeline: Novel Summary: Act 1 Scene 2,Summary\n\tThe Queen tells Imogen that she is ...,Cymbeline,Act 1 Scene 2
84,Cymbeline: Novel Summary: Act 1 Scene 3,Summary\n\tThe First Lord advises Cloten to ch...,Cymbeline,Act 1 Scene 3
85,Cymbeline: Novel Summary: Act 1 Scene 4,Summary\n\tImogen questions Pisanio about his ...,Cymbeline,Act 1 Scene 4
86,Cymbeline: Novel Summary: Act 1 Scene 5,Summary\n\tThe scene is set in Rome at the hou...,Cymbeline,Act 1 Scene 5
87,Cymbeline: Novel Summary: Act 1 Scene 6,"Summary\n\tAt Cymbeline's palace, the Queen is...",Cymbeline,Act 1 Scene 6
88,Cymbeline: Novel Summary: Act 1 Scene 7,Summary\n\tImogen is lamenting her unhappy sit...,Cymbeline,Act 1 Scene 7
89,Cymbeline: Novel Summary: Act 2 Scene 1,Summary\n\tCloten is playing bowls with some L...,Cymbeline,Act 2 Scene 1
90,Cymbeline: Novel Summary: Act 2 Scene 2,Summary\n\tThe scene is set in Imogen's bedroo...,Cymbeline,Act 2 Scene 2
91,Cymbeline: Novel Summary: Act 2 Scene 3,"Summary\n\tCloten is playing some Lords, eithe...",Cymbeline,Act 2 Scene 3


In [79]:
df_c['chapter_text'] = c_texts

#### Add book to main dataframe

In [80]:
main_df = pd.concat([main_df, df_c])

In [81]:
len(main_df)

92

### The Call of the Wild

#### Split full text into chapters

In [82]:
chapters('The Call of the Wild')

110     The Call of the Wild: Novel Summary 
111                                Chapter 1
112                                Chapter 2
113                                Chapter 3
114                                Chapter 4
115                                Chapter 5
116                                Chapter 6
117                                Chapter 7
Name: chapters, dtype: object

In [83]:
cw_ch_names = ['Chapter I', 'Chapter II', 'Chapter III', 'Chapter IV', 'Chapter V', 'Chapter VI', 'Chapter VII']

In [84]:
cw_ch_texts = split_chapters('The Call of the Wild', cw_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [85]:
cw_df = start_df('The Call of the Wild')
cw_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
110,The Call of the Wild: Novel Summary,Chapter 1\n\nSummary\n\tThe story begins in th...,The Call of the Wild,The Call of the Wild: Novel Summary
111,The Call of the Wild: Novel Summary: Chapter 1,Chapter 1\nSummary\n\tThe story begins in the ...,The Call of the Wild,Chapter 1
112,The Call of the Wild: Novel Summary: Chapter 2,Summary\n\tBuck's first day on the beach at th...,The Call of the Wild,Chapter 2
113,The Call of the Wild: Novel Summary: Chapter 3,Chapter 3\nSummary\n\tThe deadly rivalry betwe...,The Call of the Wild,Chapter 3
114,The Call of the Wild: Novel Summary: Chapter 4,"Summary\n\tThe next morning, as Franeois harne...",The Call of the Wild,Chapter 4
115,The Call of the Wild: Novel Summary: Chapter 5,"Chapter 5\nSummary\n\tAfter reaching Dawson, t...",The Call of the Wild,Chapter 5
116,The Call of the Wild: Novel Summary: Chapter 6,Summary\n\tStaying with Thornton during the sp...,The Call of the Wild,Chapter 6
117,The Call of the Wild: Novel Summary: Chapter 7,Chapter 7\nSummary\n\tThe money Buck wins for ...,The Call of the Wild,Chapter 7


In [86]:
cw_df.drop(index=110, inplace=True)

In [87]:
cw_df['chapter_text'] = cw_ch_texts

#### Add book to main dataframe

In [88]:
main_df = pd.concat([main_df, cw_df])

In [89]:
len(main_df)

99

### The Inferno

#### Split full text into chapters

In [90]:
chapters('The Inferno')

118    The Inferno: Novel Summary
119                       Canto 1
120                       Canto 2
121                       Canto 3
122                       Canto 4
123                       Canto 5
124                       Canto 6
125                       Canto 7
126                       Canto 8
127                       Canto 9
128                      Canto 10
129                      Canto 11
130                      Canto 12
131                      Canto 13
132                   Canto 14-15
133                   Canto 16-17
134                      Canto 18
135                      Canto 19
136                      Canto 20
137                   Canto 21-22
138                      Canto 23
139                   Canto 24-25
140                      Canto 26
141                      Canto 27
142                      Canto 28
143                      Canto 29
144                      Canto 30
145                      Canto 31
146                   Canto 32-33
147           

The chapters and the title contained in the data here are mismatched (due to an error in prior data organization/processing). These chapter titles appear to be from Dante's Inferno, however the book being referenced and the full text downloaded is The Inferno by Henri Barbusse

##### Text of Dante's Inferno downloaded separately from https://www.fulltextarchive.com/page/Dante-s-Inferno8/ and added to txt file

In [91]:
di_chap_names = []

for i in range(1, 35):
    di_chap_names.append(f'Canto {i}')

In [92]:
di_ch_texts = split_chapters('Dantes Inferno', di_chap_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [93]:
di_df = start_df('The Inferno')
di_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
118,The Inferno: Novel Summary,Summary\nIn the middle of the journey of his l...,The Inferno,The Inferno: Novel Summary
119,The Inferno: Novel Summary: Canto 1,Summary\n\tIn the middle of the journey of his...,The Inferno,Canto 1
120,The Inferno: Novel Summary: Canto 2,"Summary\n\tDante and Virgil have set out, and ...",The Inferno,Canto 2
121,The Inferno: Novel Summary: Canto 3,Summary\n\tThe dire words inscribed over the g...,The Inferno,Canto 3
122,The Inferno: Novel Summary: Canto 4,"Summary\n\tDante wakes up and finds himself ""o...",The Inferno,Canto 4
123,The Inferno: Novel Summary: Canto 5,Summary\n\tWhen he first enters the Second Cir...,The Inferno,Canto 5
124,The Inferno: Novel Summary: Canto 6,"Summary\n\tDuring his swoon, Dante has somehow...",The Inferno,Canto 6
125,The Inferno: Novel Summary: Canto 7,Summary\n\tPlutus (in ancient mythology the go...,The Inferno,Canto 7
126,The Inferno: Novel Summary: Canto 8,Summary\n\tWhen Dante and Virgil come to the f...,The Inferno,Canto 8
127,The Inferno: Novel Summary: Canto 9,"Summary\n\tSeeing Dante's fear, Virgil tries t...",The Inferno,Canto 9


In [94]:
di_df.drop(index=118, inplace=True)

In [95]:
len(di_df)

29

In [96]:
di_bk_name = []
for i in range(29):
    di_bk_name.append('Dantes Inferno')

di_df['book_title'] = di_bk_name

In [97]:
di_df['chapter_text'] = [
    di_ch_texts[0],
    di_ch_texts[1],
    di_ch_texts[2],
    di_ch_texts[3],
    di_ch_texts[4],
    di_ch_texts[5],
    di_ch_texts[6],
    di_ch_texts[7],
    di_ch_texts[8],
    di_ch_texts[9],
    di_ch_texts[10],
    di_ch_texts[11],
    di_ch_texts[12],
    ' '.join(di_ch_texts[13:14]),
    ' '.join(di_ch_texts[15:16]),
    di_ch_texts[17],
    di_ch_texts[18],
    di_ch_texts[19],
    ' '.join(di_ch_texts[20:21]),
    di_ch_texts[22],
    ' '.join(di_ch_texts[23:24]),
    di_ch_texts[25],
    di_ch_texts[26],
    di_ch_texts[27],
    di_ch_texts[28],
    di_ch_texts[29],
    di_ch_texts[30],
    ' '.join(di_ch_texts[31:32]),
    di_ch_texts[33],
]

#### Add book to main dataframe

In [98]:
main_df = pd.concat([main_df, di_df])

In [99]:
len(main_df)

128

### Henry VIII

#### Split full text into chapters

In [100]:
chapters('Henry VIII')

148    Henry VIII: Summary: The Prologue
149                        Act 1 Scene 1
150                        Act 1 Scene 2
151                    Act 1 Scene 3 & 4
152                        Act 2 Scene 1
153                        Act 2 Scene 2
154                        Act 2 Scene 3
155                        Act 2 Scene 4
156                        Act 3 Scene 1
157                        Act 3 Scene 2
158                   Act 4 Scenes 1 & 2
159                        Act 5 Scene 1
160                        Act 5 Scene 2
161     Act 5 Scenes 3 & 4, and Epilogue
Name: chapters, dtype: object

In [101]:
h_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT THIRD',
    'ACT FOURTH',
    'ACT FIFTH'
]

In [102]:
h_ch_names_list = []

In [103]:
h_acts = split_chapters('Henry VIII', h_act_names)

In [104]:
h_act1_names =[
    'SCENE 1.',
    'SCENE II. The same. The council-chamber.',
    'SCENE III. An ante-chamber in the palace.',
    'SCENE IV. A Hall in York Place.',
]
h_ch_names_list.append(h_act1_names)


h_act2_names = [ 
    'SCENE 1. Westminster. A street.',
    'SCENE II.  An ante-chamber in the palace.',
    'SCENE III.  An ante-chamber of the Queen\'s apartments.',
    'SCENE IV.  A hall in Black-Friars.',

]
h_ch_names_list.append(h_act2_names)


h_act3_names = [
    'SCENE I.  London. The Queen\'s apartments.',
    'SCENE II.  Ante-chamber to the King\'s apartment.'
]
h_ch_names_list.append(h_act3_names)


h_act4_names = [
    'SCENE I.  A street in Westminster.',
    'SCENE II. Kimbolton.'
]
h_ch_names_list.append(h_act4_names)


h_act5_names = [
    'SCENE I.  A gallery in the palace.',
    'SCENE II.  Lobby before the council-chamber.',
    'SCENE III.  The council-chamber.',
    'SCENE IV. The palace yard.',
    'EPILOGUE'
]
h_ch_names_list.append(h_act5_names)

In [105]:
h_texts = []
for i in range(5): 
    text = h_acts[i]
    chapter_names = h_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    h_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        h_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    h_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [106]:
h_df = start_df('Henry VIII')
h_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
148,Henry VIII: Summary: The Prologue,\n\tThe Prologue enters and explains that he i...,Henry VIII,Henry VIII: Summary: The Prologue
149,Henry VIII: Novel Summary: Act 1 Scene 1,\n\t \nSummary\n\tThe play opens in a room at ...,Henry VIII,Act 1 Scene 1
150,Henry VIII: Novel Summary: Act 1 Scene 2,"\n\t \nSummary\n\tThe king, leaning on Wolsey'...",Henry VIII,Act 1 Scene 2
151,Henry VIII: Novel Summary: Act 1 Scene 3 & 4,\n\t \nSummary\n\tThe Lord Chamberlain and Lor...,Henry VIII,Act 1 Scene 3 & 4
152,Henry VIII: Novel Summary: Act 2 Scene 1,\n\t \nSummary\n\tTwo Gentlemen meet in the st...,Henry VIII,Act 2 Scene 1
153,Henry VIII: Novel Summary: Act 2 Scene 2,\n\t \nSummary\n\tThe Lord Chamberlain is read...,Henry VIII,Act 2 Scene 2
154,Henry VIII: Novel Summary: Act 2 Scene 3,\n\t \nSummary\n\tAnne Bullen tells the Old La...,Henry VIII,Act 2 Scene 3
155,Henry VIII: Novel Summary: Act 2 Scene 4,"\n\t \nSummary\n\tAt Blackfriars, various bish...",Henry VIII,Act 2 Scene 4
156,Henry VIII: Novel Summary: Act 3 Scene 1,\n\t \nSummary\n\tKatherine is sewing with her...,Henry VIII,Act 3 Scene 1
157,Henry VIII: Novel Summary: Act 3 Scene 2,"\n\t \nSummary\n\tNorfolk, Suffolk, Surrey and...",Henry VIII,Act 3 Scene 2


In [107]:
h_df.drop(index=148, inplace=True)

In [108]:
h_df['chapter_text'] = [
    h_texts[0],
    h_texts[1],
    ' '.join(h_texts[2:3]),
    h_texts[4],
    h_texts[5],
    h_texts[6],
    h_texts[7],
    h_texts[8],
    h_texts[9],
    ' '.join(h_texts[10:11]),
    h_texts[12],
    h_texts[13],
    ' '.join(h_texts[14:16])
]

#### Add book to main dataframe

In [109]:
main_df = pd.concat([main_df, h_df])

In [110]:
len(main_df)

141

### Wuthering Heights

#### Split full text into chapters

In [111]:
chapters('Wuthering Heights')

162      Chapters 1-2
163      Chapters 3-4
164      Chapters 7-8
165     Chapters 9-10
166    Chapters 11-12
167    Chapters 13-14
168    Chapters 15-16
169    Chapters 17-18
170    Chapters 19-20
171    Chapters 21-22
172    Chapters 23-24
173    Chapters 25-26
174    Chapters 27-28
175    Chapters 29-30
176    Chapters 31-32
177    Chapters 33-34
Name: chapters, dtype: object

In [112]:
wh_ch_names = []

ch_numerals = [
'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VII', 'IX', 'X',
'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX',
'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX', 'XXX',
'XXXI', 'XXXII', 'XXXIII', 'XXXIV' 
]

for num in ch_numerals:
    wh_ch_names.append(f'CHAPTER {num}')

In [113]:
wh_ch_texts = split_chapters('Wuthering Heights', wh_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [114]:
wh_df = start_df('Wuthering Heights')
wh_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
162,Wuthering Heights: Novel Summary: Chapters 1-2,"Chapter 1: While at the Heights, Lockwood pets...",Wuthering Heights,Chapters 1-2
163,Wuthering Heights: Novel Summary: Chapters 3-4,Chapter 3: Lockwood falls off to sleep reading...,Wuthering Heights,Chapters 3-4
164,Wuthering Heights: Novel Summary: Chapters 7-8,Chapter 7: Mrs. Dean cleans Heathcliff up and ...,Wuthering Heights,Chapters 7-8
165,Wuthering Heights: Novel Summary: Chapters 9-10,"Chapter 9: Ellen tells Catherine to be quiet, ...",Wuthering Heights,Chapters 9-10
166,Wuthering Heights: Novel Summary: Chapters 11-12,\n\t \nChapter 11: The next time Heathcliff co...,Wuthering Heights,Chapters 11-12
167,Wuthering Heights: Novel Summary: Chapters 13-14,Chapter 13: She writes that they are at Wuther...,Wuthering Heights,Chapters 13-14
168,Wuthering Heights: Novel Summary: Chapters 15-16,Chapter 15: Ellen starts to get nervous becaus...,Wuthering Heights,Chapters 15-16
169,Wuthering Heights: Novel Summary: Chapters 17-18,Chapter 17: She then tells Ellen what had been...,Wuthering Heights,Chapters 17-18
170,Wuthering Heights: Novel Summary: Chapters 19-20,Chapter 19: A letter arrives from Edgar sayin...,Wuthering Heights,Chapters 19-20
171,Wuthering Heights: Novel Summary: Chapters 21-22,"Chapter 21: When they enter the house, Cathy i...",Wuthering Heights,Chapters 21-22


In [115]:
wh_df['chapter_text'] = [
    ' '.join(wh_ch_texts[0:1]),
    ' '.join(wh_ch_texts[2:3]),
    ' '.join(wh_ch_texts[6:7]),
    ' '.join(wh_ch_texts[8:9]),
    ' '.join(wh_ch_texts[10:11]),
    ' '.join(wh_ch_texts[12:13]),
    ' '.join(wh_ch_texts[14:15]),
    ' '.join(wh_ch_texts[16:17]),
    ' '.join(wh_ch_texts[18:19]),
    ' '.join(wh_ch_texts[20:21]),
    ' '.join(wh_ch_texts[22:23]),
    ' '.join(wh_ch_texts[24:25]),
    ' '.join(wh_ch_texts[26:27]),
    ' '.join(wh_ch_texts[28:29]),
    ' '.join(wh_ch_texts[30:31]),
    ' '.join(wh_ch_texts[32:33]),
]

In [116]:
main_df = pd.concat([main_df, wh_df])

In [117]:
len(main_df)

157

### Divine Comedy is next in the list of books, however the text in this file is the same text that was uploaded previously in place of 'The Inferno'

### The Adventures of Huckleberry Finn

#### Split full text into chapters

In [118]:
chapters('The Adventures of Huckleberry Finn')

198      Chapters 1-3
199      Chapters 4-6
200      Chapters 7-9
201    Chapters 13-15
202    Chapters 16-18
203    Chapters 19-21
204    Chapters 22-24
205    Chapters 25-27
206    Chapters 28-30
207    Chapters 31-33
208    Chapters 34-36
209    Chapters 37-39
210    Chapters 40-42
211        Chapter 43
Name: chapters, dtype: object

In [119]:
hf_ch_names = ['CHAPTER I',
 'CHAPTER II',
 'CHAPTER III',
 'CHAPTER IV',
 'CHAPTER V',
 'CHAPTER VI',
 'CHAPTER VII',
 'CHAPTER VII',
 'CHAPTER IX',
 'CHAPTER X',
 'CHAPTER XI',
 'CHAPTER XII',
 'CHAPTER XIII',
 'CHAPTER XIV',
 'CHAPTER XV',
 'CHAPTER XVI',
 'CHAPTER XVII',
 'CHAPTER XVIII',
 'CHAPTER XIX',
 'CHAPTER XX',
 'CHAPTER XXI',
 'CHAPTER XXII',
 'CHAPTER XXIII',
 'CHAPTER XXIV',
 'CHAPTER XXV',
 'CHAPTER XXVI',
 'CHAPTER XXVII',
 'CHAPTER XXVIII',
 'CHAPTER XXIX',
 'CHAPTER XXX',
 'CHAPTER XXXI',
 'CHAPTER XXXII',
 'CHAPTER XXXIII',
 'CHAPTER XXXIV',
 'CHAPTER XXXV',
 'CHAPTER XXXVI',
 'CHAPTER XXXVII',
 'CHAPTER XXXVIII',
 'CHAPTER XXXIX',
 'CHAPTER XL',
 'CHAPTER XLI',
 'CHAPTER XLII',
 'CHAPTER THE LAST'
 ]

In [120]:
hf_ch_texts = split_chapters('The Adventures of Huckleberry Finn', hf_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [121]:
hf_df = start_df('The Adventures of Huckleberry Finn')
hf_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
198,The Adventures of Huckleberry Finn: Novel Summ...,Chapters1-3\nChapter1: Huck begins his narrati...,The Adventures of Huckleberry Finn,Chapters 1-3
199,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 4-6\n\n\t \nChapter 4: Huck explains ...,The Adventures of Huckleberry Finn,Chapters 4-6
200,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 7-10\n\n\t \nChapter 7: Tired of bein...,The Adventures of Huckleberry Finn,Chapters 7-9
201,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 11-15\n\n\t \nChapter 13: Getting int...,The Adventures of Huckleberry Finn,Chapters 13-15
202,The Adventures of Huckleberry Finn: Novel Summ...,\nChapters 16-18\n\n\t \nChapter 16: Believin...,The Adventures of Huckleberry Finn,Chapters 16-18
203,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 19-21\n\n\t \nChapter 19: In this cha...,The Adventures of Huckleberry Finn,Chapters 19-21
204,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 22-24\n\n\t \nChapter 22: The descrip...,The Adventures of Huckleberry Finn,Chapters 22-24
205,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 25-27\n\n\t \nChapter 25: They arrive...,The Adventures of Huckleberry Finn,Chapters 25-27
206,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 28-30\n\n\t \nChapter 28: In this cha...,The Adventures of Huckleberry Finn,Chapters 28-30
207,The Adventures of Huckleberry Finn: Novel Summ...,Chapters 31-33\n\n\t \nChapter 31: After trave...,The Adventures of Huckleberry Finn,Chapters 31-33


In [122]:
hf_df['chapter_text'] = [
    ' '.join(hf_ch_texts[0:2]),
    ' '.join(hf_ch_texts[3:5]),
    ' '.join(hf_ch_texts[6:8]),
    ' '.join(hf_ch_texts[12:14]),
    ' '.join(hf_ch_texts[15:17]),
    ' '.join(hf_ch_texts[18:20]),
    ' '.join(hf_ch_texts[21:23]),
    ' '.join(hf_ch_texts[24:26]),
    ' '.join(hf_ch_texts[27:29]),
    ' '.join(hf_ch_texts[30:32]),
    ' '.join(hf_ch_texts[33:35]),
    ' '.join(hf_ch_texts[36:38]),
    ' '.join(hf_ch_texts[39:41]),
    hf_ch_texts[42]
]

#### Add book to main dataframe

In [123]:
main_df = pd.concat([main_df, hf_df])

In [124]:
len(main_df)

171

### Twelfth Night

#### Split full text into chapters

In [125]:
chapters('Twelfth Night')

212    Act 1, Scene 1
213    Act 1, Scene 2
214    Act 1, Scene 3
215    Act 1, Scene 4
216    Act 1, Scene 5
217    Act 2, Scene 1
218    Act 2, Scene 2
219    Act 2, Scene 3
220    Act 2, Scene 4
221    Act 2, Scene 5
222    Act 3, Scene 1
223    Act 3, Scene 2
224    Act 3, Scene 3
225    Act 3, Scene 4
226    Act 4, Scene 1
227    Act 4, Scene 2
228    Act 4, Scene 3
229    Act 5, Scene 1
Name: chapters, dtype: object

In [126]:
tn_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT III.',
    'ACT IV.',
]

In [127]:
tn_ch_names_list = []

In [128]:
tn_acts = split_chapters('Twelfth Night', tn_act_names)

In [129]:
tn_act1_names =[
    'ACT I. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
]
tn_ch_names_list.append(tn_act1_names)


tn_act2_names = [ 
    'ACT II. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
]
tn_ch_names_list.append(tn_act2_names)


tn_act3_names = [
    'ACT III. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
]
tn_ch_names_list.append(tn_act3_names)


tn_act4_names = [
    'ACT IV. SCENE I.',
    'SCENE II.',
    'SCENE III.',
]
tn_ch_names_list.append(tn_act4_names)

In [130]:
tn_texts = []
for i in range(4): 
    text = tn_acts[i]
    chapter_names = tn_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    tn_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        tn_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    tn_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [131]:
tn_df = start_df('Twelfth Night')
tn_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
212,"Twelfth Night: Novel Summary: Act 1, Scene 1",The scene opens at the court of the Duke Orsin...,Twelfth Night,"Act 1, Scene 1"
213,"Twelfth Night: Novel Summary: Act 1, Scene 2",The woman Viola who is traveling with her brot...,Twelfth Night,"Act 1, Scene 2"
214,"Twelfth Night: Novel Summary: Act 1, Scene 3","In the house of Countess Olivia, her kinsmen S...",Twelfth Night,"Act 1, Scene 3"
215,"Twelfth Night: Novel Summary: Act 1, Scene 4","Viola, dressed as a boy Cesario, has become tr...",Twelfth Night,"Act 1, Scene 4"
216,"Twelfth Night: Novel Summary: Act 1, Scene 5","The jester, Feste, in Lady Olivia's house ente...",Twelfth Night,"Act 1, Scene 5"
217,"Twelfth Night: Novel Summary: Act 2, Scene 1",Viola's brother Sebastian is saved by a sailor...,Twelfth Night,"Act 2, Scene 1"
218,"Twelfth Night: Novel Summary: Act 2, Scene 2",Malvolio finds Cesario and gives him the messa...,Twelfth Night,"Act 2, Scene 2"
219,"Twelfth Night: Novel Summary: Act 2, Scene 3","Sir Toby, Sir Andrew, and the Fool are having ...",Twelfth Night,"Act 2, Scene 3"
220,"Twelfth Night: Novel Summary: Act 2, Scene 4","\n\t \nViola, Orsino, and Curio are at the cou...",Twelfth Night,"Act 2, Scene 4"
221,"Twelfth Night: Novel Summary: Act 2, Scene 5",\n\t \nMaria plants the letter meant for Malvo...,Twelfth Night,"Act 2, Scene 5"


In [132]:
tn_df.drop(index=229, inplace=True)

In [133]:
tn_df['chapter_text'] = [
    tn_texts[0],
    tn_texts[1],
    tn_texts[2],
    tn_texts[3],
    tn_texts[4],
    tn_texts[5],
    tn_texts[6],
    tn_texts[7],
    tn_texts[8],
    tn_texts[9],
    tn_texts[10],
    tn_texts[11],
    tn_texts[12],
    tn_texts[13],
    tn_texts[14],
    tn_texts[15],
    tn_texts[16],
]

#### Add book to main dataframe

In [134]:
main_df = pd.concat([main_df, tn_df])

### The Count of Monte Cristo

#### Split full text into chapters

In [135]:
chapters('The Count of Monte Cristo')

230      Chapter 1-5
231     Chapter 6-10
232    Chapter 11-15
233    Chapter 16-20
234    Chapter 21-25
235    Chapter 26-30
236    Chapter 31-35
237    Chapter 36-40
238    Chapter 41-45
239    Chapter 46-50
240    Chapter 51-55
241    Chapter 56-60
242    Chapter 61-65
243    Chapter 66-70
244    Chapter 71-73
Name: chapters, dtype: object

In [136]:
mc_chap_names = []

for i in range(73):
    mc_chap_names.append(f'Chapter {i}')

In [137]:
mc_chap_texts = split_chapters('The Count of Monte Cristo', mc_chap_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [138]:
df_mc = start_df('The Count of Monte Cristo')
df_mc

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
230,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 1-5\nChapter 1: The story's setting is...,The Count of Monte Cristo,Chapter 1-5
231,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 6-10\nChapter 6: After sleeping in pri...,The Count of Monte Cristo,Chapter 6-10
232,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 11-15\nChapter 11: Dantes begins to be...,The Count of Monte Cristo,Chapter 11-15
233,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 16-20\nChapter 16: Dantes returns to M...,The Count of Monte Cristo,Chapter 16-20
234,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 21-25\nChapter 21: After the celebrati...,The Count of Monte Cristo,Chapter 21-25
235,The Count of Monte Cristo: Novel Summary: Chap...,"Chapter 26-30\nChapter 26: In this chapter, Da...",The Count of Monte Cristo,Chapter 26-30
236,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 31-35\nChapter 31: In this chapter the...,The Count of Monte Cristo,Chapter 31-35
237,The Count of Monte Cristo: Novel Summary: Chap...,"Chapter 36-40\nChapter 36: Leaving the party, ...",The Count of Monte Cristo,Chapter 36-40
238,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 41-45\nChapter 41:This chapter begins ...,The Count of Monte Cristo,Chapter 41-45
239,The Count of Monte Cristo: Novel Summary: Chap...,Chapter 46-50\nChapter 46:The count receives a...,The Count of Monte Cristo,Chapter 46-50


In [139]:
df_mc['chapter_text'] = [
    ' '.join(mc_chap_texts[0:4]),
    ' '.join(mc_chap_texts[5:9]),
    ' '.join(mc_chap_texts[10:14]),
    ' '.join(mc_chap_texts[15:19]),
    ' '.join(mc_chap_texts[20:24]),
    ' '.join(mc_chap_texts[25:29]),
    ' '.join(mc_chap_texts[30:34]),
    ' '.join(mc_chap_texts[35:39]),
    ' '.join(mc_chap_texts[40:44]),
    ' '.join(mc_chap_texts[45:49]),
    ' '.join(mc_chap_texts[50:54]),
    ' '.join(mc_chap_texts[55:59]),
    ' '.join(mc_chap_texts[60:64]),
    ' '.join(mc_chap_texts[65:69]),
    ' '.join(mc_chap_texts[70:72]),
]

#### Add book to main dataframe

In [140]:
main_df = pd.concat([main_df, df_mc])

In [141]:
len(main_df)

203

### The Hound of the Baskervilles

#### Split full text into chapters

In [142]:
chapters('The Hound of the Baskervilles')

245          Chapter 1
246      Chapter 2 - 5
247     Chapter 6 - 10
248    Chapter 11 - 15
Name: chapters, dtype: object

In [143]:
hb_chap_names = []

for i in range(15):
    hb_chap_names.append(f'Chapter {i}')

In [144]:
hb_chap_texts = split_chapters('The Hound of the Baskervilles', hb_chap_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [145]:
hb_df = start_df('The Hound of the Baskervilles')
hb_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
245,The Hound of the Baskervilles: Novel Summary: ...,\nNote: All page numbers in this summary and ...,The Hound of the Baskervilles,Chapter 1
246,The Hound of the Baskervilles: Novel Summary: ...,\nSummary: Mortimer reads to Holmes and Watso...,The Hound of the Baskervilles,Chapter 2 - 5
247,The Hound of the Baskervilles: Novel Summary: ...,\nSummary: Holmes urges Watson to report any ...,The Hound of the Baskervilles,Chapter 6 - 10
248,The Hound of the Baskervilles: Novel Summary: ...,"\nSummary: Watson questions Laura Lyons, who ...",The Hound of the Baskervilles,Chapter 11 - 15


In [146]:
hb_df['chapter_text'] = [
    hb_chap_texts[0],
    ' '.join(hb_chap_texts[1:4]),
    ' '.join(hb_chap_texts[5:9]),
    ' '.join(hb_chap_texts[10:14])
]

#### Add book to main dataframe

In [147]:
main_df = pd.concat([main_df, hb_df])

In [148]:
len(main_df)

207

### The Canterbury Tales

#### Split full text into chapters

In [149]:
chapters('The Canterbury Tales')

249    The Canterbury Tales: Novel Summary
Name: chapters, dtype: object

In [150]:
ct_df = start_df('The Canterbury Tales')
ct_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
249,The Canterbury Tales: Novel Summary,General Prologue\nSeveral pilgrims of various ...,The Canterbury Tales,The Canterbury Tales: Novel Summary


In [151]:
ct_df.iloc[0]['chapter_summary']

'General Prologue\nSeveral pilgrims of various social and ethical standing are on their way to Canterbury to visit the cathedral there.\xa0 They stay a night in an Inn, and the next day, the Innkeeper offers to join the group on the trip, and judge stories that they will tell to pass the time.\xa0 The person with the best story wins a free dinner paid by the other pilgrims.\xa0 The knight tells the first story.\xa0\n'

Only one short summary for the full text was collected for this book, so it will not be included in the final dataset

### Crime and Punishment

#### Split full text into chapters

In [152]:
chapters('Crime and Punishment')

250    Crime and Punishment: Summary: Part1, Chapter ...
251                    Part1, Chapter 3-Part1, Chapter 4
252                    Part1, Chapter 5-Part1, Chapter 6
253                    Part1, Chapter 7-Part2, Chapter 1
254                    Part2, Chapter 2-Part2, Chapter 3
255                  Part 2, Chapter 4-Part 2, Chapter 6
256                  Part 2, Chapter 6-Part 2, Chapter 7
257                  Part 3, Chapter 1-Part 3, Chapter 2
258                  Part 3, Chapter 3-Part 3, Chapter 4
259                  Part 3, Chapter 5-Part 3, Chapter 6
260                  Part 4, Chapter 1-Part 4, Chapter 2
261                  Part 4, Chapter 3-Part 4, Chapter 4
262                  Part 4, Chapter 5-Part 6, Chapter 1
263                  Part 6, Chapter 2-Part 6, Chapter 3
264                                    Part 6, Chapter 4
Name: chapters, dtype: object

In [153]:
chapters('Crime and Punishment')[250]

'Crime and Punishment: Summary: Part1, Chapter 1-Part1, Chapter 2'

In [154]:
cp_part_names = [
    'PART I',
    'PART II',
    'PART III',
    'PART IV',
    'PART V',
    'PART VI'
]

In [155]:
cp_ch_names_list = []

In [156]:
cp_parts = split_chapters('Crime and Punishment', cp_part_names)

In [157]:
cp_part1_names =[
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
    'CHAPTER V',
    'CHAPTER VI',
    'CHAPTER VII',
]
cp_ch_names_list.append(cp_part1_names)


cp_part2_names = [ 
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
    'CHAPTER V',
    'CHAPTER VI',
    'CHAPTER VII',
]
cp_ch_names_list.append(cp_part2_names)


cp_part3_names = [
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
    'CHAPTER V',
    'CHAPTER VI',
]
cp_ch_names_list.append(cp_part3_names)


cp_part4_names = [
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
    'CHAPTER V',
]
cp_ch_names_list.append(cp_part4_names)

cp_part5_names = [
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
    'CHAPTER V',
]
cp_ch_names_list.append(cp_part5_names)

cp_part6_names = [
    'CHAPTER I',
    'CHAPTER II',
    'CHAPTER III',
    'CHAPTER IV',
]
cp_ch_names_list.append(cp_part6_names)

In [158]:
cp_texts = []
for i in range(6): 
    text = cp_parts[i]
    chapter_names = cp_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    cp_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        cp_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    cp_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [159]:
cp_df = start_df('Crime and Punishment')
cp_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
250,"Crime and Punishment: Summary: Part1, Chapter ...","Part1, Chapter 1: We first meet Raskolnikov as...",Crime and Punishment,"Crime and Punishment: Summary: Part1, Chapter ..."
251,"Crime and Punishment: Novel Summary: Part1, Ch...","\n\t \nPart1, Chapter 3: The next day, Raskoln...",Crime and Punishment,"Part1, Chapter 3-Part1, Chapter 4"
252,"Crime and Punishment: Novel Summary: Part1, Ch...","\n\t \nPart1, Chapter 5: Raskolnikov opts not ...",Crime and Punishment,"Part1, Chapter 5-Part1, Chapter 6"
253,"Crime and Punishment: Novel Summary: Part1, Ch...","\n\t \nPart1, Chapter 7: The old woman cautiou...",Crime and Punishment,"Part1, Chapter 7-Part2, Chapter 1"
254,"Crime and Punishment: Novel Summary: Part2, Ch...","\n\t \nPart2, Chapter 2: He hides the stolen i...",Crime and Punishment,"Part2, Chapter 2-Part2, Chapter 3"
255,"Crime and Punishment: Novel Summary: Part 2, C...","\n\t \nPart 2, Chapter 4: Zosimov is a doctor ...",Crime and Punishment,"Part 2, Chapter 4-Part 2, Chapter 6"
256,"Crime and Punishment: Novel Summary: Part 2, C...","\n\t \nPart 2, Chapter 6: As soon as he is lef...",Crime and Punishment,"Part 2, Chapter 6-Part 2, Chapter 7"
257,"Crime and Punishment: Novel Summary: Part 3, C...","Part 3, Chapter 1: Pulcheria, his mother, want...",Crime and Punishment,"Part 3, Chapter 1-Part 3, Chapter 2"
258,"Crime and Punishment: Novel Summary: Part 3, C...","\n\t \nPart 3, Chapter 3: Raskolnikov reveals ...",Crime and Punishment,"Part 3, Chapter 3-Part 3, Chapter 4"
259,"Crime and Punishment: Novel Summary: Part 3, C...","\n\t \nPart 3, Chapter 5: Raskolnikov enters s...",Crime and Punishment,"Part 3, Chapter 5-Part 3, Chapter 6"


In [160]:
cp_df['chapter_text'] = [
    ' '.join(cp_texts[0:1]),
    ' '.join(cp_texts[2:3]),
    ' '.join(cp_texts[4:5]),
    ' '.join(cp_texts[6:7]),
    ' '.join(cp_texts[8:9]),
    ' '.join(cp_texts[10:12]),
    ' '.join(cp_texts[13:14]),
    ' '.join(cp_texts[15:16]),
    ' '.join(cp_texts[17:18]),
    ' '.join(cp_texts[19:20]),
    ' '.join(cp_texts[21:22]),
    ' '.join(cp_texts[23:24]),
    ' '.join(cp_texts[25:26]),
    ' '.join(cp_texts[27:28]),
    cp_texts[29],
]

#### Add book to main dataframe

In [161]:
main_df = pd.concat([main_df, cp_df])

In [162]:
len(main_df)

222

### My Antonia

#### Split full text into chapters

In [163]:
chapters('My Antonia')

265    My Antonia: Novel Summary
Name: chapters, dtype: object

In [164]:
start_df('My Antonia').iloc[0]['chapter_summary'][:500]

'Introduction\nMy Ántonia opens with a short frame story: the author (presumably Willa Cather) relates a chance meeting with an old acquaintance, Jim Burden, on a train ride through Iowa.\xa0 The time appears to be roughly contemporary with the publication of the novel (1918).\xa0 The two adults reminisce about their shared Nebraska childhood, where they first met, and they mention a woman named Ántonia.\xa0 They challenge each other to write an account of their relationship with Ántonia, and only Jim appe'

Upon examination of full text of chapter summary for My Antonia included in data, it can be seen that only summaries of the introduction, and parts 1-4 of Book I are included. These sections of the book will be included in the final dataset.

In [165]:
start_df('My Antonia')

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
265,My Antonia: Novel Summary,Introduction\nMy Ántonia opens with a short fr...,My Antonia,My Antonia: Novel Summary


##### Split summary into chapters & make new dataframe for book

In [166]:
#Copied from a text editor document

chap_summaries = [
    'My Ántonia opens with a short frame story: the author (presumably Willa Cather) relates a chance meeting with an old acquaintance, Jim Burden, on a train ride through Iowa. The time appears to be roughly contemporary with the publication of the novel (1918).  The two adults reminisce about their shared Nebraska childhood, where they first met, and they mention a woman named Ántonia. They challenge each other to write an account of their relationship with Ántonia, and only Jim appears to follow through, composing a lengthy manuscript and bringing it to her a few months later.  The author says that this manuscript became the text of My Ántonia.',
    'The book opens with Jim Burden, the narrator, being taken west from Virginia to Nebraska to live with his grandparents after his parents’ deaths. Jake Marpole, a farm hand who worked for Jim’s parents, accompanies him. Jake is a young man at the time, and Jim is ten years old. They take a train across the country, and they hear about an immigrant family traveling on the same train going to the same place, Black Hawk, Nebraska. Jake buys and gives Jim a copy of The Life of Jesse James, a book he treasures for many years. When they arrive at Black Hawk, Jake and Jim see the immigrant family (the Shimerdas), and hear the family talking in a foreign language. They are greeted by Otto Fuchs, a German immigrant and employee of Jim’s grandparents. Jim compares Fuchs to a character from the Jesse James book—he is visibly tanned and scarred, and dressed like a cowboy—and Fuchs leads them to a wagon to take them to the Burdens’ farm.  Jim notices the immigrant family again, getting into the only other wagon present. Fuchs drives them over the Nebraska land, and Jim thinks to himself about how empty and featureless the land is compared to Virginia.  He realizes he has left his parents behind, and he feels intimidated and insignificant under the vast sky and wide, flat land.',
    'Jim falls asleep in the wagon and wakes up in his grandparents’ house. He sees his grandmother when he opens his eyes, and she immediately compares him to her dead son—Jim’s father—and invites him into the kitchen.  Jim makes his way out of bed and down to the basement kitchen, and he seems impressed by the order and cleanliness of the arrangement. He takes a bath in the tin washtub in the kitchen while his grandmother is in the attached dining room, refusing her help with cleaning himself.  Jim notices the details of the kitchen, acquaints himself with the pets of the house, and muses for a little while on his impressions of his grandparents. Otto Fuchs introduces him to one of the horses, and Jim, Otto, and Jake are called upstairs for prayers. The next day, Jim goes outside and starts to get to know the town. His grandparents apparently live in the only wooden house in the entire town; the neighbors live in sod houses Most of the area around the house, at least the part not covered with corn, is covered with wild red grass. As Jim is musing on the red grass, his grandmother invites him to accompany her to the garden to dig up some potatoes.\xa0 She tells him that he should never go to the garden without some kind of weapon to protect him from rattlesnakes.\xa0 After digging some potatoes, Jim asks if he can stay in the garden without his grandmother. She agrees, but she gives him some information about all of the various animals that he might see.  He spends a good portion of the day in the garden, and he enjoys the feeling that he has become part of the land.',
    'A few days later, Jim accompanies Otto Fuchs on a trip to visit the new Bohemian family in the town with a gift of some provisions. Jim finds out that the Bohemian family, the Shimerdas, bought a piece of land not far from his grandparents, and that they had paid too much for it.\xa0 They had been swindled by Peter Krajiek, a Bohemian man who was already living there. No one in the family spoke enough English to survive without Krajiek, so he also functioned as their interpreter. Fuchs suggests that he might have helped them with Krajiek, except that he doesn’t think the Shimerdas will trust an Austrian.  When asked to explain, Fuchs says that it’s “politics,” and that it would take too long, but that Bohemians don’t trust Austrians. As the Grandmother, Fuchs, and Jim make their way to the Shimerdas’ farm, they notice Squaw Creek, a narrow stream that runs through a deep ravine through the Shimerdas’ land. The ravine makes farming that piece of land difficult and reduces its value. They come upon the Shimerdas’ house, a dugout built against the edge of the ravine. Mrs. Shimerda comes out of the house to meet them, and shakes Grandmother’s hand. She speaks a little English about the house, saying that it is no good.  Ántonia and Yulka, the two daughters, come out of the house and meet the visitors, as does Ambrosch, the oldest son.  Ántonia, Jim notices, is brown-skinned and brown-eyed, and quite beautiful.  She is about fourteen years old, a few years older than Jim. While the two families are talking, Krajiek and Marek, the second son who shows off a webbed finger and acts a bit strangely, approach from the barn. At some point, Mr. Shimerda emerges, and Jim is greatly impressed by his appearance and his demeanor. Mr. Shimerda doesn’t seem to belong on a farm; he seems too aristocratic for farm labor. Ántonia and Yulka drag Jim away from his family and bring him over to the edge of the ravine  Ántonia starts quizzing him on the English names for things, including his own.  Jim teaches her several English words, and she tries to repay him with a gift of a ring. After a little while, Mr. Shimerda comes looking for the girls, and they run to meet him.  Mr. Shimerda confronts Jim with a silent, questioning look. Then he returns to the house, takes out a book with Bohemian and English alphabets, and asks Grandmother to teach Ántonia English.',
    'Jim learns to ride one of his grandparents’ horses, and he becomes the messenger for the family. As the messenger, with the freedom of travel that he has with the horse, he becomes a sightseer, traveling about the area to admire the landscapes. He also instructs Ántonia in English every day. She runs across the prairie between them, and Jim shares some of the watermelon from the farm with her. Jim also notices the ways that the rattlesnakes take advantage of the prairie dogs in the area, and compares Krajiek to a rattlesnake in a prairie dog town. The Shimerdas don’t know how to get rid of their only English-speaking Bohemian companion. '
]

In [167]:
len(chap_summaries)

5

In [168]:
ma = {
    'chapter_title':['My Antonia: Introduction', 'My Antonia: Book I Chapter I', 'My Antonia: Book I Chapter II', 'My Antonia: Book I Chapter III', 'My Antonia: Book I Chapter IV'],
    'chapter_summary':chap_summaries,
    'book_title':['My Antonia', 'My Antonia', 'My Antonia', 'My Antonia', 'My Antonia'],
    'chapters':['Introduction', 'Book I Chapter I', 'Book I Chapter II', 'Book I Chapter III', 'Book I Chapter IV']
}

In [169]:
ma_df = pd.DataFrame(ma)
ma_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
0,My Antonia: Introduction,My Ántonia opens with a short frame story: the...,My Antonia,Introduction
1,My Antonia: Book I Chapter I,"The book opens with Jim Burden, the narrator, ...",My Antonia,Book I Chapter I
2,My Antonia: Book I Chapter II,Jim falls asleep in the wagon and wakes up in ...,My Antonia,Book I Chapter II
3,My Antonia: Book I Chapter III,"A few days later, Jim accompanies Otto Fuchs o...",My Antonia,Book I Chapter III
4,My Antonia: Book I Chapter IV,Jim learns to ride one of his grandparents’ ho...,My Antonia,Book I Chapter IV


##### Split text into chapters

In [170]:
ma_text = open('./data/book_full_texts/My_Antonia.txt', 'r').read()

ma_intro = ma_text.partition('BOOK I')[0]

ma_book1 = (ma_text.partition('BOOK I')[2]).partition('BOOK II')[0]

In [171]:
ma_ch1 = ma_book1.partition('II')[0]
ma_ch2 = (ma_book1.partition('II')[2]).partition('III')[0]
ma_ch3 = (ma_book1.partition('III')[2]).partition('IV')[0]
ma_ch4 = ma_book1.partition('IV')[2]

In [172]:
ma_df['chapter_text'] = [ma_intro, ma_ch1, ma_ch2, ma_ch3, ma_ch4]

#### Add book to main dataframe

In [173]:
main_df = pd.concat([main_df, ma_df])

In [174]:
len(main_df)

227

### Adam Bede

#### Split full text into chapters

In [175]:
chapters('Adam Bede')

266    Adam Bede: Summary
Name: chapters, dtype: object

In [176]:
start_df('Adam Bede').iloc[0]['chapter_summary']

'George Eliot, Adam Bede. Edited with an Introduction by Stephen Gill. Penguin Books, London, 1980, rpt. 1985.\nBook First\n\xa0\nSummary of Chapter 1: The Workshop\nThe narrator will magically use the ink of her pen as a mirror to show us the carpentry workshop of Jonathan Burge in the village of Hayslope in Loamshire, England, 1799. Five workmen there are busy, the most striking of them a tall strong man singing a hymn in a baritone voice as he finishes a mantelpiece. This is Adam Bede, the obvious leader of the group. His brother, Seth, is younger and claims he has finished the door he is working on. Sandy Jim teases him that he forgot the panels on the door. Wiry Ben Cranage pipes in saying that after all, Seth is a wool-gathering Methodist. Adam pushes Ben against the wall until he promises to leave his brother alone. In friendly laughter, the men accuse Seth of thinking of the woman Methodist preacher who will preach on the Green tonight. Seth is in love with Dinah Morris and inv

Upon examination of full text of chapter summary for Adam Bede included in data, it can be seen that only a summary of the first chapter is included. This section of the book will be included in the final dataset.

#### Make dataframe for book

In [177]:
ab_df = start_df('Adam Bede')
ab_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
266,Adam Bede: Summary,"George Eliot, Adam Bede. Edited with an Introd...",Adam Bede,Adam Bede: Summary


In [178]:
#Summary of first chapter copied from text editor document

ab_df.iloc[0]['chapter_summary'] = 'The narrator will magically use the ink of her pen as a mirror to show us the carpentry workshop of Jonathan Burge in the village of Hayslope in Loamshire, England, 1799. Five workmen there are busy, the most striking of them a tall strong man singing a hymn in a baritone voice as he finishes a mantelpiece. This is Adam Bede, the obvious leader of the group. His brother, Seth, is younger and claims he has finished the door he is working on. Sandy Jim teases him that he forgot the panels on the door. Wiry Ben Cranage pipes in saying that after all, Seth is a wool-gathering Methodist. Adam pushes Ben against the wall until he promises to leave his brother alone. In friendly laughter, the men accuse Seth of thinking of the woman Methodist preacher who will preach on the Green tonight. Seth is in love with Dinah Morris and invites the others to hear her. Adam declares his loyalty to the Church but believes everyone should worship as he chooses. When the church clock strikes six, the other men immediately leave off working, but Adam chides them for having no pride in their work, and he continues until he finishes, going home with his dog, Gyp, while Seth goes to the Green to hear Dinah preach. A stranger, passing by on horseback, cannot help but admire Adam Bede as he passes.\n'

In [179]:
ab_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
266,Adam Bede: Summary,The narrator will magically use the ink of her...,Adam Bede,Adam Bede: Summary


In [180]:
ab_df.iloc[0]['chapter_title'] = 'Adam Bede: Summary Chapter 1'
ab_df.iloc[0]['chapters'] = 'Chapter 1'

In [181]:
ab_text = open('./data/book_full_texts/Adam_Bede.txt', 'r').read()

In [182]:
ab_partition = ab_text.partition('Chapter II')

In [183]:
ab_ch1 = ab_partition[0]

In [184]:
ab_ch1[:500]

'Adam BedebyGeorge Eliot  [pseudonym of Mary Anne Evans]\n\n\n\n\n\nAdam Bede\nby George Eliot\n\n\n\n\n\nBook One\n\n\nChapter I\n\n\nThe Workshop\n\n\nWith a single drop of ink for a mirror, the Egyptian sorcerer\nundertakes to reveal to any chance comer far-reaching visions of\nthe past.  This is what I undertake to do for you, reader.  With\nthis drop of ink at the end of my pen, I will show you the roomy\nworkshop of Mr. Jonathan Burge, carpenter and builder, in the\nvillage of Hayslope, as it appeared on the eighteen'

In [185]:
ab_df['chapter_text'] = ab_ch1

In [186]:
ab_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
266,Adam Bede: Summary Chapter 1,The narrator will magically use the ink of her...,Adam Bede,Chapter 1,Adam BedebyGeorge Eliot [pseudonym of Mary An...


#### Add book to main dataframe

In [187]:
main_df = pd.concat([main_df, ab_df])

In [188]:
len(main_df)

228

### Oliver Twist

#### Split full text into chapters

In [189]:
chapters('Oliver Twist')

267      Chapters1-3
268      Chapters4-6
269      Chapters7-9
270    Chapters10-12
271    Chapters13-15
272    Chapters16-18
273    Chapters19-21
274    Chapters22-24
275    Chapters25-27
276    Chapters28-30
277    Chapters31-33
278    Chapters34-36
279    Chapters37-39
280    Chapters40-42
281    Chapters43-45
282    Chapters46-48
283    Chapters49-51
284    Chapters52-53
Name: chapters, dtype: object

In [190]:
#Install package to help with roman numeral chapter numbers (should have done this earlier!)

# roman also added to imports at top of notebook

%pip install roman

Note: you may need to restart the kernel to use updated packages.


In [191]:
import roman

r_numerals = []
for i in range(1,54):
    r_numerals.append(roman.toRoman(i))

In [192]:
ot_ch_names = []

for r in r_numerals:
    ot_ch_names.append(f'Chapter {r}')

In [193]:
ot_ch_names[:10]

['Chapter I',
 'Chapter II',
 'Chapter III',
 'Chapter IV',
 'Chapter V',
 'Chapter VI',
 'Chapter VII',
 'Chapter VIII',
 'Chapter IX',
 'Chapter X']

In [194]:
ot_ch_texts = split_chapters('Oliver Twist', ot_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [195]:
ot_df = start_df('Oliver Twist')
ot_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
267,Oliver Twist: Novel Summary: Chapters1-3,Chapter1: An unknown woman was found lying in ...,Oliver Twist,Chapters1-3
268,Oliver Twist: Novel Summary: Chapters4-6,Chapter 4: The board decides that the best thi...,Oliver Twist,Chapters4-6
269,Oliver Twist: Novel Summary: Chapters7-9,Chapter 7: Noah found Mr. Bumble and told him ...,Oliver Twist,Chapters7-9
270,Oliver Twist: Novel Summary: Chapters10-12,Chapter 10: Oliver spent more time with the Je...,Oliver Twist,Chapters10-12
271,Oliver Twist: Novel Summary: Chapters13-15,Chapter 13: Fagin yells at the boys until they...,Oliver Twist,Chapters13-15
272,Oliver Twist: Novel Summary: Chapters16-18,Chapter 16: Nancy and Mr. Sikes drag Oliver to...,Oliver Twist,Chapters16-18
273,Oliver Twist: Novel Summary: Chapters19-21,Chapter 19: Fagin left the house where the boy...,Oliver Twist,Chapters19-21
274,Oliver Twist: Novel Summary: Chapters22-24,Chapter 22: Two of Mr. Sikes cohorts are waiti...,Oliver Twist,Chapters22-24
275,Oliver Twist: Novel Summary: Chapters25-27,"Chapter 25: Fagin, Charlie Bates, the Dodger, ...",Oliver Twist,Chapters25-27
276,Oliver Twist: Novel Summary: Chapters28-30,"Chapter 28: As they ran through the fields, Si...",Oliver Twist,Chapters28-30


In [196]:
ot_df['chapter_text'] = [
    ' '.join(ot_ch_texts[0:2]),
    ' '.join(ot_ch_texts[3:5]),
    ' '.join(ot_ch_texts[6:8]),
    ' '.join(ot_ch_texts[9:11]),
    ' '.join(ot_ch_texts[12:14]),
    ' '.join(ot_ch_texts[15:17]),
    ' '.join(ot_ch_texts[18:20]),
    ' '.join(ot_ch_texts[21:23]),
    ' '.join(ot_ch_texts[24:26]),
    ' '.join(ot_ch_texts[27:29]),
    ' '.join(ot_ch_texts[30:32]),
    ' '.join(ot_ch_texts[33:35]),
    ' '.join(ot_ch_texts[36:38]),
    ' '.join(ot_ch_texts[39:41]),
    ' '.join(ot_ch_texts[42:44]),
    ' '.join(ot_ch_texts[45:47]),
    ' '.join(ot_ch_texts[48:50]),
    ' '.join(ot_ch_texts[51:52]),
]

#### Add book to main dataframe

In [197]:
main_df = pd.concat([main_df, ot_df])

In [198]:
len(main_df)

246

### David Copperfield

#### Split full text into chapters

In [199]:
chapters('David Copperfield')

285    David Copperfield: Summary: Chapters I-III
Name: chapters, dtype: object

In [200]:
start_df('David Copperfield').iloc[0]['chapter_summary'][:500]

"\xa0\nPreface\n\tThe author expresses both pleasure and sorrow at finishing his novel ?pleasure at having successfully completed this lengthy project, and sorrow at parting from the work, which has become a part of him.\n\tChapter I: I am born\n\tThe adult David Copperfield narrates the story of his life, beginning with his birth.\n\tThe women of David's neighborhood believe, based on the time of his birth, that he is destined to be unlucky and that he would possess the gift of seeing ghosts and spirits. On"

Upon examination of full text of chapter summary for David Copperfield included in data, it can be seen that only summaries of the preface and the first three chapters are included. These sections of the book will be included in the final dataset.

In [201]:
dc_chap_names = ['PREFACE TO THE CHARLES DICKENS EDITION', 'CHAPTER 1', 'CHAPTER 2', 'CHAPTER 3']

In [202]:
dc_chap_texts = split_chapters('David Copperfield', dc_chap_names)

In [203]:
#Examined full text of last entry in last - all of book chapter 3 & on included because 'split_chapters' function
# is not designed to only take in portion of chapters in a book

dc_chap_texts[-1][:500]

"\nI HAVE A CHANGE\n\n\nThe carrier's horse was the laziest horse in the world, I should\nhope, and shuffled along, with his head down, as if he liked to\nkeep people waiting to whom the packages were directed.  I fancied,\nindeed, that he sometimes chuckled audibly over this reflection,\nbut the carrier said he was only troubled with a cough.\nThe carrier had a way of keeping his head down, like his horse, and\nof drooping sleepily forward as he drove, with one of his arms on\neach of his knees.  I say 'dr"

In [204]:
# Edit chapter 3 text entry

last_ch_partition = dc_chap_texts[-1].partition('CHAPTER 4')

dc_chap_texts[-1] = last_ch_partition[0]

In [205]:
#Check edited chapter 3 entry - now contains correct text

dc_chap_texts[-1][:500]

"\nI HAVE A CHANGE\n\n\nThe carrier's horse was the laziest horse in the world, I should\nhope, and shuffled along, with his head down, as if he liked to\nkeep people waiting to whom the packages were directed.  I fancied,\nindeed, that he sometimes chuckled audibly over this reflection,\nbut the carrier said he was only troubled with a cough.\nThe carrier had a way of keeping his head down, like his horse, and\nof drooping sleepily forward as he drove, with one of his arms on\neach of his knees.  I say 'dr"

#### Make dataframe for book

In [206]:
dc_df = start_df('David Copperfield')
dc_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
285,David Copperfield: Summary: Chapters I-III,\nPreface\n\tThe author expresses both pleasu...,David Copperfield,David Copperfield: Summary: Chapters I-III


In [207]:
dc = {
    'chapter_title':[
        'David Copperfield: Summary: Preface', 'David Copperfield: Summary: Chapter I', 
        'David Copperfield: Summary: Chapter II', 'David Copperfield: Summary: Chapter III',
    ],
    'chapter_summary':[
        'The author expresses both pleasure and sorrow at finishing his novel ?pleasure at having successfully completed this lengthy project, and sorrow at parting from the work, which has become a part of him.',
        'The adult David Copperfield narrates the story of his life, beginning with his birth.\n\tThe women of David\'s neighborhood believe, based on the time of his birth, that he is destined to be unlucky and that he would possess the gift of seeing ghosts and spirits. On the first prediction, he comments that the story he will tell will reveal its truth or otherwise, and on the second prediction, he comments that he is not aware of having any such gift.\n\tDavid\'s father is already dead when he is born. An aunt of his father\'s, Miss Betsey Trotwood, turns up on the day of his birth. Betsey was once married to a man who beat her and tried to kill her, and she ended up paying him to stay away from her. Though Betsey was fond of David\'s father, she was contemptuous of his mother, Clara Copperfield, calling her "a wax doll.?/p>\n\tThis is the first time that Betsey has met Clara, and she gives her the nickname of Baby because of her youthful appearance. Betsey informs Clara that she is certain that she will give birth to a girl. She intends to be a friend to the girl, and to ensure that she does not place her trust in the type of man who will take advantage of her, as she herself did.\n\tLater, Mr. Chillip, the doctor, emerges from the birthing room with the news that Clara has had a boy. Without a word, Betsey walks out of the house, never to return.',
        'David describes some of his earliest memories. He remembers his pretty mother, and the kindly nurse, Clara Peggotty, who runs the household. One evening, David is reading to Peggotty from a book about crocodiles. He asks Peggotty if a person whose spouse dies can marry again. Peggotty says that they can if they choose, though she sounds unenthusiastic.\n\tClara comes in with Mr. Murdstone, a handsome man with black hair, eyes and whiskers, who is courting her. David takes an instant dislike to him. David falls asleep. When he wakes, he hears that both women are in tears. Peggotty is telling Clara that her former husband, David\'s father, would not have liked Mr. Murdstone.\n\tMr. Murdstone returns one day and takes David on a trip to Lowestoft, a nearby town, to meet some of his business acquaintances. One of the men, Mr. Quinion, jokes with Mr. Murdstone about his courtship of Clara and David\'s dislike of him. David observes that the men seem to share his wariness of Mr. Murdstone, and that Mr. Murdstone never laughs with them.\n\tOne evening, Peggotty asks David to accompany her on a two-week visit to her brother\'s at Yarmouth. He excitedly agrees. As he says goodbye to his mother, Mr. Murdstone appears at her side and tells her to control her emotions.',
        'Peggotty and David travel by carrier\'s cart to Yarmouth, where they are met at an inn by Peggotty\'s nephew,Ham. Ham takes them to his family\'s home, which is a boat converted into a house. Inside there is a smell of fish. Peggotty\'s brother, Daniel Peggotty, who owns the house, deals in seafood, which he stores in an outhouse. Mr. Peggotty lives in the house with his nephew, Ham, and his niece, Little Em\'ly, whom he adopted when their fathers drowned at sea. Also living with Mr. Peggotty is Mrs. Gummidge, the widow of Mr. Peggotty\'s partner in a boat. Mr. Peggotty is an easy-going man who only becomes angry if his generosity in adopting destitute people is mentioned.\n\tNext morning, David goes out onto the beach with Little Em\'ly, where they collect pebbles and fall in love with each other. Little Em\'ly says that she is afraid of the sea, which has destroyed so many local men and boats. She also reveals that she wants to be a lady. The adult David reflects that perhaps it would have been better if the sea had swallowed Little Em\'ly that morning, so that she would not have to suffer everything that she suffered since.\n\tMrs. Gummidge turns out to be a depressed sort of woman, given to complaining that "I am a lone lorn creetur...and everythink goes contrairy with me.?She claims that while others may share the adverse conditions that cause her grief, "I feel it more." Mr. Peggotty remarks that "She\'s been thinking of the old \'un," meaning her dead husband.\n\tAt the end of their vacation, Peggotty and David set out for home. David is excited to see his mother again, but Peggotty tries to restrain his enthusiasm. When they reach home, David is baffled that his mother has not come out to the gate to meet him. He fears that she is dead. Peggotty reveals that Clara has married Mr. Murdstone.\n\tDavid finds his mother sitting with Mr. Murdstone by the fire. Clara rises to meet David, but Mr. Murdstone tells her again to control her emotions. David finds that his bedroom has been moved to a more distant room. There is a fierce black dog in the kennel in the yard, which springs out to attack him'
    ],
    'book_title':['David Copperfield', 'David Copperfield', 'David Copperfield', 'David Copperfield'],
    'chapters':['Preface', 'Chapter I', 'Chapter II', 'Chapter III'],
}

dc_df = pd.DataFrame(dc)
dc_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
0,David Copperfield: Summary: Preface,The author expresses both pleasure and sorrow ...,David Copperfield,Preface
1,David Copperfield: Summary: Chapter I,The adult David Copperfield narrates the story...,David Copperfield,Chapter I
2,David Copperfield: Summary: Chapter II,David describes some of his earliest memories....,David Copperfield,Chapter II
3,David Copperfield: Summary: Chapter III,Peggotty and David travel by carrier's cart to...,David Copperfield,Chapter III


In [208]:
dc_df['chapter_text'] = dc_chap_texts

#### Add book to main dataframe

In [209]:
main_df = pd.concat([main_df, dc_df])

### Ivanhoe

#### Split full text into chapters

In [210]:
chapters('Ivanhoe')

286    Ivanhoe: Summary
Name: chapters, dtype: object

##### Divide summary into chapter summaries & make new dataframe for book

In [211]:
iv_df = start_df('Ivanhoe')
iv_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
286,Ivanhoe: Summary,Chapter I\n\tThe story is set in England in th...,Ivanhoe,Ivanhoe: Summary


In [212]:
ivanhoe_summary = iv_df.iloc[0]['chapter_summary']

In [213]:
iv_ch1 = ivanhoe_summary.partition('Chapter II')[0]

In [214]:
iv_ch2_4 = ivanhoe_summary.partition('Chapter II')[1] + ivanhoe_summary.partition('Chapter II')[2]
iv_ch2 = iv_ch2_4.partition('Chapter III')[0]

In [215]:
iv_ch3_4 = iv_ch2_4.partition('Chapter III')[1] + iv_ch2_4.partition('Chapter III')[2]
iv_ch3 = iv_ch3_4.partition('Chapter IV')[0]

In [216]:
iv_ch4 = iv_ch3_4.partition('Chapter IV')[1] + iv_ch3_4.partition('Chapter IV')[2]

In [217]:
iv_df = pd.DataFrame(
    { 'chapter_title':['Ivanhoe Chapter I', 'Ivanhoe Chapter II', 'Ivanhoe Chapter III', 'Ivanhoe Chapter IV'],
        'chapter_summary':[iv_ch1, iv_ch2, iv_ch3, iv_ch4],
        'book_title':['Ivanhoe', 'Ivanhoe', 'Ivanhoe', 'Ivanhoe'],
        'chapters':['Chapter I', 'Chapter II', 'Chapter III', 'Chapter IV'],
    }
)
iv_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
0,Ivanhoe Chapter I,Chapter I\n\tThe story is set in England in th...,Ivanhoe,Chapter I
1,Ivanhoe Chapter II,Chapter II\n\tA group of ten horsemen overtake...,Ivanhoe,Chapter II
2,Ivanhoe Chapter III,"Chapter III\n\tIn Cedric’s mansion, the hall i...",Ivanhoe,Chapter III
3,Ivanhoe Chapter IV,Chapter IV\n\tCedric greets his hosts with dig...,Ivanhoe,Chapter IV


##### Divide full text chapters

In [218]:
iv_ch_names = ['CHAPTER I', 'CHAPTER II', 'CHAPTER III', 'CHAPTER IV']

In [219]:
iv_ch_texts = split_chapters('Ivanhoe', iv_ch_names)

In [220]:
#Edit last item in chapter texts list (should be Ch 4 - currently Ch $ & on due to error with only using a portion 
# of the book - same as what occured with My Antonia)

ch_4_partition = iv_ch_texts[-1].partition('CHAPTER V')
iv_ch_texts[-1] = ch_4_partition[0]

In [221]:
iv_df['chapter_text'] = iv_ch_texts

In [222]:
iv_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
0,Ivanhoe Chapter I,Chapter I\n\tThe story is set in England in th...,Ivanhoe,Chapter I,"IvanhoebyWalter Scott\n\n\n\n\nPrepared by ""Jo..."
1,Ivanhoe Chapter II,Chapter II\n\tA group of ten horsemen overtake...,Ivanhoe,Chapter II,"\n\n\nA Monk there was, a fayre for the maistr..."
2,Ivanhoe Chapter III,"Chapter III\n\tIn Cedric’s mansion, the hall i...",Ivanhoe,Chapter III,\n\n\nThen (sad relief!) from the bleak coast ...
3,Ivanhoe Chapter IV,Chapter IV\n\tCedric greets his hosts with dig...,Ivanhoe,Chapter IV,\n\n\nWith sheep and shaggy goats the porkers ...


#### Add book to main dataframe

In [223]:
main_df = pd.concat([main_df, iv_df])

In [224]:
len(main_df)

254

### As You Like It

#### Split full text into chapters

In [225]:
chapters('As You Like It')

287    Act 1, Scene 1-Act 1, Scene 2
288    Act 1, Scene 3-Act 2, Scene 1
289    Act 2, Scene 2-Act 2, Scene 3
290    Act 2, Scene 4-Act 2, Scene 5
291    Act 2, Scene 6-Act 2, Scene 7
292    Act 3, Scene 1-Act 3, Scene 2
293    Act 3, Scene 3-Act 3, Scene 4
294    Act 3, Scene 5-Act 4, Scene 1
295    Act 4, Scene 2-Act 4, Scene 3
296    Act 5, Scene 1-Act 5, Scene 2
297    Act 5, Scene 3-Act 5, Scene 4
298                         Epilogue
Name: chapters, dtype: object

In [226]:
ayl_act_names = [
    'ACT I.',
    'ACT II.',
    'ACT III.',
    'ACT IV',
    'ACT V',
    'EPILOGUE'
]

In [227]:
ayl_ch_names_list = []

In [228]:
ayl_acts = split_chapters('As You Like It', ayl_act_names)

In [229]:
ayl_act1_names =[
    'ACT I. SCENE I.',
    'SCENE II.',
    'SCENE III.'
]
ayl_ch_names_list.append(ayl_act1_names)


ayl_act2_names = [ 
    'ACT II. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
    'SCENE VI.',
    'SCENE VII.',
    'SCENE VIII'
]
ayl_ch_names_list.append(ayl_act2_names)


ayl_act3_names = [
    'ACT III. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
    'SCENE V.',
]
ayl_ch_names_list.append(ayl_act3_names)


ayl_act4_names = [
    'ACT IV. SCENE I.',
    'SCENE II.',
    'SCENE III.',
]
ayl_ch_names_list.append(ayl_act4_names)


ayl_act5_names = [
    'ACT V. SCENE I.',
    'SCENE II.',
    'SCENE III.',
    'SCENE IV.',
]
ayl_ch_names_list.append(ayl_act5_names)

In [230]:
ayl_texts = []
for i in range(5): 
    text = ayl_acts[i]
    chapter_names = ayl_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    ayl_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        ayl_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    ayl_texts.append(chapter_last)

In [231]:
ayl_acts[5]

"\n                           EPILOGUE.\n  ROSALIND. It is not the fashion to see the lady the epilogue;\nbut\n    it is no more unhandsome than to see the lord the prologue.\nIf it\n    be true that good wine needs no bush, 'tis true that a good\nplay\n    needs no epilogue. Yet to good wine they do use good bushes;\nand\n    good plays prove the better by the help of good epilogues.\nWhat a\n    case am I in then, that am neither a good epilogue, nor\ncannot\n    insinuate with you in the behalf of a good play! I am not\n    furnish'd like a beggar; therefore to beg will not become me.\nMy\n    way is to conjure you; and I'll begin with the women. I\ncharge\n    you, O women, for the love you bear to men, to like as much\nof\n    this play as please you; and I charge you, O men, for the\nlove\n    you bear to women- as I perceive by your simp'ring none of\nyou\n    hates them- that between you and the women the play may\nplease.\n    If I were a woman, I would kiss as many of you as 

In [232]:
ayl_texts.append(ayl_acts[5])

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [233]:
ayl_df = start_df('As You Like It')
ayl_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
287,"As You Like It: Novel Summary: Act 1, Scene 1-...","Act 1, Scene 1: Orlando, the youngest son of S...",As You Like It,"Act 1, Scene 1-Act 1, Scene 2"
288,"As You Like It: Novel Summary: Act 1, Scene 3-...","Act 1, Scene 3: Celia asks Rosalind what is on...",As You Like It,"Act 1, Scene 3-Act 2, Scene 1"
289,"As You Like It: Novel Summary: Act 2, Scene 2-...","Act 2, Scene 2: Duke Frederick finds that Celi...",As You Like It,"Act 2, Scene 2-Act 2, Scene 3"
290,"As You Like It: Novel Summary: Act 2, Scene 4-...","Act 2, Scene 4: Rosalind, dressed as the boy G...",As You Like It,"Act 2, Scene 4-Act 2, Scene 5"
291,"As You Like It: Novel Summary: Act 2, Scene 6-...","Act 2, Scene 6: Adam and Orlando are in Arden ...",As You Like It,"Act 2, Scene 6-Act 2, Scene 7"
292,"As You Like It: Novel Summary: Act 3, Scene 1-...","Act 3, Scene 1: Duke Frederick tells Oliver he...",As You Like It,"Act 3, Scene 1-Act 3, Scene 2"
293,"As You Like It: Novel Summary: Act 3, Scene 3-...","Act 3, Scene 3: Touchstone decides that he wan...",As You Like It,"Act 3, Scene 3-Act 3, Scene 4"
294,"As You Like It: Novel Summary: Act 3, Scene 5-...","Act 3, Scene 5: Silvius and Phoebe sit talking...",As You Like It,"Act 3, Scene 5-Act 4, Scene 1"
295,"As You Like It: Novel Summary: Act 4, Scene 2-...","Act 4, Scene 2: The Duke's men celebrate that ...",As You Like It,"Act 4, Scene 2-Act 4, Scene 3"
296,"As You Like It: Novel Summary: Act 5, Scene 1-...","Act 5, Scene 1: Touchstone and Audrey are toge...",As You Like It,"Act 5, Scene 1-Act 5, Scene 2"


In [234]:
len(ayl_texts)

24

In [235]:
ayl_df['chapter_text'] = [
    ' '.join(ayl_texts[0:1]),
    ' '.join(ayl_texts[2:3]),
    ' '.join(ayl_texts[4:5]),
    ' '.join(ayl_texts[6:7]),
    ' '.join(ayl_texts[8:9]),
    ' '.join(ayl_texts[10:11]),
    ' '.join(ayl_texts[12:13]),
    ' '.join(ayl_texts[14:15]),
    ' '.join(ayl_texts[16:17]),
    ' '.join(ayl_texts[18:19]),
    ' '.join(ayl_texts[20:21]),
    ayl_texts[22]
]

#### Add book to main dataframe

In [236]:
main_df = pd.concat([main_df, ayl_df])

In [237]:
len(main_df)

266

### The Jungle

#### Split full text into chapters

In [238]:
chapters('The Jungle')

299    The Jungle: Novel Summary
Name: chapters, dtype: object

In [239]:
start_df('The Jungle').iloc[0]['chapter_summary']

'\n\nSummary\nThe Jungle begins with Marija Berczynskas overseeing and organizing a wedding ceremony in the ‘back of the yards’ in Chicago. She is Lithuanian and today her cousin, Ona Lukoszaite, has just married fellow Lithuanian Jurgis Rudkus. Ona is not quite sixteen and the readers are told her new husband is able to carry a two hundred and fifty pound quarter of beef without staggering, but he is as ‘frightened as a hunted animal’ at the wedding celebrations.\n\tA ‘charming informality’ is one of the characteristics of this celebration, but it is a law (transplanted from Lithuania) that no one leaves hungry. Ona’s stepmother is Aunt Elizabeth, and is referred to as Teta Elzbieta, and she and other women bring masses of food through to the guests.\n\tThere is also music being played and this transforms the place from a saloon ‘to a fairy place, a wonderland, a little corner of the high mansions of the sky’. The violinist is Tamoszius Kuszleika and he later gets engaged to Marija. O

This text contains a summary of chapters 1-3 and an analysis. The summary portions will be used in the data set

In [240]:
j_text = open('./data/book_full_texts/The_Jungle.txt', 'r').read()

In [241]:
j_chap1 = j_text.partition('Chapter 2')[0]

In [242]:
j_chap2 = str(j_text.partition('Chapter 2')[2]).partition('Chapter 3')[0]

In [243]:
j_chap3 = str(j_text.partition('Chapter 3')[2]).partition('Chapter 4')[0]

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [244]:
start_df('The Jungle')

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
299,The Jungle: Novel Summary,\n\nSummary\nThe Jungle begins with Marija Ber...,The Jungle,The Jungle: Novel Summary


In [245]:
tj_summary = str(start_df('The Jungle').iloc[0]['chapter_summary'])

tj_chap_summaries = [
    tj_summary.partition('Chapter Two')[0],
    str(tj_summary.partition('Chapter Two')[2]).partition('In Chapter Three,')[0],
    str(tj_summary.partition('In Chapter Three,')[2]).partition('Analysis')[0]
]

In [246]:
tj = {
    'chapter_title':['The Jungle: Novel Summary Chapter I', 'The Jungle: Novel Summary Chapter II', 'The Jungle: Novel Summary Chapter III'],
    'chapter_summary':tj_chap_summaries,
    'book_title':['The Jungle', 'The Jungle', 'The Jungle'],
    'chapters':['Chapter I', 'Chapter II', 'Chapter III'],
    'chapter_text':[j_chap1, j_chap2, j_chap3]
}

In [247]:
tj_df = pd.DataFrame(tj)
tj_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
0,The Jungle: Novel Summary Chapter I,\n\nSummary\nThe Jungle begins with Marija Ber...,The Jungle,Chapter I,"The Jungle, by Upton Sinclair\n\n\n\nThe Jungl..."
1,The Jungle: Novel Summary Chapter II,begins by explaining how Jurgis is still youn...,The Jungle,Chapter II,"\n\n\nJurgis talked lightly about work, becau..."
2,The Jungle: Novel Summary Chapter III,"Jokubas, a fellow Lithuanian who runs a delic...",The Jungle,Chapter III,"\n\n\nIn his capacity as delicatessen vender,..."


#### Add book to main dataframe

In [248]:
main_df = pd.concat([main_df, tj_df])

In [249]:
len(main_df)

269

### The Red Badge of Courage

#### Split full text into chapters

In [250]:
list(chapters('The Red Badge of Courage'))

['The Red Badge of Courage : Novel Summary',
 'The Red Badge of Courage : Novel Summary: Chapter 1',
 'The Red Badge of Courage : Novel Summary: Chapter 2-4',
 'The Red Badge of Courage : Novel Summary: Chapter 5-7',
 'The Red Badge of Courage : Novel Summary: Chapter 8-10',
 'The Red Badge of Courage : Novel Summary: Chapter 11-12',
 'The Red Badge of Courage : Novel Summary: Chapter 13-15',
 'The Red Badge of Courage : Novel Summary: Chapter 16-17',
 'The Red Badge of Courage : Novel Summary: Chapter 18-19',
 'The Red Badge of Courage : Novel Summary: Chapter 20-22',
 'The Red Badge of Courage : Novel Summary: Chapter 23-24']

In [251]:
rbc_ch_names = []

r_numerals = []
for i in range(1,25):
    r_numerals.append(roman.toRoman(i))

for r in r_numerals:
    rbc_ch_names.append(f'Chapter {r}')

In [252]:
rbc_chap_texts = split_chapters('The Red Badge of Courage', rbc_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [253]:
rbc_df = start_df('The Red Badge of Courage')
rbc_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
300,The Red Badge of Courage : Novel Summary,The story begins at the camp of the Union army...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary
301,The Red Badge of Courage : Novel Summary: Chap...,The story begins at the camp of the Union army...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
302,The Red Badge of Courage : Novel Summary: Chap...,The tall soldier’s information proves to be in...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
303,The Red Badge of Courage : Novel Summary: Chap...,"The enemy advances on Henry’s regiment, and th...",The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
304,The Red Badge of Courage : Novel Summary: Chap...,"When twilight comes, Henry hears renewed sound...",The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
305,The Red Badge of Courage : Novel Summary: Chap...,The noise of the battle grows louder and Henry...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
306,The Red Badge of Courage : Novel Summary: Chap...,Henry makes his way apprehensively to towards ...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
307,The Red Badge of Courage : Novel Summary: Chap...,Henry’s regiment marches to some trenches to r...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
308,The Red Badge of Courage : Novel Summary: Chap...,During the lull in their portion of the battle...,The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...
309,The Red Badge of Courage : Novel Summary: Chap...,"Henry and Wilson, as they carry the flag, are ...",The Red Badge of Courage,The Red Badge of Courage : Novel Summary: Chap...


In [254]:
rbc_df.drop(index=300, inplace=True)

In [255]:
list(chapters('The Red Badge of Courage'))

['The Red Badge of Courage : Novel Summary',
 'The Red Badge of Courage : Novel Summary: Chapter 1',
 'The Red Badge of Courage : Novel Summary: Chapter 2-4',
 'The Red Badge of Courage : Novel Summary: Chapter 5-7',
 'The Red Badge of Courage : Novel Summary: Chapter 8-10',
 'The Red Badge of Courage : Novel Summary: Chapter 11-12',
 'The Red Badge of Courage : Novel Summary: Chapter 13-15',
 'The Red Badge of Courage : Novel Summary: Chapter 16-17',
 'The Red Badge of Courage : Novel Summary: Chapter 18-19',
 'The Red Badge of Courage : Novel Summary: Chapter 20-22',
 'The Red Badge of Courage : Novel Summary: Chapter 23-24']

In [256]:
rbc_df['chapter_text'] = [
    rbc_chap_texts[0],
    ' '.join(rbc_chap_texts[1:3]),
    ' '.join(rbc_chap_texts[4:6]),
    ' '.join(rbc_chap_texts[7:9]),
    ' '.join(rbc_chap_texts[10:11]),
    ' '.join(rbc_chap_texts[12:14]),
    ' '.join(rbc_chap_texts[15:16]),
    ' '.join(rbc_chap_texts[17:18]),
    ' '.join(rbc_chap_texts[19:21]),
    ' '.join(rbc_chap_texts[22:23]),
]

#### Add book to main dataframe

In [257]:
main_df = pd.concat([main_df, rbc_df])

In [258]:
len(main_df)

279

### Notes from the Underground

#### Split full text into chapters

In [259]:
chapters('Notes from the Underground')

311     Part 1 Chapter 1-Part 1 Chapter 2
312     Part 1 Chapter 3-Part 1 Chapter 4
313     Part 1 Chapter 5-Part 1 Chapter 6
314     Part 1 Chapter 7-Part 1 Chapter 8
315    Part 1 Chapter 9-Part 1 Chapter 10
316    Part 1 Chapter 11-Part 2 Chapter 1
317     Part 2 Chapter 2-Part 2 Chapter 3
318     Part 2 Chapter 4-Part 2 Chapter 5
319     Part 2 Chapter 6-Part 2 Chapter 7
320     Part 2 Chapter 8-Part 2 Chapter 9
321                     Part 2 Chapter 10
Name: chapters, dtype: object

In [260]:
nfu_part_names = [
    'PART I',
    'PART II'
]

In [261]:
nfu_ch_names_list = []

In [262]:
nfu_parts = split_chapters('Notes from the Underground', nfu_part_names)

In [263]:
nfu_part1_names =[
    'AUTHOR\'S NOTE.',  #this phrased used instead because name of first chapter ('I') is not a unique marker in the first portion of the text
    'II',
    'III',
    'IV',
    'V',
    'VI',
    'VII',
    'VIII',
    'IX',
    'X',
    'XI'
]
nfu_ch_names_list.append(nfu_part1_names)


nfu_part2_names = [ 
    '(translated by Juliet Soskice).',   #this phrased used instead because name of first chapter ('I') is not a unique marker in the first portion of the text
    'II',
    'III',
    'IV',
    'V',
    'VI',
    'VII',
    'VIII',
    'IX',
    'X'
]
nfu_ch_names_list.append(nfu_part2_names)

In [264]:
nfu_texts = []
for i in range(2): 
    text = nfu_parts[i]
    chapter_names = nfu_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    nfu_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        nfu_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    nfu_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [265]:
nfu_df = start_df('Notes from the Underground')
nfu_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
311,Notes from the Underground: Novel Summary: Par...,Part 1 Chapter 1-Part 1 Chapter 2\nPart 1 Chap...,Notes from the Underground,Part 1 Chapter 1-Part 1 Chapter 2
312,Notes from the Underground: Novel Summary: Par...,Part 1 Chapter 3-Part 1 Chapter 4\nPart 1 Chap...,Notes from the Underground,Part 1 Chapter 3-Part 1 Chapter 4
313,Notes from the Underground: Novel Summary: Par...,Part 1 Chapter 5-Part 1 Chapter 6\nPart 1 Chap...,Notes from the Underground,Part 1 Chapter 5-Part 1 Chapter 6
314,Notes from the Underground: Novel Summary: Par...,Part 1 Chapter 7-Part 1 Chapter 8\nPart 1 Chap...,Notes from the Underground,Part 1 Chapter 7-Part 1 Chapter 8
315,Notes from the Underground: Novel Summary: Par...,"Part 1 Chapter 9: Next, the UM uses the analog...",Notes from the Underground,Part 1 Chapter 9-Part 1 Chapter 10
316,Notes from the Underground: Novel Summary: Par...,Part 1 Chapter 11-Part 2 Chapter 1\nPart 1 Cha...,Notes from the Underground,Part 1 Chapter 11-Part 2 Chapter 1
317,Notes from the Underground: Novel Summary: Par...,Part 2 Chapter 2-Part 2 Chapter 3\nPart 2 Chap...,Notes from the Underground,Part 2 Chapter 2-Part 2 Chapter 3
318,Notes from the Underground: Novel Summary: Par...,Part 2 Chapter 4-Part 2 Chapter 5\nPart 2 Chap...,Notes from the Underground,Part 2 Chapter 4-Part 2 Chapter 5
319,Notes from the Underground: Novel Summary: Par...,Part 2 Chapter 6-Part 2 Chapter 7\nPart 2 Chap...,Notes from the Underground,Part 2 Chapter 6-Part 2 Chapter 7
320,Notes from the Underground: Novel Summary: Par...,Part 2 Chapter 8-Part 2 Chapter 9\nPart 2 Chap...,Notes from the Underground,Part 2 Chapter 8-Part 2 Chapter 9


In [266]:
nfu_df['chapter_text'] = [
    ' '.join(nfu_texts[0:1]),
    ' '.join(nfu_texts[2:3]),
    ' '.join(nfu_texts[4:5]),
    ' '.join(nfu_texts[6:7]),
    ' '.join(nfu_texts[8:9]),
    ' '.join(nfu_texts[10:11]),
    ' '.join(nfu_texts[12:13]),
    ' '.join(nfu_texts[14:15]),
    ' '.join(nfu_texts[16:17]),
    ' '.join(nfu_texts[18:19]),
    nfu_texts[20]
]

#### Add book to main dataframe

In [267]:
main_df = pd.concat([main_df, nfu_df])

In [268]:
len(main_df)

290

### A Tale of Two Cities

#### Split full text into chapters

In [269]:
chapters('A Tale of Two Cities')

322                        A Tale of Two Cities: Summary
323    A Tale of Two Cities:Novel Summary: Book I Cha...
324                                   Book I Chapter 5-6
325                                  Book II Chapter 1-3
326                                  Book II Chapter 4-5
327                                    Book II Chapter 6
328                                  Book II Chapter 7-9
329                                Book II Chapter 10-14
330                                Book II Chapter 15-16
331                                Book II Chapter 16-20
332                                Book II Chapter 21-24
333                                 Book III Chapter 1-5
334                                Book III Chapter 6-11
335                               Book III Chapter 12-15
Name: chapters, dtype: object

In [270]:
chapters('A Tale of Two Cities')[323]

'A Tale of Two Cities:Novel Summary: Book I Chapter 1-4'

In [271]:
## Function to split chapters does not seem to be working to split up the three books of this text. Will split step by step instead

attc_text = open('./data/book_full_texts/A_Tale_of_Two_Cities.txt', 'r').read()

In [272]:
attc_book1 = attc_text.partition('Book the Second--the Golden Thread')[0]

In [273]:
attc_book2 = (attc_text.partition('Book the Second--the Golden Thread')[2]).partition('Book the Third--the Track of a Storm')[0]

In [274]:
attc_book3 = attc_text.partition('Book the Third--the Track of a Storm')[2]

In [275]:
attc_books = [attc_book1, attc_book2, attc_book3]

In [276]:
attc_ch_names_list = []

In [277]:
attc_bk1_names =[
    'The Period',  
    'II',
    'III',
    'IV',
    'V',
    'VI',
]
attc_ch_names_list.append(attc_bk1_names)


attc_bk2_names = [ 
    'Five Years Later',   
     'II',
     'III',
     'IV',
     'V',
     'VI',
     'VII',
     'VIII',
     'IX',
     'X',
     'XI',
     'XII',
     'XIII',
     'XIV',
     'XV',
     'XVI',
     'XVII',
     'XVIII',
     'XIX',
     'XX',
     'XXI',
     'XXII',
     'XXIII',
     'XXIV'
]
attc_ch_names_list.append(attc_bk2_names)

attc_bk3_names = [ 
    'In Secret',   
    'II',
    'III',
    'IV',
    'V',
    'VI',
    'VII',
    'VIII',
    'IX',
    'X',
    'XI',
    'XII',
    'XIII',
    'XIV',
    'XV'
]
attc_ch_names_list.append(attc_bk3_names)

In [278]:
attc_texts = []
for i in range(3): 
    text = attc_books[i]
    chapter_names = attc_ch_names_list[i]
    num_chapters = len(chapter_names)

    partition_1 = text.partition(chapter_names[1])
    chapter_1 = str(partition_1[0])
    attc_texts.append(chapter_1)

    for i in range(2, num_chapters):
        partition_ch_group = text.partition(chapter_names[i])
        partition_ch_subgroup = str(partition_ch_group[0])
        partition_ch_sep = partition_ch_subgroup.partition(chapter_names[i-1])
        partition_ch = str(partition_ch_sep[2])
        attc_texts.append(partition_ch)

    partition_last = text.partition(chapter_names[num_chapters-1])
    chapter_last = str(partition_last[2])
    attc_texts.append(chapter_last)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [279]:
attc_df = start_df('A Tale of Two Cities')
attc_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
322,A Tale of Two Cities: Summary,\nBook I Chapter 1-4\nThe narrator begins his ...,A Tale of Two Cities,A Tale of Two Cities: Summary
323,A Tale of Two Cities:Novel Summary: Book I Cha...,Book I\nChapter 1-4\nThe narrator begins his s...,A Tale of Two Cities,A Tale of Two Cities:Novel Summary: Book I Cha...
324,A Tale of Two Cities: Novel Summary: Book I Ch...,Book I\nChapter 5-6\nThe scene is the squalid ...,A Tale of Two Cities,Book I Chapter 5-6
325,A Tale of Two Cities: Novel Summary: Book II C...,"Book II\nChapter 1-3\nIt is five years later, ...",A Tale of Two Cities,Book II Chapter 1-3
326,A Tale of Two Cities: Novel Summary: Book II C...,Book II\nChapter 4-5\nOutside the court Charle...,A Tale of Two Cities,Book II Chapter 4-5
327,A Tale of Two Cities: Novel Summary: Book II C...,Book II\nChapter 6\nFour months pass. Mr. Lorr...,A Tale of Two Cities,Book II Chapter 6
328,A Tale of Two Cities: Novel Summary: Book II C...,"Book II\nChapter 7-9\nIn Paris, Monseigneur (w...",A Tale of Two Cities,Book II Chapter 7-9
329,A Tale of Two Cities: Novel Summary: Book II C...,Book II\nChapter 10-14\nIt is one year later a...,A Tale of Two Cities,Book II Chapter 10-14
330,A Tale of Two Cities: Novel Summary: Book II C...,Book II\nChapter 15-16\nFor three days in a ro...,A Tale of Two Cities,Book II Chapter 15-16
331,A Tale of Two Cities: Novel Summary: Book II C...,Book II\nChapter 17-20\nThe night before Lucie...,A Tale of Two Cities,Book II Chapter 16-20


In [280]:
attc_df.drop(index=322, inplace=True)

In [281]:
list(chapters('A Tale of Two Cities'))

['A Tale of Two Cities: Summary',
 'A Tale of Two Cities:Novel Summary: Book I Chapter 1-4',
 'Book I Chapter 5-6',
 'Book II Chapter 1-3',
 'Book II Chapter 4-5',
 'Book II Chapter 6',
 'Book II Chapter 7-9',
 'Book II Chapter 10-14',
 'Book II Chapter 15-16',
 'Book II Chapter 16-20',
 'Book II Chapter 21-24',
 'Book III Chapter 1-5',
 'Book III Chapter 6-11',
 'Book III Chapter 12-15']

In [282]:
attc_df['chapter_text'] = [
    ' '.join(attc_texts[0:3]),
    ' '.join(attc_texts[4:5]),
    ' '.join(attc_texts[6:8]),
    ' '.join(attc_texts[9:10]),
    attc_texts[11],
    ' '.join(attc_texts[12:14]),
    ' '.join(attc_texts[15:19]),
    ' '.join(attc_texts[20:21]),
    ' '.join(attc_texts[22:26]),
    ' '.join(attc_texts[27:20]),
    ' '.join(attc_texts[21:25]),
    ' '.join(attc_texts[26:31]),
    ' '.join(attc_texts[32:35]),
]

#### Add book to main dataframe

In [283]:
main_df = pd.concat([main_df, attc_df])

### Winesburg, Ohio

#### Split full text into chapters

In [284]:
chapters('Winesburg, Ohio')

336                         A Man of Ideas
337                              Adventure
338                           An Awakening
339                                  Death
340                              Departure
341                                  Drink
342                       Godliness I - VI
343                                  Hands
344                             Loneliness
345                                 Mother
346                            Paper Pills
347                                  Queer
348    Respectability, The Thinker & Tandy
349                         Sophistication
350              The Book of the Grotesque
351         The Philosopher & Nobody Knows
352      The Strength of God & The Teacher
353                         The Untold Lie
Name: chapters, dtype: object

In [285]:
start_df('Winesburg, Ohio')

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
336,"Winesburg, Ohio: Novel Summary: A Man of Ideas","Summary\n\tJoe Welling, an agent for the Stand...","Winesburg, Ohio",A Man of Ideas
337,"Winesburg, Ohio: Novel Summary: Adventure",Summary\n\tTwenty-seven-year-old Alice Hindman...,"Winesburg, Ohio",Adventure
338,"Winesburg, Ohio: Novel Summary: An Awakening",Summary\n\tGeorge often goes to see Belle Carp...,"Winesburg, Ohio",An Awakening
339,"Winesburg, Ohio: Novel Summary: Death",Summary\n\tElizabeth Willard often visited Doc...,"Winesburg, Ohio",Death
340,"Winesburg, Ohio: Novel Summary: Departure",Summary\n\tGeorge Willard leaves Winesburg to ...,"Winesburg, Ohio",Departure
341,"Winesburg, Ohio: Novel Summary: Drink",Summary\n\tTom Foster moved to Winesburg with ...,"Winesburg, Ohio",Drink
342,"Winesburg, Ohio: Novel Summary: Godliness I - VI",Godliness I\n\tThis is the first in a four-par...,"Winesburg, Ohio",Godliness I - VI
343,"Winesburg, Ohio: Novel Summary: Hands","Summary\n\tWing Biddlebaum, a forty-year-old m...","Winesburg, Ohio",Hands
344,"Winesburg, Ohio: Novel Summary: Loneliness",Summary\n\tEnoch Robinson is a childish man wh...,"Winesburg, Ohio",Loneliness
345,"Winesburg, Ohio: Novel Summary: Mother",Summary\n\tElizabeth Willard owns the Winesbur...,"Winesburg, Ohio",Mother


In [286]:
# The chapters of this book are in the wronrg order in the dataframe - will reorder

ch_correct_order = ['The Book of the Grotesque', 'Hands', 'Paper Pills', 'Mother', 'The Philosopher & Nobody Knows', 'Godliness I - VI', 'A Man of Ideas', 'Adventure',
                    'Respectability, The Thinker & Tandy', 'The Strength of God & The Teacher', 'Loneliness', 'An Awakening', 'Queer', 'The Untold Lie', 'Drink', 'Death', 'Sophistication', 'Departure'
]

In [287]:
wo_df = start_df('Winesburg, Ohio')[start_df('Winesburg, Ohio')['chapters']=='The Book of the Grotesque']

In [288]:
for i in range(1, len(ch_correct_order)):
    wo_df = pd.concat([ wo_df, start_df('Winesburg, Ohio')[start_df('Winesburg, Ohio')['chapters']==ch_correct_order[i]] ])

In [289]:
wo_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
350,"Winesburg, Ohio: Novel Summary: The Book of th...",Summary\n\tAn old writer has a bed that his ca...,"Winesburg, Ohio",The Book of the Grotesque
343,"Winesburg, Ohio: Novel Summary: Hands","Summary\n\tWing Biddlebaum, a forty-year-old m...","Winesburg, Ohio",Hands
346,"Winesburg, Ohio: Novel Summary: Paper Pills","Summary\n\tDoctor Reefy, an old man with a whi...","Winesburg, Ohio",Paper Pills
345,"Winesburg, Ohio: Novel Summary: Mother",Summary\n\tElizabeth Willard owns the Winesbur...,"Winesburg, Ohio",Mother
351,"Winesburg, Ohio: Novel Summary: The Philosophe...",The Philosopher\n\tDoctor Parcival is one of t...,"Winesburg, Ohio",The Philosopher & Nobody Knows
342,"Winesburg, Ohio: Novel Summary: Godliness I - VI",Godliness I\n\tThis is the first in a four-par...,"Winesburg, Ohio",Godliness I - VI
336,"Winesburg, Ohio: Novel Summary: A Man of Ideas","Summary\n\tJoe Welling, an agent for the Stand...","Winesburg, Ohio",A Man of Ideas
337,"Winesburg, Ohio: Novel Summary: Adventure",Summary\n\tTwenty-seven-year-old Alice Hindman...,"Winesburg, Ohio",Adventure
348,"Winesburg, Ohio: Novel Summary: Respectability...","Respectability\n\tWash Williams is a huge, dir...","Winesburg, Ohio","Respectability, The Thinker & Tandy"
352,"Winesburg, Ohio: Novel Summary: The Strength o...",The Strength of God\n\tReverend Curtis Hartman...,"Winesburg, Ohio",The Strength of God & The Teacher


In [290]:
wo_ch_names = ['GROTESQUE', 'HANDS', 'PAPER PILLS', 'MOTHER', 'THE PHILOSOPHER', 'NOBODY KNOWS', 'GODLINESS', 'A MAN OF IDEAS', 'ADVENTURE',
                    'RESPECTABILITY', 'THE THINKER', 'TANDY', 'THE STRENGTH OF GOD', 'THE TEACHER', 'LONELINESS', 'AN AWAKENING', '\"Queer\"', 
                    'THE UNTOLD LIE', 'DRINK', 'DEATH', 'SOPHISTICATION', 'DEPARTURE'
]

In [291]:
wo_ch_texts = split_chapters('Winesburg, Ohio', wo_ch_names)

#### Create dataframe with chapter titles, chapter summaries, book title, chapter names, and chapter texts

In [292]:
wo_df['chapter_text'] = [
    wo_ch_texts[0],
    wo_ch_texts[1],
    wo_ch_texts[2],
    wo_ch_texts[3],
    wo_ch_texts[4:5],
    wo_ch_texts[6],
    wo_ch_texts[7],
    wo_ch_texts[8],
    wo_ch_texts[9:11],
    wo_ch_texts[12:13],
    wo_ch_texts[14],
    wo_ch_texts[15],
    wo_ch_texts[16],
    wo_ch_texts[17],
    wo_ch_texts[18],
    wo_ch_texts[19],
    wo_ch_texts[20],
    wo_ch_texts[21],
]

#### Add book to main dataframe

In [293]:
main_df = pd.concat([main_df, wo_df])

In [294]:
len(main_df)

321

### Middlemarch

#### Split full texts into chapters

In [295]:
chapters('Middlemarch')

354    Middlemarch: Novel Summary
Name: chapters, dtype: object

In [296]:
start_df('Middlemarch').iloc[0]['chapter_summary'][:300]

'Prelude\nSummary\n\tThis is a charming vignette of Saint Theresa of Avila as a little girl, holding her brother by the hand, going out into the country side looking for martyrdom, illustrating the “passionate, ideal nature [that demands] an epic life.” Such a girl who has a “rapturous consciousness of '

This text includes a summary of the prelude and one summary for chapters 1-5. These two summaries will be included in the dataset.

##### Split summary texts into sections & create dataframe for book

In [297]:
mm_summary_text = start_df('Middlemarch').iloc[0]['chapter_summary']

In [298]:
mm_summary_text

'Prelude\nSummary\n\tThis is a charming vignette of Saint Theresa of Avila as a little girl, holding her brother by the hand, going out into the country side looking for martyrdom, illustrating the “passionate, ideal nature [that demands] an epic life.” Such a girl who has a “rapturous consciousness of life beyond self” could hardly be content with a normal woman’s life. She is the type of many such women today who yearn for an expanded life but are not helped by the “tangled circumstance” of society. This tragic sort of woman in the modern world has no channel for her life force, but only a “vague ideal and the common yearning of womanhood.” She is a swan among ducks and finds no fellowship.\n\tAnalysis\n\tEliot’s famous Prelude to Middlemarch could be the outcry of the Victorian woman, or women of any age, who have no outlet for their talents or direction for their spiritual lives. They are like the child Theresa, who nevertheless, even in a man’s world, grew up to reform a religious

In [299]:
mm_prelude_sum = mm_summary_text.partition('Analysis')[0]
mm_prelude_sum

'Prelude\nSummary\n\tThis is a charming vignette of Saint Theresa of Avila as a little girl, holding her brother by the hand, going out into the country side looking for martyrdom, illustrating the “passionate, ideal nature [that demands] an epic life.” Such a girl who has a “rapturous consciousness of life beyond self” could hardly be content with a normal woman’s life. She is the type of many such women today who yearn for an expanded life but are not helped by the “tangled circumstance” of society. This tragic sort of woman in the modern world has no channel for her life force, but only a “vague ideal and the common yearning of womanhood.” She is a swan among ducks and finds no fellowship.\n\t'

In [300]:
mm_ch_1_5_sum = (mm_summary_text.partition('Chapters 1-5')[2]).partition('Analysis')[0]
mm_ch_1_5_sum

' (The Brooke Sisters)\nSummary\n\tDorothea Brooke and her younger sister, Celia, are two young ladies of marriageable age who have recently come to live with their bachelor uncle and guardian, Mr. Brooke, at Tipton Grange. Though Dorothea is the striking and beautiful older sister, the rural opinion favors the pretty Celia as the one who is easier to understand. While Celia knows life through common sense, Dorothea is always out of place with her refined religious perceptions and longings. Dorothea is severe and Puritanical compared to Celia, who has normal coquettish desires for dress, jewelry, and conquests.\n\tDorothea does not want a husband like the neighbor, Sir James Chettham, an amiable but ordinary squire, who is in love with her. She thinks him boring, though in her passion for good works, she persuades him to build decent cottages for his tenants and gives him her designs. He goes along with her plans as he prepares to propose to her. No one reckons on Dorothea’s lofty thou

In [301]:
start_df('Middlemarch')

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
354,Middlemarch: Novel Summary,Prelude\nSummary\n\tThis is a charming vignett...,Middlemarch,Middlemarch: Novel Summary


In [302]:
mm = {
    'chapter_title':['Middlemarch: Novel Summary Prelude', 'Middlemarch: Novel Summary Chapters 1-5'],
    'chapter_summary':[mm_prelude_sum, mm_ch_1_5_sum],
    'book_title':['Middlemarch', 'Middlemarch'],
    'chapters':['Prelude', 'Chapters 1-5']
}

mm_df = pd.DataFrame(mm)

mm_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters
0,Middlemarch: Novel Summary Prelude,Prelude\nSummary\n\tThis is a charming vignett...,Middlemarch,Prelude
1,Middlemarch: Novel Summary Chapters 1-5,(The Brooke Sisters)\nSummary\n\tDorothea Bro...,Middlemarch,Chapters 1-5


##### Split text into chapters

In [303]:
mm_text = open('./data/book_full_texts/Middlemarch.txt', 'r').read()

In [304]:
mm_prelude = mm_text.partition('BOOK I.')[0]

In [305]:
mm_ch_1_5 = (mm_text.partition('CHAPTER I.')[2]).partition('CHAPTER VI')[0]

##### Add chapter texts to dataframe

In [306]:
mm_df['chapter_text'] = [mm_prelude, mm_ch_1_5]
mm_df

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
0,Middlemarch: Novel Summary Prelude,Prelude\nSummary\n\tThis is a charming vignett...,Middlemarch,Prelude,MiddlemarchbyGeorge Eliot\n\n\n\n\nScanned wit...
1,Middlemarch: Novel Summary Chapters 1-5,(The Brooke Sisters)\nSummary\n\tDorothea Bro...,Middlemarch,Chapters 1-5,"\n\n ""Since I can do no good because a woma..."


#### Add book to main dataframe

In [307]:
main_df = pd.concat([main_df, mm_df])

In [308]:
len(main_df)

323

# Check main dataframe

## Examine main df

In [309]:
main_df.head(25)

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
16,The Age of Innocence: Novel Summary: Chapters 1-3,Chapters 1-3\n\n\t \nSummary\n\tThe story open...,The Age of Innocence,Chapters 1-3,The Age of InnocencebyEdith Wharton\n\n\n\n\n\...
17,The Age of Innocence: Novel Summary: Chapters 4-6,Chapters 4-6\nSummary\n\tArcher and May begin ...,The Age of Innocence,Chapters 4-6,\n\nIn the course of the next day the first of...
18,The Age of Innocence: Novel Summary: Chapters 7-9,Chapters 7-9\nSummary\n\tMrs Archer and her so...,The Age of Innocence,Chapters 7-9,\n\nMrs. Henry van der Luyden listened in sile...
19,The Age of Innocence: Novel Summary: Chapters ...,Chapters 10-12\nSummary\n\tArcher tells May ab...,The Age of Innocence,Chapters 10-12,"\n\nThe Countess Olenska had said ""after five""..."
20,The Age of Innocence: Novel Summary: Chapters ...,"Chapters 13-15\nSummary\n\tAt the theatre, Arc...",The Age of Innocence,Chapters 13-15,\n\nIt was a crowded night at Wallack's theatr...
21,The Age of Innocence: Novel Summary: Chapters ...,Chapters 16-18\nSummary\n\tArcher arrives at S...,The Age of Innocence,Chapters 16-18,\n\nWhen Archer walked down the sandy main str...
22,The Age of Innocence: Novel Summary: Chapters ...,Chapters 19-21\nSummary\n\tUnder the eyes of N...,The Age of Innocence,Chapters 19-21,"\n\nThe day was fresh, with a lively spring wi..."
23,The Age of Innocence: Novel Summary: Chapters ...,Chapters 22-24\nSummary\n\tMr and Mrs Emerson ...,The Age of Innocence,Chapters 22-24,"\n\nA party for the Blenkers--the Blenkers?""\n..."
24,The Age of Innocence: Novel Summary: Chapters ...,Chapters 25-27\nSummary\n\tAs he leaves Boston...,The Age of Innocence,Chapters 25-27,"\n\nOnce more on the boat, and in the presence..."
25,The Age of Innocence: Novel Summary: Chapters ...,Chapters 28-30\nSummary\n\tArcher sends a tele...,The Age of Innocence,Chapters 28-30,"\n\nOl-ol--howjer spell it, anyhow?"" asked the..."


## Check that all texts added to main df correctly

In [310]:
main_df.isna().sum()

chapter_title      0
chapter_summary    0
book_title         0
chapters           0
chapter_text       0
dtype: int64

In [311]:
main_df['chapter_text'].isna().sum()

0

In [312]:
main_df[main_df['chapter_text']=='']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
69,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3,
70,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4,
72,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2,
73,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1,
74,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2,
75,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3,
76,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4,
77,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 5\n\tFalstaff tells Bardo...,Merry Wives of Windsor,Act III Scene 5,
78,Merry Wives of Windsor: Novel Summary: Act IV ...,\n\t \nAct IV\n\tScene 1-2\n\tScene 1 is a sho...,Merry Wives of Windsor,Act IV Scene 1-2,
79,Merry Wives of Windsor: Novel Summary: Act IV ...,"\n\t \nAct IV\n\tScene 3-4\n\tIn scene 3, Bard...",Merry Wives of Windsor,Act IV Scene 3-4,


In [313]:
len(main_df[main_df['chapter_text']==''])

26

There are 26 rows of the dataframe for which the chapter texts are missing

### Fill in missing chapter texts

#### Reset main_df index for ease of use in indentifying rows

In [314]:
main_df.index

Int64Index([ 16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
            ...
            344, 338, 347, 353, 341, 339, 349, 340,   0,   1],
           dtype='int64', length=323)

In [315]:
main_df.reset_index().head()

Unnamed: 0,index,chapter_title,chapter_summary,book_title,chapters,chapter_text
0,16,The Age of Innocence: Novel Summary: Chapters 1-3,Chapters 1-3\n\n\t \nSummary\n\tThe story open...,The Age of Innocence,Chapters 1-3,The Age of InnocencebyEdith Wharton\n\n\n\n\n\...
1,17,The Age of Innocence: Novel Summary: Chapters 4-6,Chapters 4-6\nSummary\n\tArcher and May begin ...,The Age of Innocence,Chapters 4-6,\n\nIn the course of the next day the first of...
2,18,The Age of Innocence: Novel Summary: Chapters 7-9,Chapters 7-9\nSummary\n\tMrs Archer and her so...,The Age of Innocence,Chapters 7-9,\n\nMrs. Henry van der Luyden listened in sile...
3,19,The Age of Innocence: Novel Summary: Chapters ...,Chapters 10-12\nSummary\n\tArcher tells May ab...,The Age of Innocence,Chapters 10-12,"\n\nThe Countess Olenska had said ""after five""..."
4,20,The Age of Innocence: Novel Summary: Chapters ...,"Chapters 13-15\nSummary\n\tAt the theatre, Arc...",The Age of Innocence,Chapters 13-15,\n\nIt was a crowded night at Wallack's theatr...


In [316]:
main_df.reset_index(inplace=True)

In [317]:
main_df.head()

Unnamed: 0,index,chapter_title,chapter_summary,book_title,chapters,chapter_text
0,16,The Age of Innocence: Novel Summary: Chapters 1-3,Chapters 1-3\n\n\t \nSummary\n\tThe story open...,The Age of Innocence,Chapters 1-3,The Age of InnocencebyEdith Wharton\n\n\n\n\n\...
1,17,The Age of Innocence: Novel Summary: Chapters 4-6,Chapters 4-6\nSummary\n\tArcher and May begin ...,The Age of Innocence,Chapters 4-6,\n\nIn the course of the next day the first of...
2,18,The Age of Innocence: Novel Summary: Chapters 7-9,Chapters 7-9\nSummary\n\tMrs Archer and her so...,The Age of Innocence,Chapters 7-9,\n\nMrs. Henry van der Luyden listened in sile...
3,19,The Age of Innocence: Novel Summary: Chapters ...,Chapters 10-12\nSummary\n\tArcher tells May ab...,The Age of Innocence,Chapters 10-12,"\n\nThe Countess Olenska had said ""after five""..."
4,20,The Age of Innocence: Novel Summary: Chapters ...,"Chapters 13-15\nSummary\n\tAt the theatre, Arc...",The Age of Innocence,Chapters 13-15,\n\nIt was a crowded night at Wallack's theatr...


In [318]:
main_df.drop(columns='index', inplace=True)

#### Re-examine rows with chapter texts missing

In [319]:
main_df[main_df['chapter_text']=='']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
51,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3,
52,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4,
54,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2,
55,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1,
56,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2,
57,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3,
58,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4,
59,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 5\n\tFalstaff tells Bardo...,Merry Wives of Windsor,Act III Scene 5,
60,Merry Wives of Windsor: Novel Summary: Act IV ...,\n\t \nAct IV\n\tScene 1-2\n\tScene 1 is a sho...,Merry Wives of Windsor,Act IV Scene 1-2,
61,Merry Wives of Windsor: Novel Summary: Act IV ...,"\n\t \nAct IV\n\tScene 3-4\n\tIn scene 3, Bard...",Merry Wives of Windsor,Act IV Scene 3-4,


#### Fill in texts

##### Function to get text of book from title

In [320]:
def get_text(title):
    text_title = title.replace(' ', '_')
    text = open(f'./data/book_full_texts/{text_title}.txt', 'r').read()
    return text

##### Winesburg, Ohio: Novel Summary: Queer

In [321]:
main_df.iloc[315]

chapter_title                  Winesburg, Ohio: Novel Summary: Queer
chapter_summary    Summary\n\tElmer Cowley is the son of Ebenezer...
book_title                                           Winesburg, Ohio
chapters                                                       Queer
chapter_text                                                        
Name: 315, dtype: object

In [322]:
wo_text = get_text('Winesburg, Ohio')

In [323]:
wo_text[:300]

'Winesburg, OhiobySherwood Anderson\n\n\n\n\n\nSHERWOOD ANDERSON\n\nWinesburg, Ohio\n\n\n\n\nINTRODUCTION\n\nby Irving Howe\n\n\nI must have been no more than fifteen or sixteen\nyears old when I first chanced upon Winesburg, Ohio.\nGripped by these stories and sketches of Sherwood\nAnderson\'s small-town "grotesques," I '

In [324]:
wo_queer = (wo_text.partition('\"QUEER\"')[2]).partition('THE UNTOLD LIE')[0]

In [325]:
wo_queer[-300:]

'ssing train and running over\nthe tops of cars, Elmer sprang down to a flat car and\nlying on his face looked back, trying to see the fallen\nman in the darkness.  Pride surged up in him.  "I\nshowed him," he cried.  "I guess I showed him.  I\nain\'t so queer.  I guess I showed him I ain\'t so\nqueer."\n\n\n\n\n'

In [326]:
wo_queer[:300]

"\n\nFROM HIS SEAT on a box in the rough board shed that\nstuck like a burr on the rear of Cowley & Son's store\nin Winesburg, Elmer Cowley, the junior member of\nthe firm, could see through a dirty window into the\nprintshop of the Winesburg Eagle.  Elmer was putting\nnew shoelaces in his shoes.  They did "

In [327]:
main_df.iloc[315]['chapter_text']

''

In [328]:
main_df.iloc[315]['chapter_text'] = wo_queer

In [329]:
main_df.iloc[315]

chapter_title                  Winesburg, Ohio: Novel Summary: Queer
chapter_summary    Summary\n\tElmer Cowley is the son of Ebenezer...
book_title                                           Winesburg, Ohio
chapters                                                       Queer
chapter_text       \n\nFROM HIS SEAT on a box in the rough board ...
Name: 315, dtype: object

##### A Tale of Two Cities - Book II Ch 4-5 & Book II Ch 21-24

In [330]:
main_df.iloc[293]

chapter_title      A Tale of Two Cities: Novel Summary: Book II C...
chapter_summary    Book II\nChapter 4-5\nOutside the court Charle...
book_title                                      A Tale of Two Cities
chapters                                         Book II Chapter 4-5
chapter_text                                                        
Name: 293, dtype: object

In [331]:
main_df.iloc[299]

chapter_title      A Tale of Two Cities: Novel Summary: Book II C...
chapter_summary    Book II\nChapter 21-24\nYears pass. Lucie occa...
book_title                                      A Tale of Two Cities
chapters                                       Book II Chapter 21-24
chapter_text                                                        
Name: 299, dtype: object

In [332]:
attc_text = get_text('A Tale of Two Cities')

In [333]:
attc_bk2 = attc_text.partition('Book the Second--the Golden Thread')[2]
attc_bk2[:300]

"\n\n\n\n\nI\n\nFive Years Later\n\n\nTellson's Bank by Temple Bar was an old-fashioned place, even in the\nyear one thousand seven hundred and eighty.  It was very small, very\ndark, very ugly, very incommodious.  It was an old-fashioned place,\nmoreover, in the moral attribute that the partners in the House wer"

In [334]:
attc_4_5 = (attc_bk2.partition('IV')[2]).partition('VI')[0]
attc_4_5[:300]

'\n\nCongratulatory\n\n\nFrom the dimly-lighted passages of the court, the last sediment of the\nhuman stew that had been boiling there all day, was straining off,\nwhen Doctor Manette, Lucie Manette, his daughter, Mr. Lorry, the\nsolicitor for the defence, and its counsel, Mr. Stryver, stood\ngathered round '

In [335]:
attc_4_5[-300:]

's wet with wasted tears.\n\nSadly, sadly, the sun rose; it rose upon no sadder sight than the man\nof good abilities and good emotions, incapable of their directed\nexercise, incapable of his own help and his own happiness, sensible\nof the blight on him, and resigning himself to let it eat him away.\n\n\n\n'

In [336]:
main_df.iloc[293]['chapter_text']

''

In [337]:
main_df.iloc[293]['chapter_text'] = attc_4_5

In [338]:
main_df.iloc[293]

chapter_title      A Tale of Two Cities: Novel Summary: Book II C...
chapter_summary    Book II\nChapter 4-5\nOutside the court Charle...
book_title                                      A Tale of Two Cities
chapters                                         Book II Chapter 4-5
chapter_text       \n\nCongratulatory\n\n\nFrom the dimly-lighted...
Name: 293, dtype: object

In [339]:
attc_21_24 = (attc_bk2.partition('XXI')[2]).partition('The end of the second book.')[0]
attc_21_24[:300]

'\n\nEchoing Footsteps\n\n\nA wonderful corner for echoes, it has been remarked, that corner where\nthe Doctor lived.  Ever busily winding the golden thread which bound\nher husband, and her father, and herself, and her old directress and\ncompanion, in a life of quiet bliss, Lucie sat in the still house in '

In [340]:
attc_21_24[-300:]

'ook horse for Dover; and began his\njourney.  "For the love of Heaven, of justice, of generosity, of the\nhonour of your noble name!" was the poor prisoner\'s cry with which\nhe strengthened his sinking heart, as he left all that was dear on\nearth behind him, and floated away for the Loadstone Rock.\n\n\n\n'

In [341]:
main_df.iloc[299]['chapter_text']

''

In [342]:
main_df.iloc[299]['chapter_text'] = attc_21_24

In [343]:
main_df.iloc[299]

chapter_title      A Tale of Two Cities: Novel Summary: Book II C...
chapter_summary    Book II\nChapter 21-24\nYears pass. Lucie occa...
book_title                                      A Tale of Two Cities
chapters                                       Book II Chapter 21-24
chapter_text       \n\nEchoing Footsteps\n\n\nA wonderful corner ...
Name: 299, dtype: object

##### Notes from the Underground Part 1 Ch 9-10 & Part 2 Ch 4-5

In [344]:
nfu_text = get_text('Notes from the Underground')

In [345]:
nfu_part1 = nfu_text.partition('PART II')[0]
nfu_part1[-300:]

'rted and honest.  Well,\nhere is a chance for me, anyway.\n\nSnow is falling today, yellow and dingy.  It fell yesterday, too, and a few\ndays ago.  I fancy it is the wet snow that has reminded me of that incident\nwhich I cannot shake off now.  And so let it be a story A PROPOS of the\nfalling snow.\n\n\n\n\n'

In [346]:
nfu_9_10 = (nfu_part1.partition('IX')[2]).partition('XI')[0]
nfu_9_10[:300]

'\n\n\nGentlemen, I am joking, and I know myself that my jokes are not\nbrilliant,but you know one can take everything as a joke.  I am, perhaps,\njesting against the grain.  Gentlemen, I am tormented by questions;\nanswer them for me.  You, for instance, want to cure men of their old\nhabits and reform the'

In [347]:
nfu_9_10[-300:]

'eat?  Can this be my whole purpose?  I do not\nbelieve it.\n\nBut do you know what: I am convinced that we underground folk\nought to be kept on a curb.  Though we may sit forty years underground\nwithout speaking, when we do come out into the light of day and break\nout we talk and talk and talk ....\n\n\n\n'

In [348]:
main_df.iloc[283]

chapter_title      Notes from the Underground: Novel Summary: Par...
chapter_summary    Part 1 Chapter 9: Next, the UM uses the analog...
book_title                                Notes from the Underground
chapters                          Part 1 Chapter 9-Part 1 Chapter 10
chapter_text                                                        
Name: 283, dtype: object

In [349]:
main_df.iloc[283]['chapter_text'] = nfu_9_10

In [350]:
main_df.iloc[283]

chapter_title      Notes from the Underground: Novel Summary: Par...
chapter_summary    Part 1 Chapter 9: Next, the UM uses the analog...
book_title                                Notes from the Underground
chapters                          Part 1 Chapter 9-Part 1 Chapter 10
chapter_text       \n\n\nGentlemen, I am joking, and I know mysel...
Name: 283, dtype: object

In [351]:
nfu_part2 = nfu_text.partition('PART II')[2]
nfu_part2[:300]

"\n\nA Propos of the Wet Snow\n\n\nWhen from dark error's subjugation\nMy words of passionate exhortation\n  Had wrenched thy fainting spirit free;\nAnd writhing prone in thine affliction\nThou didst recall with malediction\n  The vice that had encompassed thee:\nAnd when thy slumbering conscience, fretting\n  B"

In [352]:
nfu_part2[-300:]

'oping a\ntaste for it.  Soon we shall contrive to be born somehow from an idea.  But\nenough; I don\'t want to write more from "Underground."\n\n\n[The notes of this paradoxalist do not end here, however.  He could not\nrefrain from going on with them, but it seems to us that we may stop\nhere.]\n\n\n\n\n\n\n\n\n\n\n\n'

In [353]:
nfu_4_5 = (nfu_part2.partition('IV')[2]).partition('...  Somewhere behind a screen')[0]
nfu_4_5[:300]

'\n\n\nI had been certain the day before that I should be the first to arrive.  But it\nwas not a question of being the first to arrive.  Not only were they not\nthere, but I had difficulty in finding our room.  The table was not laid\neven.  What did it mean?  After a good many questions I elicited from t'

In [354]:
nfu_4_5[-300:]

'hsome stirred within me.  I went\nstraight up to her.\n\nI chanced to look into the glass.  My harassed face struck me as\nrevolting in the extreme, pale, angry, abject, with dishevelled hair.  "No\nmatter, I am glad of it," I thought; "I am glad that I shall seem repulsive\nto her; I like that."\n\n\n\nVI\n\n\n'

In [355]:
main_df.iloc[286]

chapter_title      Notes from the Underground: Novel Summary: Par...
chapter_summary    Part 2 Chapter 4-Part 2 Chapter 5\nPart 2 Chap...
book_title                                Notes from the Underground
chapters                           Part 2 Chapter 4-Part 2 Chapter 5
chapter_text                                                        
Name: 286, dtype: object

In [356]:
main_df.iloc[286]['chapter_text'] = nfu_4_5

In [357]:
main_df.iloc[286]

chapter_title      Notes from the Underground: Novel Summary: Par...
chapter_summary    Part 2 Chapter 4-Part 2 Chapter 5\nPart 2 Chap...
book_title                                Notes from the Underground
chapters                           Part 2 Chapter 4-Part 2 Chapter 5
chapter_text       \n\n\nI had been certain the day before that I...
Name: 286, dtype: object

##### 4 chapters from The Red Badge of Courage

In [358]:
rbc_text = get_text('The Red Badge of Courage')

In [359]:
main_df.iloc[278]['chapter_title']

'The Red Badge of Courage : Novel Summary: Chapter 23-24'

In [360]:
rbc_23_24 = rbc_text.partition('CHAPTER XXIII.')[2]
rbc_23_24[:300]

'\n\n\nTHE colonel came running along back of the\nline.  There were other officers following him.\n"We must charge\'m!" they shouted.  "We must\ncharge\'m!" they cried with resentful voices, as\nif anticipating a rebellion against this plan by the\nmen.\n\nThe youth, upon hearing the shouts, began to\nstudy the '

In [361]:
rbc_23_24[-300:]

"t.\nHe had been an animal blistered and sweating in\nthe heat and pain of war.  He turned now with a\nlover's thirst to images of tranquil skies, fresh\nmeadows, cool brooks--an existence of soft and\neternal peace.\n\nOver the river a golden ray of sun came\nthrough the hosts of leaden rain clouds.\n\n\n\n\n\n\n\n"

In [362]:
main_df.iloc[278]['chapter_text'] = rbc_23_24

In [363]:
main_df.iloc[278]

chapter_title      The Red Badge of Courage : Novel Summary: Chap...
chapter_summary    The officers call for a charge, and the men re...
book_title                                  The Red Badge of Courage
chapters           The Red Badge of Courage : Novel Summary: Chap...
chapter_text       \n\n\nTHE colonel came running along back of t...
Name: 278, dtype: object

In [364]:
main_df.iloc[276]['chapter_title']

'The Red Badge of Courage : Novel Summary: Chapter 18-19'

In [365]:
rbc_18_19 = (rbc_text.partition('CHAPTER XVIII.')[2]).partition('CHAPTER XX.')[0]

In [366]:
main_df.iloc[276]['chapter_text'] = rbc_18_19

In [367]:
main_df.iloc[276]

chapter_title      The Red Badge of Courage : Novel Summary: Chap...
chapter_summary    During the lull in their portion of the battle...
book_title                                  The Red Badge of Courage
chapters           The Red Badge of Courage : Novel Summary: Chap...
chapter_text       \n\n\nTHE ragged line had respite for some min...
Name: 276, dtype: object

In [368]:
main_df.iloc[275]['chapter_title']

'The Red Badge of Courage : Novel Summary: Chapter 16-17'

In [369]:
rbc_16_17 = (rbc_text.partition('CHAPTER XVI.')[2]).partition('CHAPTER XVIII.')[0]

In [370]:
main_df.iloc[275]['chapter_text'] = rbc_16_17

In [371]:
main_df.iloc[275]

chapter_title      The Red Badge of Courage : Novel Summary: Chap...
chapter_summary    Henry’s regiment marches to some trenches to r...
book_title                                  The Red Badge of Courage
chapters           The Red Badge of Courage : Novel Summary: Chap...
chapter_text       \n\n\nA SPUTTERING of musketry was always to b...
Name: 275, dtype: object

In [372]:
main_df.iloc[273]['chapter_title']

'The Red Badge of Courage : Novel Summary: Chapter 11-12'

In [373]:
rbc_11_12 = (rbc_text.partition('CHAPTER XI.')[2]).partition('CHAPTER XIII.')[0]

In [374]:
main_df.iloc[273]['chapter_text'] = rbc_11_12

In [375]:
main_df.iloc[273]

chapter_title      The Red Badge of Courage : Novel Summary: Chap...
chapter_summary    The noise of the battle grows louder and Henry...
book_title                                  The Red Badge of Courage
chapters           The Red Badge of Courage : Novel Summary: Chap...
chapter_text       \n\n\nHE became aware that the furnace roar of...
Name: 273, dtype: object

##### As You Like It Act 3, Scene 1-Act 3, Scene 2

In [376]:
main_df.iloc[259]

chapter_title      As You Like It: Novel Summary: Act 3, Scene 1-...
chapter_summary    Act 3, Scene 1: Duke Frederick tells Oliver he...
book_title                                            As You Like It
chapters                               Act 3, Scene 1-Act 3, Scene 2
chapter_text                                                        
Name: 259, dtype: object

In [377]:
ayl_text = get_text('As You Like It')

In [378]:
ayl_act3 = (ayl_text.partition('ACT III. SCENE I.')[2]).partition('ACT IV. SCENE I.')[0]
ayl_act3[:300]

"\nThe palace\n\nEnter DUKE FREDERICK, OLIVER, and LORDS\n\n  FREDERICK. Not see him since! Sir, sir, that cannot be.\n    But were I not the better part made mercy,\n    I should not seek an absent argument\n    Of my revenge, thou present. But look to it:\n    Find out thy brother wheresoe'er he is;\n    See"

In [379]:
ayl_1_2 = ayl_act3.partition('SCENE III.')[0]
ayl_1_2[:300]

"\nThe palace\n\nEnter DUKE FREDERICK, OLIVER, and LORDS\n\n  FREDERICK. Not see him since! Sir, sir, that cannot be.\n    But were I not the better part made mercy,\n    I should not seek an absent argument\n    Of my revenge, thou present. But look to it:\n    Find out thy brother wheresoe'er he is;\n    See"

In [380]:
main_df.iloc[259]['chapter_text'] = ayl_1_2

In [381]:
main_df.iloc[259]

chapter_title      As You Like It: Novel Summary: Act 3, Scene 1-...
chapter_summary    Act 3, Scene 1: Duke Frederick tells Oliver he...
book_title                                            As You Like It
chapters                               Act 3, Scene 1-Act 3, Scene 2
chapter_text       \nThe palace\n\nEnter DUKE FREDERICK, OLIVER, ...
Name: 259, dtype: object

##### Dantes Inferno Canto 21-22	

In [382]:
main_df.iloc[117]

chapter_title                The Inferno: Novel Summary: Canto 21-22
chapter_summary    Summary\n\tIn the Fifth Bolgia, Dante sees boi...
book_title                                            Dantes Inferno
chapters                                                 Canto 21-22
chapter_text                                                        
Name: 117, dtype: object

In [383]:
di_text = get_text('Dantes Inferno')

In [384]:
di_21_22 = (di_text.partition('CANTO 21')[2]).partition('CANTO 23')[0]

In [385]:
main_df.iloc[117]['chapter_text'] = di_21_22

In [386]:
main_df.iloc[117]

chapter_title                The Inferno: Novel Summary: Canto 21-22
chapter_summary    Summary\n\tIn the Fifth Bolgia, Dante sees boi...
book_title                                            Dantes Inferno
chapters                                                 Canto 21-22
chapter_text       \n\nFROM bridge to bridge thus, speaking other...
Name: 117, dtype: object

##### Cymbeline Act 1 Scene 7 & Act 3 Scene 8

In [387]:
main_df[main_df['book_title']=='Cymbeline']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
64,Cymbeline: Novel Summary: Act 1 Scene 1,"Summary\n\tAct 1, scene 1\n\tTwo gentlemen are...",Cymbeline,Act 1 Scene 1,The Complete Works of William Shakespeare Cymb...
65,Cymbeline: Novel Summary: Act 1 Scene 2,Summary\n\tThe Queen tells Imogen that she is ...,Cymbeline,Act 1 Scene 2,\nBritain. A public place\n\nEnter CLOTEN and ...
66,Cymbeline: Novel Summary: Act 1 Scene 3,Summary\n\tThe First Lord advises Cloten to ch...,Cymbeline,Act 1 Scene 3,\nBritain. CYMBELINE'S palace\n\nEnter IMOGEN ...
67,Cymbeline: Novel Summary: Act 1 Scene 4,Summary\n\tImogen questions Pisanio about his ...,Cymbeline,Act 1 Scene 4,"\nRome. PHILARIO'S house\n\nEnter PHILARIO, IA..."
68,Cymbeline: Novel Summary: Act 1 Scene 5,Summary\n\tThe scene is set in Rome at the hou...,Cymbeline,Act 1 Scene 5,"\nBritain. CYMBELINE'S palace\n\nEnter QUEEN, ..."
69,Cymbeline: Novel Summary: Act 1 Scene 6,"Summary\n\tAt Cymbeline's palace, the Queen is...",Cymbeline,Act 1 Scene 6,\nBritain. The palace\n\nEnter IMOGEN alone\n\...
70,Cymbeline: Novel Summary: Act 1 Scene 7,Summary\n\tImogen is lamenting her unhappy sit...,Cymbeline,Act 1 Scene 7,
71,Cymbeline: Novel Summary: Act 2 Scene 1,Summary\n\tCloten is playing bowls with some L...,Cymbeline,Act 2 Scene 1,SCENE I.\nBritain. Before CYMBELINE'S palace\...
72,Cymbeline: Novel Summary: Act 2 Scene 2,Summary\n\tThe scene is set in Imogen's bedroo...,Cymbeline,Act 2 Scene 2,\nBritain. IMOGEN'S bedchamber in CYMBELINE'S ...
73,Cymbeline: Novel Summary: Act 2 Scene 3,"Summary\n\tCloten is playing some Lords, eithe...",Cymbeline,Act 2 Scene 3,\nCYMBELINE'S palace. An ante-chamber adjoinin...


In [388]:
len(main_df[main_df['book_title']=='Cymbeline']['chapter_title'])

28

In [389]:
len(c_texts)

28

In [390]:
main_df.iloc[70]['chapter_summary']

'Summary\n\tImogen is lamenting her unhappy situation, and wishes she had been stolen like her brothers. Pisanio enters with Iachimo, introducing him as a gentleman from Rome who has brought letters from Posthumus, who is safe. In an aside, Iachimo praises Imogen\'s beauty and admits that if her mind is as admirable, he has lost his wager.\n\tPosthumus, in his letter, asks Imogen to treat Iachimo well in return for his "kindnesses" to Posthumus (line 23). She welcomes him. Iachimo appears to be wonder-struck by Imogen\'s beauty, and muses aloud how anyone could fail to distinguish between fair and foul. He seems to imply, without actually saying it outright, that there is another woman whom, in spite of her obvious inferiority ("sluttery," line 44), Posthumus is driven by lust to prefer to Imogen.\n\tImogen is mystified but changes the subject, asking after Posthumus\'s health and state of mind. Iachimo takes his chance to blacken Posthumus\'s character further, saying he is so merry t

Act 1 and Act 2 in the full text of Cymbeline have one less scene than indicated in the scene summaries. The full text scenes and scene sumamries appear at least in some cases to be mismatched. In the interest of time, Cymbeline will be dropped from the dataset for now. The data for this book can be cleaned up and added back in order to improve models in the future.

In [391]:
main_df.shape

(323, 5)

In [392]:
main_df.drop(index=list(range(64, 92))).shape

(295, 5)

In [393]:
main_df.drop(index=list(range(64, 92)), inplace=True)

In [397]:
main_df.reset_index(inplace=True)

In [398]:
main_df.drop(columns='index', inplace=True)

##### Merry Wives of Windsor

In [399]:
main_df[main_df['book_title']=='Merry Wives of Windsor']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
50,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 1-2\n\tThe Merry Wives of W...,Merry Wives of Windsor,Act I Scene 1-2,"\nSCENE:\nWindsor, and the neighbourhood\n\n\n..."
51,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3,
52,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4,
53,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 1\n\tMrs. Page reads the l...,Merry Wives of Windsor,Act II Scene 1,SCENE 1.\n\nBefore PAGE'S house\n\nEnter MIST...
54,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2,
55,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1,
56,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2,
57,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3,
58,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4,
59,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 5\n\tFalstaff tells Bardo...,Merry Wives of Windsor,Act III Scene 5,


A large portion of the chapter texts from this book were not saved correctly. Chapter texts will be re-added to all rows

In [400]:
mw_text = get_text('Merry Wives of Windsor')

In [401]:
mw_act1 = mw_text.partition('ACT II. SCENE 1.')[0]
mw_act1[:300]

"\nSCENE:\nWindsor, and the neighbourhood\n\n\nThe Merry Wives of Windsor\n\n\n\nACT I. SCENE 1.\n\nWindsor. Before PAGE'S house\n\nEnter JUSTICE SHALLOW, SLENDER, and SIR HUGH EVANS\n\n  SHALLOW. Sir Hugh, persuade me not; I will make a Star\n    Chamber matter of it; if he were twenty Sir John Falstaffs,\n    he sh"

In [402]:
mw_act1[-300:]

"  FENTON. Well, farewell; I am in great haste now.\n  QUICKLY. Farewell to your worship.  [Exit FENTON]  Truly,\n    an honest gentleman; but Anne loves him not; for I know \n    Anne's mind as well as another does. Out upon 't, what\n    have I forgot?                                          Exit\n\n\n\n\n"

In [403]:
mw_act2 = (mw_text.partition('ACT II. SCENE 1.')[2]).partition('ACT III SCENE 1.')[0]
mw_act2[:300]

"\n\nBefore PAGE'S house\n\nEnter MISTRESS PAGE, with a letter\n\n  MRS. PAGE. What! have I scap'd love-letters in the holiday-time\n    of my beauty, and am I now a subject for them? Let\n    me see.                                              [Reads]\n    'Ask me no reason why I love you; for though Love u"

In [404]:
mw_act3 = (mw_text.partition('ACT III SCENE 1.')[2]).partition('ACT IV. SCENE I.')[0]

In [405]:
mw_act4 = (mw_text.partition('ACT IV. SCENE I.')[2]).partition('ACT V. SCENE 1.')[0]

In [406]:
mw_act5 = mw_text.partition('ACT V. SCENE 1.')[2]

In [407]:
mw_act1_1 = mw_act1.partition('SCENE 2.')[0]
mw_act1_2 = (mw_act1.partition('SCENE 2.')[2]).partition('SCENE 3.')[0]
mw_act1_3 = (mw_act1.partition('SCENE 3.')[2]).partition('SCENE 4.')[0]
mw_act1_4 = mw_act1.partition('SCENE 4.')[2]

In [408]:
mw_act2_1 = mw_act2.partition('SCENE 2.')[0]
mw_act2_2 = (mw_act2.partition('SCENE 2.')[2]).partition('SCENE 3.')[0]
mw_act2_3 = mw_act2.partition('SCENE 3.')[2]

In [409]:
mw_act3_1 = mw_act3.partition('SCENE 2.')[0]
mw_act3_2 = (mw_act3.partition('SCENE 2.')[2]).partition('SCENE 3.')[0]
mw_act3_3 = (mw_act3.partition('SCENE 3.')[2]).partition('SCENE 4.')[0]
mw_act3_4 = (mw_act3.partition('SCENE 4.')[2]).partition('SCENE 5.')[0]
mw_act3_5 = mw_act3.partition('SCENE 5.')[2]

In [410]:
mw_act4_1 = mw_act4.partition('SCENE 2.')[0]
mw_act4_2 = (mw_act4.partition('SCENE 2.')[2]).partition('SCENE 3.')[0]
mw_act4_3 = (mw_act4.partition('SCENE 3.')[2]).partition('SCENE 4')[0]
mw_act4_4 = (mw_act4.partition('SCENE 4')[2]).partition('SCENE 5.')[0]
mw_act4_5 = (mw_act4.partition('SCENE 5.')[2]).partition('SCENE 6.')[0]
mw_act4_6 = mw_act4.partition('SCENE 6.')[2]

In [411]:
main_df[main_df['book_title']=='Merry Wives of Windsor']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
50,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 1-2\n\tThe Merry Wives of W...,Merry Wives of Windsor,Act I Scene 1-2,"\nSCENE:\nWindsor, and the neighbourhood\n\n\n..."
51,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3,
52,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4,
53,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 1\n\tMrs. Page reads the l...,Merry Wives of Windsor,Act II Scene 1,SCENE 1.\n\nBefore PAGE'S house\n\nEnter MIST...
54,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2,
55,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1,
56,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2,
57,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3,
58,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4,
59,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 5\n\tFalstaff tells Bardo...,Merry Wives of Windsor,Act III Scene 5,


In [412]:
mw_chapter_text = [
    ' '.join([mw_act1_1, mw_act1_2]),
    mw_act1_3,
    mw_act1_4,
    mw_act2_1,
    mw_act2_2,
    ' '.join([mw_act2_3, mw_act3_1]),
    mw_act3_2,
    mw_act3_3,
    mw_act3_4,
    mw_act3_5,
    ' '.join([mw_act4_1, mw_act4_2]),
    ' '.join([mw_act4_3, mw_act4_4]),
    ' '.join([mw_act4_5, mw_act4_6]),
    mw_act5
]

In [413]:
main_df.iloc[50]['chapter_text'] = mw_chapter_text[0]
main_df.iloc[51]['chapter_text'] = mw_chapter_text[1]
main_df.iloc[52]['chapter_text'] = mw_chapter_text[2]
main_df.iloc[53]['chapter_text'] = mw_chapter_text[3]
main_df.iloc[54]['chapter_text'] = mw_chapter_text[4]
main_df.iloc[55]['chapter_text'] = mw_chapter_text[5]
main_df.iloc[56]['chapter_text'] = mw_chapter_text[6]
main_df.iloc[57]['chapter_text'] = mw_chapter_text[7]
main_df.iloc[58]['chapter_text'] = mw_chapter_text[8]
main_df.iloc[59]['chapter_text'] = mw_chapter_text[9]
main_df.iloc[60]['chapter_text'] = mw_chapter_text[10]
main_df.iloc[61]['chapter_text'] = mw_chapter_text[11]
main_df.iloc[62]['chapter_text'] = mw_chapter_text[12]
main_df.iloc[63]['chapter_text'] = mw_chapter_text[13]

In [414]:
main_df[main_df['book_title']=='Merry Wives of Windsor']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
50,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 1-2\n\tThe Merry Wives of W...,Merry Wives of Windsor,Act I Scene 1-2,"\nSCENE:\nWindsor, and the neighbourhood\n\n\n..."
51,Merry Wives of Windsor: Novel Summary: Act I S...,\n\t \nAct I Scene 3\n\tFalstaff tells the Hos...,Merry Wives of Windsor,Act I Scene 3,"\n\nThe Garter Inn\n\nEnter FALSTAFF, HOST, BA..."
52,Merry Wives of Windsor: Novel Summary: Act I S...,"\n\t \nAct I Scene 4\n\tAt Dr. Caius's house, ...",Merry Wives of Windsor,Act I Scene 4,\n\nDOCTOR CAIUS'S house\n\nEnter MISTRESS QUI...
53,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 1\n\tMrs. Page reads the l...,Merry Wives of Windsor,Act II Scene 1,\n\nBefore PAGE'S house\n\nEnter MISTRESS PAGE...
54,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 2\n\tPistol asks Falstaff ...,Merry Wives of Windsor,Act II Scene 2,\n\nA room in the Garter Inn\n\nEnter FALSTAFF...
55,Merry Wives of Windsor: Novel Summary: Act II ...,\n\t \nAct II Scene 3\n\t- Act III Scene 1\n\t...,Merry Wives of Windsor,Act II Scene 3 - Act III Scene 1,\n\nA field near Windsor\n\nEnter CAIUS and RU...
56,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 2\n\tFord encounters Mrs....,Merry Wives of Windsor,Act III Scene 2,\n\nThe street in Windsor\n\nEnter MISTRESS PA...
57,Merry Wives of Windsor: Novel Summary: Act III...,"\n\t \nAct III Scene 3\n\tAt Ford's house, Mrs...",Merry Wives of Windsor,Act III Scene 3,\n\nFORD'S house\n\nEnter MISTRESS FORD and MI...
58,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 4\n\tFenton tells Anne Pa...,Merry Wives of Windsor,Act III Scene 4,\n\nBefore PAGE'S house\n\nEnter FENTON and AN...
59,Merry Wives of Windsor: Novel Summary: Act III...,\n\t \nAct III Scene 5\n\tFalstaff tells Bardo...,Merry Wives of Windsor,Act III Scene 5,\n\nThe Garter Inn\n\nEnter FALSTAFF and BARDO...


##### Check progress on filling in of missing texts

In [416]:
main_df[main_df['chapter_text']=='']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text
115,Wuthering Heights: Novel Summary: Chapters 7-8,Chapter 7: Mrs. Dean cleans Heathcliff up and ...,Wuthering Heights,Chapters 7-8,
217,Oliver Twist: Novel Summary: Chapters52-53,\nChapter 52: Fagin was condemned to hang in ...,Oliver Twist,Chapters52-53,


##### Wuthering Heights Ch 7-8

In [417]:
main_df.iloc[115]

chapter_title         Wuthering Heights: Novel Summary: Chapters 7-8
chapter_summary    Chapter 7: Mrs. Dean cleans Heathcliff up and ...
book_title                                         Wuthering Heights
chapters                                                Chapters 7-8
chapter_text                                                        
Name: 115, dtype: object

In [418]:
wh_text = get_text('Wuthering Heights')

In [419]:
wh_7_8 = (wh_text.partition('CHAPTER VII')[2]).partition('CHAPTER IX')[0]
wh_7_8[:300]

'\n\n\n\nCATHY stayed at Thrushcross Grange five weeks:  till Christmas.  By\nthat time her ankle was thoroughly cured, and her manners much\nimproved.  The mistress visited her often in the interval, and\ncommenced her plan of reform by trying to raise her self-respect\nwith fine clothes and flattery, which'

In [420]:
wh_7_8[-300:]

" of the master's fowling-piece,\nwhich he was fond of playing with in his insane excitement, to the\nhazard of the lives of any who provoked, or even attracted his\nnotice too much; and I had hit upon the plan of removing it, that\nhe might do less mischief if he did go the length of firing the\ngun.\n\n\n\n"

In [421]:
main_df.iloc[115]['chapter_text'] = wh_7_8

In [422]:
main_df.iloc[115]

chapter_title         Wuthering Heights: Novel Summary: Chapters 7-8
chapter_summary    Chapter 7: Mrs. Dean cleans Heathcliff up and ...
book_title                                         Wuthering Heights
chapters                                                Chapters 7-8
chapter_text       \n\n\n\nCATHY stayed at Thrushcross Grange fiv...
Name: 115, dtype: object

##### Oliver Twist Ch 52-53

In [423]:
main_df.iloc[217]

chapter_title             Oliver Twist: Novel Summary: Chapters52-53
chapter_summary     \nChapter 52: Fagin was condemned to hang in ...
book_title                                              Oliver Twist
chapters                                               Chapters52-53
chapter_text                                                        
Name: 217, dtype: object

In [424]:
ot_text = get_text('Oliver Twist')

In [425]:
ot_52_53 = ot_text.partition('CHAPTER LII')[2]
ot_52_53[:300]

"\n\nFAGIN'S LAST NIGHT ALIVE\n\nThe court was paved, from floor to roof, with human faces.\nInquisitive and eager eyes peered from every inch of space. From\nthe rail before the dock, away into the sharpest angle of the\nsmallest corner in the galleries, all looks were fixed upon one\nman--Fagin.  Before hi"

In [426]:
ot_52_53[-300:]

'ad\never come back to earth, to visit spots hallowed by the love--the\nlove beyond the grave--of those whom they knew in life, I believe\nthat the shade of Agnes sometimes hovers round that solemn nook.\nI believe it none the less because that nook is in a Church, and\nshe was weak and erring.\n\n\n\n\n\n\n\n\n\n\n'

In [427]:
main_df.iloc[217]['chapter_text'] = ot_52_53

In [428]:
main_df.iloc[217]

chapter_title             Oliver Twist: Novel Summary: Chapters52-53
chapter_summary     \nChapter 52: Fagin was condemned to hang in ...
book_title                                              Oliver Twist
chapters                                               Chapters52-53
chapter_text       \n\nFAGIN'S LAST NIGHT ALIVE\n\nThe court was ...
Name: 217, dtype: object

#### Check that missing chapter texts have been filled in

In [429]:
main_df[main_df['chapter_text']=='']

Unnamed: 0,chapter_title,chapter_summary,book_title,chapters,chapter_text


# Export main dataframe to csv

In [430]:
main_df.to_csv('./data/working_data/chapter_summaries_texts.csv', index=False)