# [1.3 Plotting the Classics](https://www.inferentialthinking.com/chapters/01/3/Plotting_the_Classics.html)

In [10]:
import urllib.request
import pandas as pd

In [11]:
# Read two books, fast!


def _download_text(url):
    """Fetch `url` and return its body as one string with line breaks removed.

    The response bytes are decoded as UTF-8, split with `str.splitlines()`
    (which recognises every line boundary, not only '\\r\\n'), and the pieces
    are joined with no separator, so the text on either side of each line
    break ends up directly adjacent.
    """
    with urllib.request.urlopen(url) as response:
        raw_bytes = response.read()
    return "".join(raw_bytes.decode('utf-8').splitlines())


# The Adventures of Huckleberry Finn
huck_finn_url = 'https://www.inferentialthinking.com/data/huck_finn.txt'
huck_finn_text = _download_text(huck_finn_url)
# Split on the keyword 'CHAPTER ' and keep the pieces from index 44 onward.
# NOTE(review): the first 44 pieces are presumably front matter / table of
# contents entries -- confirm against the raw text.
huck_finn_chapters = huck_finn_text.split('CHAPTER ')[44:]

# Little Women
little_women_url = 'https://www.inferentialthinking.com/data/little_women.txt'
little_women_text = _download_text(little_women_url)
# Split on the keyword 'CHAPTER ' and drop only piece 0, i.e. whatever
# precedes the first occurrence of the keyword.
little_women_chapters = little_women_text.split('CHAPTER ')[1:]

In [12]:
# The chapters of Huckleberry Finn, in a one-column table
pd.DataFrame({'Chapters': huck_finn_chapters})

Unnamed: 0,Chapters
0,I.YOU don't know about me without you have rea...
1,II.WE went tiptoeing along a path amongst the ...
2,"III.WELL, I got a good going-over in the morni..."
3,"IV.WELL, three or four months run along, and i..."
4,V.I had shut the door to. Then I turned aroun...
5,"VI.WELL, pretty soon the old man was up and ar..."
6,"VII.""GIT up! What you 'bout?""I opened my eyes..."
7,VIII.THE sun was up so high when I waked that ...
8,IX.I wanted to go and look at a place right ab...
9,X.AFTER breakfast I wanted to talk about the d...


## [1.3.1 Literary Characters](https://www.inferentialthinking.com/chapters/01/3/1/Literary_Characters.html)

In [13]:
import numpy as np
import plotly.express as px

In [14]:
# For every Huck Finn chapter, count the occurrences of each character's name.
# np.char.count works element-wise over the list of chapter strings and
# returns one count per chapter.
data = {
    name: np.char.count(huck_finn_chapters, name)
    for name in ('Jim', 'Tom', 'Huck')
}
# One row per chapter, one column per name
counts = pd.DataFrame(data)
# Running total down the rows: row k holds the counts for chapters 1..k
cum_counts = counts.cumsum()
# Number the chapters 1..N from the actual row count instead of a hard-coded
# length, so the assignment cannot fail if the chapter split changes.
cum_counts['Chapter'] = np.arange(1, len(cum_counts) + 1)
cum_counts

Unnamed: 0,Jim,Tom,Huck,Chapter
0,0,6,3,1
1,16,30,5,2
2,16,35,7,3
3,24,35,8,4
4,24,35,8,5
5,24,35,10,6
6,24,37,10,7
7,46,39,15,8
8,57,39,16,9
9,76,39,16,10


In [18]:
# Line chart of the running totals: the value at chapter k is how many times
# the name has appeared in chapters 1 through k.
# Note: `cum_counts` must still hold the Huck Finn table when this cell runs;
# the Little Women cell further down rebinds the same name, so execute the
# notebook top to bottom.
fig = px.line(
    cum_counts,
    x='Chapter',
    y=['Jim', 'Tom', 'Huck'],
    title='Cumulative Number of Times Each Name Appears',
)
fig.show()

ValueError: All arguments should have the same length. The length of argument `y` is 3, whereas the length of  previously-processed arguments ['Chapter'] is 47

In [19]:
# Little Women, one chapter per row, in a one-column table
pd.DataFrame({'Chapters': little_women_chapters})

Unnamed: 0,Chapters
0,"ONEPLAYING PILGRIMS""Christmas won't be Christm..."
1,TWOA MERRY CHRISTMASJo was the first to wake i...
2,"THREETHE LAURENCE BOY""Jo! Jo! Where are you?..."
3,"FOURBURDENS""Oh, dear, how hard it does seem to..."
4,"FIVEBEING NEIGHBORLY""What in the world are you..."
5,SIXBETH FINDS THE PALACE BEAUTIFULThe big hous...
6,"SEVENAMY'S VALLEY OF HUMILIATION""That boy is a..."
7,"EIGHTJO MEETS APOLLYON""Girls, where are you go..."
8,"NINEMEG GOES TO VANITY FAIR""I do think it was ..."
9,"TENTHE P.C. AND P.O.As spring came on, a new s..."


In [20]:
# For every Little Women chapter, count the occurrences of each character's
# name. np.char.count works element-wise over the list of chapter strings and
# returns one count per chapter.
data = {
    name: np.char.count(little_women_chapters, name)
    for name in ('Amy', 'Beth', 'Jo', 'Meg', 'Laurie')
}
# One row per chapter, one column per name
counts = pd.DataFrame(data)
# Running total down the rows: row k holds the counts for chapters 1..k
cum_counts = counts.cumsum()
# Number the chapters 1..N from the actual row count instead of a hard-coded
# length, so the assignment cannot fail if the chapter split changes.
cum_counts['Chapter'] = np.arange(1, len(cum_counts) + 1)
cum_counts

Unnamed: 0,Amy,Beth,Jo,Meg,Laurie,Chapter
0,23,26,44,26,0,1
1,36,38,65,46,0,2
2,38,40,127,82,16,3
3,52,58,161,99,16,4
4,58,72,216,112,51,5
5,64,100,229,117,60,6
6,91,105,238,122,67,7
7,139,114,309,138,84,8
8,142,119,330,209,108,9
9,147,124,342,213,112,10


In [21]:
# Line chart of the running totals for the Little Women names: the value at
# chapter k is how many times the name has appeared in chapters 1 through k.
fig = px.line(
    cum_counts,
    x='Chapter',
    y=['Amy', 'Beth', 'Jo', 'Meg', 'Laurie'],
    title='Cumulative Number of Times Each Name Appears',
)
fig.show()

## [1.3.2 Another Kind of Character](https://www.inferentialthinking.com/chapters/01/3/2/Another_Kind_Of_Character.html)

In [22]:
# Per chapter, record two numbers for each book:
#   * the chapter "length", i.e. the total number of characters in its text
#   * the number of periods ('.') it contains

chars_periods_huck_finn = pd.DataFrame({
    'Huck Finn Chapter Length': list(map(len, huck_finn_chapters)),
    'Number of Periods': np.char.count(huck_finn_chapters, '.'),
})

chars_periods_little_women = pd.DataFrame({
    'Little Women Chapter Length': list(map(len, little_women_chapters)),
    'Number of Periods': np.char.count(little_women_chapters, '.'),
})

In [23]:
# Display the per-chapter length and period counts for Huck Finn
chars_periods_huck_finn

Unnamed: 0,Huck Finn Chapter Length,Number of Periods
0,6970,66
1,11874,117
2,8460,72
3,6755,84
4,8095,91
5,14434,125
6,13112,127
7,22031,249
8,8005,71
9,6984,70


In [24]:
# Display the per-chapter length and period counts for Little Women
chars_periods_little_women

Unnamed: 0,Little Women Chapter Length,Number of Periods
0,21496,189
1,21941,188
2,20335,231
3,25213,195
4,23115,255
5,14456,140
6,14247,131
7,22214,214
8,33352,337
9,19163,185


In [55]:
import plotly.graph_objects as go

# Scatter plot with one marker per chapter: periods on the x axis, chapter
# length (characters) on the y axis, and one trace per book so both can be
# compared on the same axes.
fig = (
    go.Figure()
    .add_trace(go.Scatter(
        name='Huck Finn Chapter',
        mode='markers',
        x=chars_periods_huck_finn['Number of Periods'],
        y=chars_periods_huck_finn['Huck Finn Chapter Length'],
    ))
    .add_trace(go.Scatter(
        name='Little Women',
        mode='markers',
        x=chars_periods_little_women['Number of Periods'],
        y=chars_periods_little_women['Little Women Chapter Length'],
    ))
    .update_layout(
        xaxis_title='Number of periods in chapter',
        yaxis_title='Number of characters in chapter',
        width=1200,
        height=800,
    )
)

fig.show()