# Presidential Speeches: Sophistication of Speech
This notebook computes the 'readability level' of the text of 991 presidential speeches spanning all US presidents from George Washington to Donald Trump (mid-term, 2019).  Readability level = the school grade a person would need to have completed in order to fully understand the text.

### Setup:

In [1]:
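# install textstat if needed by uncommenting and running the line below
# (%pip targets this kernel's environment; 0.6.2 is the version this notebook was run with):
# %pip install textstat==0.6.2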

Collecting textstat
  Downloading textstat-0.6.2-py3-none-any.whl (102 kB)
[K     |████████████████████████████████| 102 kB 3.9 MB/s ta 0:00:01
[?25hCollecting pyphen
  Downloading Pyphen-0.9.5-py2.py3-none-any.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 15.6 MB/s eta 0:00:01
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.9.5 textstat-0.6.2


In [2]:
import textstat
import pandas as pd

### Data & Analysis:

In [3]:
# Load the original (not yet cleaned) presidential speech transcripts.
# Readability is scored on this raw text.

transcripts_original = pd.read_csv(filepath_or_buffer='csv/transcripts.csv')

In [4]:
# Score every speech with textstat's consensus readability estimate.
#   float_output=False makes text_standard return a label string
#   (e.g. '12th and 13th grade') instead of a number.
#   The labels are collected, in transcript order, into a one-column dataframe.

grade_levels = [
    textstat.text_standard(speech_text, float_output=False)
    for speech_text in transcripts_original.Transcript
]

grade_levels_df = pd.DataFrame(grade_levels, columns=['grade_level'])

In [5]:
# List the distinct grade-level labels that were produced

grade_levels_df['grade_level'].unique()

array(['26th and 27th grade', '44th and 45th grade',
       '22nd and 23rd grade', '15th and 16th grade',
       '12th and 13th grade', '18th and 19th grade',
       '23rd and 24th grade', '17th and 18th grade',
       '16th and 17th grade', '38th and 39th grade', '-1th and 0th grade',
       '25th and 26th grade', '11th and 12th grade',
       '27th and 28th grade', '19th and 20th grade',
       '14th and 15th grade', '10th and 11th grade', '6th and 7th grade',
       '13th and 14th grade', '21st and 22nd grade',
       '36th and 37th grade', '24th and 25th grade',
       '79th and 80th grade', '30th and 31st grade',
       '20th and 21st grade', '31st and 32nd grade',
       '28th and 29th grade', '34th and 35th grade', '8th and 9th grade',
       '35th and 36th grade', '29th and 30th grade',
       '48th and 49th grade', '9th and 10th grade', '32nd and 33rd grade',
       '7th and 8th grade', '5th and 6th grade'], dtype=object)

#### Takeaways:
- Quite a range, from '-1th and 0th grade' up to '79th and 80th grade'.  Will bin these to make them easier to evaluate.

In [91]:
# Bin every readability label at or above the '14th and 15th grade' band into
# a single 'college-level or higher' category.
#
# Labels look like '<lower> and <upper> grade' (e.g. '26th and 27th grade'),
# so the leading integer is the lower bound of the band.  Parsing it replaces
# a hand-maintained list of 26 literal labels and also catches any band that
# list did not anticipate.
#
# NOTE(review): '13th and 14th grade' is left unbinned so the result matches
# the previous hard-coded list; lower the threshold to 13 if that band should
# count as college-level too.

COLLEGE_LABEL = 'college-level or higher'
COLLEGE_MIN_LOWER_GRADE = 14

# Leading (possibly negative) integer of each label.  Labels without one
# (e.g. an already-binned 'college-level or higher') become NaN and fail the
# comparison below, so re-running this cell changes nothing.
lower_grade = pd.to_numeric(
    grade_levels_df['grade_level'].astype(str).str.extract(r'^(-?\d+)', expand=False),
    errors='coerce',
)

grade_levels_df.loc[lower_grade >= COLLEGE_MIN_LOWER_GRADE, 'grade_level'] = COLLEGE_LABEL


In [92]:
# Relabel the only band below 5th grade ('-1th and 0th grade')
# as '4th grade and below'.

below_fifth_mask = grade_levels_df['grade_level'].eq('-1th and 0th grade')
grade_levels_df.loc[below_fifth_mask, 'grade_level'] = '4th grade and below'

In [101]:
# Sanity check after binning: show each remaining label with the number
#   of speeches that carry it

grade_levels_df['grade_level'].value_counts()

college-level or higher    391
12th and 13th grade        116
11th and 12th grade        102
8th and 9th grade           97
10th and 11th grade         87
9th and 10th grade          76
13th and 14th grade         47
7th and 8th grade           33
6th and 7th grade           32
5th and 6th grade            9
4th grade and below          1
Name: grade_level, dtype: int64

In [102]:
# Load the master speech dataframe (file name indicates it already carries topic
# and sentiment columns) -- the readability grade level is added to it below.

potus_speech_master_FINAL = pd.read_csv(
    filepath_or_buffer='csv/potus_speech_master_topic_sentiment.csv'
)

In [103]:
# Attach the binned grade-level label to the master dataframe.
# The assignment aligns on the row index, so both frames are assumed to list
# the speeches in the same order -- TODO confirm.

readability_labels = grade_levels_df['grade_level']
potus_speech_master_FINAL['grade_level'] = readability_labels

In [104]:
# Remove the redundant first column ('Unnamed: 0') that just repeats the index.
# errors='ignore' keeps the cell re-runnable: once the column is gone, a second
# run is a no-op instead of raising KeyError.

potus_speech_master_FINAL = potus_speech_master_FINAL.drop(
    columns='Unnamed: 0', errors='ignore'
)

In [105]:
# Preview the first five rows, now including the grade_level column
potus_speech_master_FINAL.head(n=5)

Unnamed: 0,Date,President,Party,Speech Title,Transcript,polarity,subjectivity,Topic,Topic_Percent,Topic_Category,Historical_Period,grade_level
0,1789-04-30,George Washington,Unaffiliated,First Inaugural Address,Fellow Citizens of the Senate and the House of...,0.206467,0.42896,5,0.928934,Public power and duty,1789-1799: New Democratic Nation,college-level or higher
1,1789-10-03,George Washington,Unaffiliated,Thanksgiving Proclamation,Whereas it is the duty of all Nations to ackno...,0.263889,0.47037,5,0.894471,Public power and duty,1789-1799: New Democratic Nation,college-level or higher
2,1790-01-08,George Washington,Unaffiliated,First Annual Message to Congress,Fellow Citizens of the Senate and House of Rep...,0.203224,0.443704,5,0.772133,Public power and duty,1789-1799: New Democratic Nation,college-level or higher
3,1790-12-08,George Washington,Unaffiliated,Second Annual Message to Congress,Fellow citizens of the Senate and House of Rep...,0.173927,0.464649,5,0.686276,Public power and duty,1789-1799: New Democratic Nation,college-level or higher
4,1790-12-29,George Washington,Unaffiliated,Talk to the Chiefs and Counselors of the Senec...,"I the President of the United States, by my ow...",0.097168,0.485468,6,0.407323,"Laws, treaties, and action",1789-1799: New Democratic Nation,12th and 13th grade


In [106]:
# Write the master dataframe for all speeches (sentiment, topics and
#  readability level) to csv so it can be pulled into Tableau for visualization.
# NOTE(review): the row index is written as an unnamed first column (pandas
#  default); pass index=False if the consumer should not receive it -- confirm.

potus_speech_master_FINAL.to_csv(path_or_buf='csv/potus_speech_master_FINAL.csv')