In [5]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import re
from fastai.text import *

pd.set_option('display.max_colwidth', 500)


In [6]:
df09 = pd.read_json('condensed_2009.json')
df10 = pd.read_json('condensed_2010.json')
df11 = pd.read_json('condensed_2011.json')
df12 = pd.read_json('condensed_2012.json')
df13 = pd.read_json('condensed_2013.json')
df14 = pd.read_json('condensed_2014.json')
df15 = pd.read_json('condensed_2015.json')
df16 = pd.read_json('condensed_2016.json')
df17 = pd.read_json('condensed_2017.json')
df18 = pd.read_json('condensed_2018.json')
df19 = pd.read_json('2019.json')
df20 = pd.read_json('2020.json')

years = [df09, df10, df11, df12, df13, df14, 
         df14, df15, df16, df17, df18, df19, df20]

df = pd.concat(years)
df = df.sort_values('created_at').reset_index(drop=True)

df['created_date'] = [d.date() for d in df['created_at']]
df['created_time'] = [d.time() for d in df['created_at']]

# takes a retweet as a string and removes the 'RT @name: '
def remove_rt_signature(tweet):
    split_tweet = tweet.split(':', 1)
    return split_tweet[1][1:]

# make a new column of all the tweets with the retweets cleaned up
df['text_cleaned'] = df.apply(lambda row: row['text'] if row['is_retweet'] == False 
                                    else remove_rt_signature(row['text']), axis=1)

# add a validation column to indicate the testing data. Use the most recent 20%
df['is_valid'] = False

# Validation data will be most recent 10% of the tweets

# test_size = len(df['is_valid']) // 10 

# df['is_valid'].iloc[-test_size:] = True    



In [7]:
df.tail()

Unnamed: 0,source,id_str,text,created_at,retweet_count,in_reply_to_user_id_str,favorite_count,is_retweet,created_date,created_time,text_cleaned,is_valid
53223,Twitter for iPhone,1251670585165459456,"Great new book out by @realKTMcFarland, “Revolution: Trump, Washington, and We the People”. Get your copy today! https://t.co/KBkladnmPQ",2020-04-19 00:34:57+00:00,7029,,25678,False,2020-04-19,00:34:57,"Great new book out by @realKTMcFarland, “Revolution: Trump, Washington, and We the People”. Get your copy today! https://t.co/KBkladnmPQ",False
53224,Twitter for iPhone,1251670667965104128,"RT @realDonaldTrump: Our GREAT Senator from South Carolina, @SenatorTimScott just released a fantastic new book, “OPPORTUNITY KNOCKS: How H…",2020-04-19 00:35:16+00:00,13202,,0,True,2020-04-19,00:35:16,"Our GREAT Senator from South Carolina, @SenatorTimScott just released a fantastic new book, “OPPORTUNITY KNOCKS: How H…",False
53225,Twitter for iPhone,1251703696385290240,"RT @realDonaldTrump: Great news, thank you!",2020-04-19 02:46:31+00:00,11778,,0,True,2020-04-19,02:46:31,"Great news, thank you!",False
53226,Twitter for iPhone,1251703767067754496,"RT @realDonaldTrump: Thank you @Honeywell, great job!",2020-04-19 02:46:48+00:00,10597,,0,True,2020-04-19,02:46:48,"Thank you @Honeywell, great job!",False
53227,Twitter for iPhone,1251713028246122496,CONGRATULATIONS @AF_Academy! https://t.co/27HxXLNbd8,2020-04-19 03:23:36+00:00,640,,2536,False,2020-04-19,03:23:36,CONGRATULATIONS @AF_Academy! https://t.co/27HxXLNbd8,False


In [4]:
df.to_csv('tweet_df.csv')

In [23]:
# make df of label, text, is_valid 

# label will be 'is_retweet'


texts = df[['is_retweet', 'text_cleaned', 'is_valid']]

In [24]:
texts = texts.rename(columns={'is_retweet':'label', 'text_cleaned':'text'})

In [25]:
texts

Unnamed: 0,label,text,is_valid
0,False,Be sure to tune in and watch Donald Trump on Late Night with David Letterman as he presents the Top Ten List tonight!,False
1,False,Donald Trump will be appearing on The View tomorrow morning to discuss Celebrity Apprentice and his new book Think Like A Champion!,False
2,False,Donald Trump reads Top Ten Financial Tips on Late Show with David Letterman: http://tinyurl.com/ooafwn - Very funny!,False
3,False,New Blog Post: Celebrity Apprentice Finale and Lessons Learned Along the Way: http://tinyurl.com/qlux5e,False
4,False,"""My persona will never be that of a wallflower - I’d rather build walls than cling to them"" --Donald J. Trump",False
...,...,...,...
53223,False,"Great new book out by @realKTMcFarland, “Revolution: Trump, Washington, and We the People”. Get your copy today! https://t.co/KBkladnmPQ",True
53224,True,"Our GREAT Senator from South Carolina, @SenatorTimScott just released a fantastic new book, “OPPORTUNITY KNOCKS: How H…",True
53225,True,"Great news, thank you!",True
53226,True,"Thank you @Honeywell, great job!",True


In [26]:
!pwd

/Users/cooper/Desktop/springboard/tweets/trump_tweets


In [27]:
# Define path, training data, and test data

path = Path('/Users/cooper/Desktop/springboard/tweets/trump_tweets')

train_df, valid_df = texts[texts['is_valid'] == True], texts[texts['is_valid'] == False]



In [29]:
# create data bunches

data_lm = TextLMDataBunch.from_df(path, train_df, valid_df, text_cols=1, bs=32)

data_clas = TextClasDataBunch.from_df(path, train_df, valid_df, text_cols=1, label_cols=0, bs=32)

In [30]:
data_lm.show_batch()

idx,text
0,’s inquiry is officially the first presidential impeachment inquiry in modern history authorized by members … xxbos xxup rt xxunk : xxmaj today ’s vote on the impeachment resolution is a continuation of the farce that xxmaj speaker xxmaj pelosi and xxmaj chairman xxmaj schiff are trying … xxbos xxup rt xxunk : xxmaj voted xxup against xxmaj impeachment xxmaj resolution . xxmaj voted xxup for xxmaj constitution and xxmaj
1,"xxmaj spend your money well ! xxbos xxmaj many people say they know me , claiming to be “ best friends ” and really close etc . , when i do n’t know these people at all . xxmaj this happens , i xxunk , to all who become xxmaj president . xxmaj with that being stated , i do n’t know , to the best of my knowledge ,"
2,xxmaj totally ! https : / / t.co / xxunk xxbos xxmaj thank you to xxup lsu and xxmaj alabama for a great game ! xxbos xxup rt xxunk : xxmaj alabama loves xxmaj president @realdonaldtrump . xxmaj this is what the fake news media wo n’t show you . # xxup maga 🇺 🇸 https : / / t.co / xxunk … xxbos xxmaj but the xxmaj fake xxmaj
3,"answer the question . xxmaj that would be the end of a case run by normal people ! - but not xxmaj shifty ! xxbos xxmaj hit xxmaj new xxmaj stock xxmaj market record again yesterday , the 20th time this year , with xxup great potential for the future . xxup usa is where the action is . xxmaj companies and jobs are coming back like never before !"
4,"xxmaj lt . xxmaj col . xxmaj vindman was offered the position of xxmaj defense xxmaj minister for the xxmaj ukrainian xxmaj government xxup three times ! # xxmaj impeachmen … xxbos xxmaj in the 3,500 pages of transcripts released so far , there was only one reference to bribery . xxmaj xxunk to describe xxmaj vice p … xxbos .@repratcliffe nails it . \n \n xxmaj democrats poll"


In [31]:
data_clas.show_batch()

text,target
"xxbos xxrep 4 . xxmaj this is the biggest political crime in xxmaj american xxmaj history , by far . xxup simply xxup put , xxup the xxup party xxup in xxup power xxup illegally xxup spied xxup on xxup my xxup campaign , xxup both xxup before xxup and xxup after xxup the xxup election , xxup in xxup order xxup to xxup change xxup or xxup xxunk xxup",False
"xxbos xxmaj our case against xxunk ’ , xxunk ’ , xxunk ’ xxmaj adam “ xxmaj shifty ” xxmaj schiff , xxmaj cryin ’ xxmaj chuck xxmaj schumer , xxmaj nervous xxmaj nancy xxmaj pelosi , their leader , dumb as a rock xxup aoc , & & the entire xxmaj radical xxmaj left , xxmaj do xxmaj nothing xxmaj democrat xxmaj party , starts today at 10:00 xxup",False
"xxbos xxmaj more than anything else , the xxmaj radical xxmaj left , xxmaj do xxmaj nothing xxmaj democrats , like xxup aoc , xxmaj omar , xxmaj cryin ’ xxmaj chuck , xxmaj nervous xxmaj nancy & & xxmaj shifty xxmaj schiff , are angry & & “ deranged ” over the fact that xxmaj republicans are up to 191 xxmaj federal xxmaj judges & & xxmaj two xxmaj",False
"xxbos xxrep 4 . ” i xxup want xxup nothing ! i xxup want xxup nothing ! i xxup want xxup no xxup quid xxup pro xxup quo ! xxup tell xxup president xxup zelensky xxup to xxup do xxup the xxup right xxup thing ! ” xxmaj later , xxmaj ambassador xxmaj sondland said that i told him , “ xxmaj good , go tell the truth ! ”",False
"xxbos .@repkevinbrady ( r ) of xxmaj texas-08 loves xxmaj texas & & our xxmaj country . xxmaj he has been a xxup great xxmaj congressman & & supporter of # xxup maga . xxmaj strong on xxmaj crime , xxmaj border , xxmaj military , xxmaj vets and your xxup 2a , he is the best xxmaj tax xxmaj xxunk in xxup d.c. xxmaj kevin has my xxmaj complete",False


In [32]:
data_lm.save('data_lm_export.pkl')
data_clas.save('data_clas_export.pkl')

In [33]:
data_lm = load_data(path, 'data_lm_export.pkl')
data_clas = load_data(path, 'data_clas_export.pkl', bs=16)

In [34]:
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2) # one time through without last layer

epoch,train_loss,valid_loss,accuracy,time
0,4.420157,3.919139,0.279094,07:20


In [35]:
learn.unfreeze() # train full network
learn.fit_one_cycle(1, 1e-3) # low learning rate stops weights from changing too much

epoch,train_loss,valid_loss,accuracy,time
0,3.572575,3.726149,0.30921,07:45


In [36]:
learn.predict("can you imagine", n_words=15)

'can you imagine the Continuing Media again grievances how the Ukrainian Congress has signed'

In [37]:
learn.save_encoder('ft_enc')

In [38]:
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)
learn.load_encoder('ft_enc')

RNNLearner(data=TextClasDataBunch;

Train: LabelList (5322 items)
x: TextList
xxbos xxup rt xxup @flotus : xxmaj trick - or - xxmaj treat @whitehouse # xxmaj xxunk https : / / t.co / xxunk,xxbos xxup rt @stevescalise : 🚨 xxup breaking : xxmaj dems are voting to continue their xxmaj soviet - style impeachment scheme . 
 
  xxmaj they 're not interested in due process . 
 
  t …,xxbos xxup rt @repdougcollins : xxmaj fact : xxmaj pelosi ’s inquiry is officially the first presidential impeachment inquiry in modern history authorized by members …,xxbos xxup rt xxunk : xxmaj today ’s vote on the impeachment resolution is a continuation of the farce that xxmaj speaker xxmaj pelosi and xxmaj chairman xxmaj schiff are trying …,xxbos xxup rt xxunk : xxmaj voted xxup against xxmaj impeachment xxmaj resolution . xxmaj voted xxup for xxmaj constitution and xxmaj america . xxmaj the fight goes on . xxmaj god xxmaj bless xxmaj america !
y: CategoryList
False,False,False,False,False
Path: /Users/coope

In [39]:
learn.fit_one_cycle(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.492556,0.296251,0.896527,13:50


In [74]:
# unfreeze and fine-tune

learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3)) # train the first layers at ... and the last layers at .. and the rest in between

epoch,train_loss,valid_loss,accuracy,time
0,0.236674,0.228476,0.946598,21:06


In [40]:
learn.unfreeze() # fine tune all the layers 
learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))

epoch,train_loss,valid_loss,accuracy,time
0,0.412687,0.247538,0.939193,13:17


In [41]:
# is_retweet = False
learn.predict("States are safely coming back. Our Country is starting to OPEN FOR BUSINESS again. Special care is, and always will be, given to our beloved seniors (except me!). Their lives will be better than ever...WE LOVE YOU ALL!")

(Category False, tensor(0), tensor([0.9478, 0.0522]))

In [42]:
# is_retweet = True 
learn.predict("The States need to step up their own testing and take responsibility and if they can't, perhaps the politicians in those States needs to be replaced?")

(Category False, tensor(0), tensor([0.9129, 0.0871]))

In [43]:
texts

Unnamed: 0,label,text,is_valid
0,False,Be sure to tune in and watch Donald Trump on Late Night with David Letterman as he presents the Top Ten List tonight!,False
1,False,Donald Trump will be appearing on The View tomorrow morning to discuss Celebrity Apprentice and his new book Think Like A Champion!,False
2,False,Donald Trump reads Top Ten Financial Tips on Late Show with David Letterman: http://tinyurl.com/ooafwn - Very funny!,False
3,False,New Blog Post: Celebrity Apprentice Finale and Lessons Learned Along the Way: http://tinyurl.com/qlux5e,False
4,False,"""My persona will never be that of a wallflower - I’d rather build walls than cling to them"" --Donald J. Trump",False
...,...,...,...
53223,False,"Great new book out by @realKTMcFarland, “Revolution: Trump, Washington, and We the People”. Get your copy today! https://t.co/KBkladnmPQ",True
53224,True,"Our GREAT Senator from South Carolina, @SenatorTimScott just released a fantastic new book, “OPPORTUNITY KNOCKS: How H…",True
53225,True,"Great news, thank you!",True
53226,True,"Thank you @Honeywell, great job!",True


In [46]:
!ls

2019.json            condensed_2013.json  data_clas_export.pkl
2020.json            condensed_2014.json  data_lm_export.pkl
condensed_2009.json  condensed_2015.json  [34mmodels[m[m
condensed_2010.json  condensed_2016.json  texts.csv
condensed_2011.json  condensed_2017.json  trump_tweets.ipynb
condensed_2012.json  condensed_2018.json  tweet_df.csv
