# Trump Twitter Analysis



In [None]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Configuration: path of the JSON file holding the tweet archive to analyse.
DATAFILE = "trump_tweets_11_17.json"

In [None]:
# Load the JSON file named by DATAFILE into a pandas DataFrame.
trump_tweets = pd.read_json(DATAFILE)

# Report how many rows were loaded.
print('number of rows: %d' % len(trump_tweets))

In [None]:
# Report whether every id_str value is distinct; the indexing step below
# relies on that.
print(trump_tweets['id_str'].is_unique)

# Use id_str as the row identifier. verify_integrity=True makes pandas raise
# ValueError on duplicate ids instead of silently building a non-unique index.
trump_tweets = trump_tweets.set_index('id_str', verify_integrity=True)

In [144]:
# Preview the first five rows of the id_str-indexed frame.
trump_tweets.head(n=5)

Unnamed: 0_level_0,created_at,favorite_count,is_retweet,retweet_count,source,text
id_str,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1196148537525977088,2019-11-17 19:30:09,8670,0.0,2663,Twitter for iPhone,.@SteveScalise blew the nasty &amp; obnoxious ...
1196147508445163520,2019-11-17 19:26:04,2384,0.0,789,Twitter for iPhone,.@SteveScalese blew the nasty &amp; obnoxious ...
1196134600227078144,2019-11-17 18:34:46,14897,0.0,4349,Twitter for iPhone,Thanks Eric! https://t.co/6Ai7bqto3P
1196134448183566336,2019-11-17 18:34:10,0,1.0,16239,Twitter for iPhone,RT @EricTrump: I hope no one who serves our co...
1196134298325266432,2019-11-17 18:33:34,0,1.0,2505,Twitter for iPhone,RT @EricTrump: Trump Washington D.C! @TrumpDC ...


In [145]:
# Keep only the tweets whose source is one of the two mobile clients;
# rows from any other source are dropped.
trump_tweets = trump_tweets.loc[
    lambda frame: frame['source'].isin(['Twitter for Android', 'Twitter for iPhone'])
]
print('number of tweets from iPhone + Android:\t%d' % len(trump_tweets))

number of tweets from iPhone + Android:	18599


In [146]:
# Preview the first five rows after filtering by source.
trump_tweets.head(n=5)

Unnamed: 0_level_0,created_at,favorite_count,is_retweet,retweet_count,source,text
id_str,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1196148537525977088,2019-11-17 19:30:09,8670,0.0,2663,Twitter for iPhone,.@SteveScalise blew the nasty &amp; obnoxious ...
1196147508445163520,2019-11-17 19:26:04,2384,0.0,789,Twitter for iPhone,.@SteveScalese blew the nasty &amp; obnoxious ...
1196134600227078144,2019-11-17 18:34:46,14897,0.0,4349,Twitter for iPhone,Thanks Eric! https://t.co/6Ai7bqto3P
1196134448183566336,2019-11-17 18:34:10,0,1.0,16239,Twitter for iPhone,RT @EricTrump: I hope no one who serves our co...
1196134298325266432,2019-11-17 18:33:34,0,1.0,2505,Twitter for iPhone,RT @EricTrump: Trump Washington D.C! @TrumpDC ...


In [147]:
# Split the label column off the frame: `source` becomes the target Series
# and is removed from trump_tweets.
target_array = trump_tweets['source']
trump_tweets = trump_tweets.drop(columns=['source'])

In [148]:
# Discard the created_at column.
trump_tweets = trump_tweets.drop(columns=['created_at'])

In [149]:
# Cast every entry of the text column to a Python str.
trump_tweets['text'] = trump_tweets['text'].astype(str)

In [150]:
# Preview the first five rows of the remaining columns.
trump_tweets.head(n=5)

Unnamed: 0_level_0,favorite_count,is_retweet,retweet_count,text
id_str,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1196148537525977088,8670,0.0,2663,.@SteveScalise blew the nasty &amp; obnoxious ...
1196147508445163520,2384,0.0,789,.@SteveScalese blew the nasty &amp; obnoxious ...
1196134600227078144,14897,0.0,4349,Thanks Eric! https://t.co/6Ai7bqto3P
1196134448183566336,0,1.0,16239,RT @EricTrump: I hope no one who serves our co...
1196134298325266432,0,1.0,2505,RT @EricTrump: Trump Washington D.C! @TrumpDC ...


In [153]:
# Reduce the frame to its text column; from here on trump_tweets is a Series
# of tweet text indexed by id_str.
trump_tweets = trump_tweets['text']

In [154]:
# Preview the first five entries of the text Series.
trump_tweets.head(n=5)

id_str
1196148537525977088    .@SteveScalise blew the nasty &amp; obnoxious ...
1196147508445163520    .@SteveScalese blew the nasty &amp; obnoxious ...
1196134600227078144                 Thanks Eric! https://t.co/6Ai7bqto3P
1196134448183566336    RT @EricTrump: I hope no one who serves our co...
1196134298325266432    RT @EricTrump: Trump Washington D.C! @TrumpDC ...
Name: text, dtype: object

In [155]:
# Build a tf.data pipeline of (tweet text, source label) pairs, one element
# per tweet, from the underlying arrays of the two Series.
dataset = tf.data.Dataset.from_tensor_slices(
    (trump_tweets.values, target_array.values)
)

In [156]:
# Shuffle with a buffer sized to the number of tweets, then emit the
# elements in batches of one.
train_dataset = dataset.shuffle(buffer_size=len(trump_tweets)).batch(batch_size=1)