In [433]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.utils import resample
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import math
import datetime
from textblob import TextBlob 

%matplotlib inline

import seaborn as sns
sns.set(style='whitegrid')
# plt.rcParams.update({'font.size': 22})

plt.style.use('seaborn')
pd.set_option('display.width', 1500)
pd.set_option('display.max_columns', 100)

# FINDING OUR TRAINING SET

## Importing full Trump dataset

- `trump_tweet_original_df` includes all the scraped data for Trump's tweet. Original dataset and no filtering is done on this dataset.
- `trump_tweet_with_features` includes scraped Trump tweets that are not retweets. Features such as sentiments are added too.

In [434]:
##### Data Import --- ALL TRUMP's TWEET INCL. NONRTs and RTs
trump_tweet_original_df = pd.read_csv("data/tweets_scraped_11_07_2019.csv")

# Convering 'created_at' to datetime format.
trump_tweet_original_df['created_at'] = pd.to_datetime(trump_tweet_original_df['created_at'])

##### IMPORTING THE DATAFRAME WITH ALL THE FEATURES GENERATED
trump_tweet_with_features = pd.read_csv("data/full.csv")

# Converting 'created_at' to datetime objects.
trump_tweet_with_features['created_at'] = pd.to_datetime(trump_tweet_with_features['created_at'])


## Filtering for Pre-iPhone Tweets

In [435]:
# Filter out DT's tweets before he switched to the iPhone. Then, we have a base training set
# of Trump's tweet if we filter for all Android Tweets.
pre_iphone_tweets = trump_tweet_original_df[trump_tweet_original_df['created_at'] < pd.to_datetime('03-24-2017 14:41:15')]

# Setting up ground-truth variable. 0 is non-DT, and 1 is DT
pre_iphone_tweets['sender'] = (pre_iphone_tweets['source'] == 'Twitter for Android').astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [436]:
######## GENERATE FEATURES - NOT USING THIS

# def generate_features(df_input):
    
#     df = df_input.copy()
    
#     df['polarity'] = df['text'].apply(lambda text: TextBlob(text).sentiment[0])
#     df['subjectivity'] = df['text'].apply(lambda text: TextBlob(text).sentiment[1])

#     df['hash'] = (df['text'].str.contains('#') == True).astype(int)
#     df['linked'] = (df['text'].str.contains('https://t.co/') == True).astype(int)
#     df['dot'] = (df['text'].str.contains('...') == True).astype(int)
    
#     df['year'] = df.created_at.dt.year
#     df['month'] = df.created_at.dt.month
#     df['day'] = df.created_at.dt.day
#     df['hour'] = df.created_at.dt.hour
#     df['minute'] = df.created_at.dt.minute
# #     df = df.drop(['created_at', 'source'], axis = 1, errors='ignore')
    
#     return df


In [437]:
print(trump_tweet_with_features.columns.values)
print("Number of Columns: {}".format(len(trump_tweet_with_features.columns)))

['Unnamed: 0' 'source' 'text' 'created_at' 'retweet_count'
 'favorite_count' 'is_retweet' 'id_str' 'date' 'hour' 'min' 'time'
 'linked' 'exist_stock' 'exist_market' 'exist_agreement'
 'exist_negotiator' 'exist_negotiation' 'exist_trade' 'exist_china'
 'exist_economy' 'exist_job' 'exist_tariff' 'exist_employment' 'exist_s&p'
 'exist_auto' 'exist_farmer' 'economy_word_occurrance' 'hash' 'dot'
 'polarity' 'subjectivity']
Number of Columns: 32


In [438]:
pre_iphone_tweets_features = trump_tweet_with_features[trump_tweet_with_features['created_at'] < pd.to_datetime('03-24-2017 14:41:15')]

pre_iphone_tweets_features['sender'] = (pre_iphone_tweets_features['source'] == 'Twitter for Android').astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [439]:
training_set = pre_iphone_tweets_features[['created_at', 'hour', 'linked',
                                'hash', 'dot', 'polarity', 'subjectivity', 'sender', 'text']]

In [440]:
training_set['year'] = training_set.created_at.dt.year
training_set['month'] = training_set.created_at.dt.month
training_set['day'] = training_set.created_at.dt.day
training_set['hour'] = training_set.created_at.dt.hour
training_set['minute'] = training_set.created_at.dt.minute
training_set = training_set.drop(['created_at'], axis = 1, errors='ignore')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [441]:
training_set.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,sender,text,year,month,day,minute
0,11,False,0,0,0.45,0.833333,1,such a beautiful and important evening! the fo...,2016,11,9,36
1,19,True,0,0,1.0,1.0,0,happy 241st birthday to the u.s. marine corps!...,2016,11,10,31
2,2,False,0,0,0.58,0.676667,1,a fantastic day in d.c. met with president oba...,2016,11,11,10
3,2,False,0,0,0.009375,0.675,1,just had a very open and successful presidenti...,2016,11,11,19
4,11,False,0,0,0.41,0.563333,1,love the fact that the small groups of protest...,2016,11,11,14


In [442]:
training_set.columns.values

array(['hour', 'linked', 'hash', 'dot', 'polarity', 'subjectivity',
       'sender', 'text', 'year', 'month', 'day', 'minute'], dtype=object)

# Pre-iPhone Train-Test Split

In [443]:
pre_iphone_train, pre_iphone_test = train_test_split(training_set.drop('text', axis=1, errors='ignore'),
                                                     test_size=0.2, 
                                                     stratify = training_set.sender)

In [444]:
x_pre_iphone_train = pre_iphone_train.drop('sender', axis=1, errors='ignore')
y_pre_iphone_train = pre_iphone_train.sender

x_pre_iphone_test = pre_iphone_test.drop('sender', axis=1, errors='ignore')
y_pre_iphone_test = pre_iphone_test.sender

In [445]:
features = x_pre_iphone_train.columns.values

# Baseline Random Forest Classifier

In [446]:
model = RandomForestClassifier()

In [447]:
model.fit(x_pre_iphone_train, y_pre_iphone_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [448]:
## TRAINING ACCURACY
model.score(x_pre_iphone_train, y_pre_iphone_train)

0.9981378026070763

In [449]:
## TEST ACCURACY
model.score(x_pre_iphone_test, y_pre_iphone_test)

0.8518518518518519

Baseline Accuracies:
- `Training`: 0.994413407821229
- `Test`: 0.9333333333333333

# Trying Out TFIDF Vector

In [450]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [451]:
tfidf = TfidfVectorizer(max_df=0.95,min_df=0.02)

In [452]:
tfidf.fit(trump_tweet_original_df.text)

TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
                dtype=<class 'numpy.float64'>, encoding='utf-8',
                input='content', lowercase=True, max_df=0.95, max_features=None,
                min_df=0.02, ngram_range=(1, 1), norm='l2', preprocessor=None,
                smooth_idf=True, stop_words=None, strip_accents=None,
                sublinear_tf=False, token_pattern='(?u)\\b\\w\\w+\\b',
                tokenizer=None, use_idf=True, vocabulary=None)

In [453]:
n_cols = len(tfidf.get_feature_names())

In [454]:
tfidf_vector = pd.DataFrame(tfidf.transform(training_set.text).toarray(), columns = tfidf.get_feature_names())

In [455]:
training_with_tfidf = pd.concat([training_set, tfidf_vector], axis = 1).drop('text', axis = 1, errors='ignore')

In [456]:
training_with_tfidf.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,sender,year,month,day,minute,about,after,again,against,all,also,am,america,american,amp,an,and,are,as,at,back,bad,be,because,been,before,being,better,big,border,but,by,can,china,co,collusion,congress,country,crime,day.1,deal,democrats,dems,did,...,state,states,strong,tax,than,thank,that,the,their,them,there,they,this,time,to,today,total,trade,trump,two,united,up,us,very,vote,wall,want,was,way,we,well,were,what,when,which,whitehouse,who,why,will,win,witch,with,work,working,world,would,year.1,years,you,your
0,11,False,0,0,0.45,0.833333,1,2016,11,9,36,0.0,0.0,0.314953,0.0,0.230941,0.0,0.0,0.0,0.0,0.0,0.0,0.262353,0.0,0.258873,0.0,0.0,0.0,0.199657,0.0,0.0,0.339969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.10258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.207097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.395671,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,19,True,0,0,1.0,1.0,0,2016,11,10,31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285876,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.463842,0.0,0.180355,0.0,0.0,0.0,0.0,0.0,0.0,0.219746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.381256,0.554051
2,2,False,0,0,0.58,0.676667,1,2016,11,11,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.364535,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320658,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.216091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,False,0,0,0.009375,0.675,1,2016,11,11,19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.147141,0.22142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.267925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.553239,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,11,False,0,0,0.41,0.563333,1,2016,11,11,14,0.0,0.0,0.0,0.0,0.292238,0.0,0.0,0.0,0.0,0.0,0.0,0.165994,0.0,0.0,0.0,0.0,0.0,0.252651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.331288,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.23959,0.259616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.262065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.250346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [457]:
pre_iphone_train_with_tfidf, pre_iphone_test_with_tfidf = train_test_split(training_with_tfidf,
                                                                           test_size=0.2, 
                                                                           stratify = training_with_tfidf.sender)

In [458]:
x_pre_iphone_train_wtfidf = pre_iphone_train_with_tfidf.drop('sender', axis=1, errors='ignore')
y_pre_iphone_train_wtfidf = pre_iphone_train_with_tfidf.sender

x_pre_iphone_test_wtfidf = pre_iphone_test_with_tfidf.drop('sender', axis=1, errors='ignore')
y_pre_iphone_test_wtfidf = pre_iphone_test_with_tfidf.sender

In [459]:
model_with_tfidf = RandomForestClassifier()

In [460]:
model_with_tfidf.fit(x_pre_iphone_train_wtfidf, y_pre_iphone_train_wtfidf)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [461]:
model_with_tfidf.score(x_pre_iphone_train_wtfidf, y_pre_iphone_train_wtfidf)

0.9888268156424581

In [462]:
model_with_tfidf.score(x_pre_iphone_test_wtfidf, y_pre_iphone_test_wtfidf)

0.8814814814814815

Baseline Accuracies:
- `Training`: 0.9906890130353817
- `Test`: 0.8740740740740741

# TEST ON MANUALLY LABELLED SET

In [463]:
manually_labelled_set = pd.read_csv("manul100.csv")
manually_labelled_set['created_at'] = pd.to_datetime(manually_labelled_set['created_at'])


In [464]:
manually_labelled_set['year'] = manually_labelled_set.created_at.dt.year
manually_labelled_set['month'] = manually_labelled_set.created_at.dt.month
manually_labelled_set['day'] = manually_labelled_set.created_at.dt.day
manually_labelled_set['hour'] = manually_labelled_set.created_at.dt.hour
manually_labelled_set['minute'] = manually_labelled_set.created_at.dt.minute
manually_labelled_set = manually_labelled_set[np.append(features, ['text', 'AVG'])]

In [465]:
manually_labelled_set.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,AVG
0,21,True,0,0,0.125,0.216667,2017,8,12,19,we must remember this truth: no matter our col...,1.0
1,11,False,0,0,0.233333,0.133333,2019,10,23,36,republicans are going to fight harder than eve...,4.5
2,1,False,0,0,0.384333,0.776667,2019,9,11,4,i am pleased to endorse governor mike parson o...,4.0
3,14,False,0,0,0.25,0.21875,2019,2,24,56,poll: suburban women are coming back into the ...,1.5
4,17,True,1,0,0.5,0.45,2019,5,4,53,today may 4th - is international firefighters ...,1.0


In [466]:
manually_labelled_set['sender'] = (manually_labelled_set['AVG'] >= 2.5).astype(int)

In [467]:
manually_labelled_set.sender.value_counts()

1    65
0    35
Name: sender, dtype: int64

In [468]:
manually_labelled_set.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,AVG,sender
0,21,True,0,0,0.125,0.216667,2017,8,12,19,we must remember this truth: no matter our col...,1.0,0
1,11,False,0,0,0.233333,0.133333,2019,10,23,36,republicans are going to fight harder than eve...,4.5,1
2,1,False,0,0,0.384333,0.776667,2019,9,11,4,i am pleased to endorse governor mike parson o...,4.0,1
3,14,False,0,0,0.25,0.21875,2019,2,24,56,poll: suburban women are coming back into the ...,1.5,0
4,17,True,1,0,0.5,0.45,2019,5,4,53,today may 4th - is international firefighters ...,1.0,0


In [469]:
x_manual_test_set = manually_labelled_set.drop(['sender', 'text', 'AVG'], axis = 1, errors='ignore')
y_manual_test_set = manually_labelled_set.sender


In [470]:
model.score(x_manual_test_set, y_manual_test_set)


0.7

0.75

# TEST ON MANUALLY LABELLED SET (WITH TFIDF)

In [471]:
tfidf_vector_manual = pd.DataFrame(tfidf.transform(manually_labelled_set.text).toarray(), columns = tfidf.get_feature_names())

In [472]:
test_with_tfidf_manual = pd.concat([manual_test_set, tfidf_vector_manual], axis = 1).drop('text', axis = 1, errors='ignore')

In [473]:
test_with_tfidf_manual.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,about,after,again,against,all,also,am,america,american,amp,an,and,are,as,at,back,bad,be,because,been,before,being,better,big,border,but,by,can,china,co,collusion,congress,country,crime,day.1,deal,democrats,dems,did,do,...,state,states,strong,tax,than,thank,that,the,their,them,there,they,this,time,to,today,total,trade,trump,two,united,up,us,very,vote,wall,want,was,way,we,well,were,what,when,which,whitehouse,who,why,will,win,witch,with,work,working,world,would,year.1,years,you,your
0,21,True,0,0,0.125,0.216667,2017,8,12,19,0.0,0.0,0.0,0.0,0.266008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.227369,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.187288,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.255037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.477087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,11,False,0,0,0.233333,0.133333,2019,10,23,36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15366,0.0,0.0,0.242908,0.0,0.0,0.250437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.203794,0.0,0.0,0.0,0.201145,0.0,0.0,0.212367,...,0.0,0.0,0.0,0.0,0.223507,0.0,0.0,0.159705,0.0,0.0,0.0,0.0,0.0,0.0,0.291878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222233,0.0,0.0,0.0,0.0,0.0,0.0,0.263123,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,False,0,0,0.384333,0.776667,2019,9,11,4,0.0,0.0,0.2036,0.0,0.0,0.0,0.21013,0.0,0.0,0.0,0.0,0.169597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.220741,0.0,0.0,0.0,0.122395,0.066313,0.0,0.0,0.0,0.0,0.0,0.0,0.080796,0.0,0.226106,0.0,0.0,0.0,0.0,0.0,0.0,0.159417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.184552,0.0,0.0,0.0,0.0,0.0,0.12789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,14,False,0,0,0.25,0.21875,2019,2,24,56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.177037,0.266407,0.0,0.0,0.21057,0.0,0.0,0.217096,0.0,0.0,0.0,0.0,0.0,0.390419,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.276887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.458491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,17,True,1,0,0.5,0.45,2019,5,4,53,0.0,0.0,0.0,0.0,0.193483,0.0,0.0,0.0,0.0,0.0,0.0,0.1099,0.165379,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136225,0.0,0.0,0.0,0.0,0.273694,0.0,0.0,0.0,0.0,0.228563,...,0.0,0.0,0.0,0.0,0.0,0.221029,0.0,0.085942,0.22657,0.0,0.0,0.0,0.0,0.0,0.314139,0.234437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.347013,0.0,0.0,0.0,0.0,0.0,0.0,0.435654,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181675,0.0


In [474]:
x_test_with_tfidf_manual = test_with_tfidf_manual.drop('sender', axis=1, errors='ignore')

y_test_with_tfidf_manual = y_manual_test_set


In [475]:

model_with_tfidf.score(x_test_with_tfidf_manual, y_test_with_tfidf_manual)

0.88

# TWEET STUDIO (GROUND TRUTH:FALSE) GENERATION

In [476]:
tms_tweets = trump_tweet_with_features[trump_tweet_with_features['source'] == 'Twitter Media Studio']

In [477]:
tms_tweets['year'] = tms_tweets.created_at.dt.year
tms_tweets['month'] = tms_tweets.created_at.dt.month
tms_tweets['day'] = tms_tweets.created_at.dt.day
tms_tweets['hour'] = tms_tweets.created_at.dt.hour
tms_tweets['minute'] = tms_tweets.created_at.dt.minute


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [478]:
tms_tweets['sender'] = 0
tms_cleaned = tms_tweets[np.append(features, ['text', 'sender'])]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [479]:
tms_cleaned.tail()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,sender
9161,23,True,0,0,0.0,0.0,2019,10,18,37,republicans must stick together and fight! htt...,0
9206,3,True,0,0,0.0,0.0,2019,10,21,54,https://t.co/osn6amjzo4,0
9302,21,True,0,0,0.0,0.0,2019,10,27,24,thank you to @martharaddatz and @terrymoran fo...,0
9345,20,True,0,1,0.15,0.377778,2019,10,30,42,the democrats have been on this path for 3 yea...,0
9362,22,True,0,0,-0.153125,0.2125,2019,10,31,44,while the do nothing democrats fail the americ...,0


In [480]:
manually_labelled_set

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,AVG,sender
0,21,True,0,0,0.125000,0.216667,2017,8,12,19,we must remember this truth: no matter our col...,1.0,0
1,11,False,0,0,0.233333,0.133333,2019,10,23,36,republicans are going to fight harder than eve...,4.5,1
2,1,False,0,0,0.384333,0.776667,2019,9,11,4,i am pleased to endorse governor mike parson o...,4.0,1
3,14,False,0,0,0.250000,0.218750,2019,2,24,56,poll: suburban women are coming back into the ...,1.5,0
4,17,True,1,0,0.500000,0.450000,2019,5,4,53,today may 4th - is international firefighters ...,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,11,False,0,1,0.000000,0.562500,2019,5,23,56,the democrats are getting nothing done in cong...,3.0,1
96,14,False,0,0,0.111806,0.536111,2018,6,16,1,the ig report totally destroys james comey and...,2.5,1
97,2,False,0,1,-0.200000,0.883333,2017,7,25,23,the amazon washington post fabricated the fact...,4.0,1
98,21,True,0,0,0.000000,0.000000,2019,4,22,30,this should never happen to another president ...,1.0,0


In [481]:
tms_merged = tms_cleaned.append(manually_labelled_set.drop('AVG', axis = 1, errors='ignore'), ignore_index=True)

In [482]:
tms_merged

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,sender
0,17,True,0,0,0.000000,0.000000,2018,10,17,29,https://t.co/afqhydsmb5,0
1,17,True,0,0,0.000000,0.000000,2018,10,17,38,https://t.co/cmxxw2fedq,0
2,17,True,0,0,0.000000,0.000000,2018,10,17,41,https://t.co/madlgnhese,0
3,17,True,0,0,0.000000,0.000000,2018,10,17,52,https://t.co/mnkdygu5sr,0
4,17,True,0,0,0.500000,1.000000,2018,10,26,23,i would like to begin today’s remarks by provi...,0
...,...,...,...,...,...,...,...,...,...,...,...,...
214,11,False,0,1,0.000000,0.562500,2019,5,23,56,the democrats are getting nothing done in cong...,1
215,14,False,0,0,0.111806,0.536111,2018,6,16,1,the ig report totally destroys james comey and...,1
216,2,False,0,1,-0.200000,0.883333,2017,7,25,23,the amazon washington post fabricated the fact...,1
217,21,True,0,0,0.000000,0.000000,2019,4,22,30,this should never happen to another president ...,0


In [483]:
x_tms_merged = tms_merged.drop(['sender', 'text'], axis = 1, errors='ignore')
y_tms_merged = tms_merged.sender

In [484]:
model.score(x_tms_merged, y_tms_merged)

0.863013698630137

# TWEET STUDIO (GROUND TRUTH:FALSE) GENERATION with TFIDF

In [485]:
tfidf_vector_full = pd.DataFrame(tfidf.transform(tms_merged.text).toarray(), columns = tfidf.get_feature_names())

In [486]:
test_with_tfidf_full = pd.concat([tms_merged, tfidf_vector_full], axis = 1).drop('text', axis = 1, errors='ignore')

In [487]:
x_tfidf_full_test = test_with_tfidf_full.drop('sender', axis=1, errors='ignore')
y_tfidf_full_test = test_with_tfidf_full.sender


In [488]:
model_with_tfidf.score(x_tfidf_full_test, y_tfidf_full_test)

0.9452054794520548

# REAL DONALD TRUMP TWEETS GENERATION

In [489]:
trump_tweet_with_features

trump_tweet_with_features['year'] = trump_tweet_with_features.created_at.dt.year
trump_tweet_with_features['month'] = trump_tweet_with_features.created_at.dt.month
trump_tweet_with_features['day'] = trump_tweet_with_features.created_at.dt.day
trump_tweet_with_features['hour'] = trump_tweet_with_features.created_at.dt.hour
trump_tweet_with_features['minute'] = trump_tweet_with_features.created_at.dt.minute

In [490]:
trump_tweet_with_features_cleaned  = trump_tweet_with_features[np.append(features, ['text'])]

In [491]:
trump_tweet_with_features_cleaned.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text
0,11,False,0,0,0.45,0.833333,2016,11,9,36,such a beautiful and important evening! the fo...
1,19,True,0,0,1.0,1.0,2016,11,10,31,happy 241st birthday to the u.s. marine corps!...
2,2,False,0,0,0.58,0.676667,2016,11,11,10,a fantastic day in d.c. met with president oba...
3,2,False,0,0,0.009375,0.675,2016,11,11,19,just had a very open and successful presidenti...
4,11,False,0,0,0.41,0.563333,2016,11,11,14,love the fact that the small groups of protest...


In [492]:
tfidf_vector_all = pd.DataFrame(tfidf.transform(trump_tweet_with_features_cleaned.text).toarray(), columns = tfidf.get_feature_names())

In [493]:
to_generate = pd.concat([trump_tweet_with_features_cleaned, tfidf_vector_all], axis = 1).drop('text', axis = 1, errors='ignore')

In [494]:
to_generate

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,about,after,again,against,all,also,am,america,american,amp,an,and,are,as,at,back,bad,be,because,been,before,being,better,big,border,but,by,can,china,co,collusion,congress,country,crime,day.1,deal,democrats,dems,did,do,...,state,states,strong,tax,than,thank,that,the,their,them,there,they,this,time,to,today,total,trade,trump,two,united,up,us,very,vote,wall,want,was,way,we,well,were,what,when,which,whitehouse,who,why,will,win,witch,with,work,working,world,would,year.1,years,you,your
0,11,False,0,0,0.450000,0.833333,2016,11,9,36,0.000000,0.0,0.314953,0.000000,0.230941,0.0,0.0,0.0,0.0,0.000000,0.0,0.262353,0.000000,0.258873,0.0,0.0,0.0,0.199657,0.0,0.0,0.339969,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.102580,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.207097,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.395671,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
1,19,True,0,0,1.000000,1.000000,2016,11,10,31,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.285876,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.463842,0.000000,0.180355,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.219746,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.381256,0.554051
2,2,False,0,0,0.580000,0.676667,2016,11,11,10,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.364535,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.320658,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.216091,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
3,2,False,0,0,0.009375,0.675000,2016,11,11,19,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.147141,0.221420,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.267925,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.115065,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.553239,0.0,0.0,0.0,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
4,11,False,0,0,0.410000,0.563333,2016,11,11,14,0.000000,0.0,0.000000,0.000000,0.292238,0.0,0.0,0.0,0.0,0.000000,0.0,0.165994,0.000000,0.000000,0.0,0.0,0.0,0.252651,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.331288,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.239590,0.259616,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.262065,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.250346,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9489,15,False,0,0,-0.140625,0.458333,2019,11,7,16,0.000000,0.0,0.000000,0.223011,0.000000,0.0,0.0,0.0,0.0,0.145649,0.0,0.170318,0.000000,0.168059,0.0,0.0,0.0,0.129616,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.122915,0.133189,0.0,0.000000,0.0,0.144049,0.143742,0.000000,0.162279,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.14982,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.218509,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
9490,15,True,1,0,0.000000,0.000000,2019,11,7,18,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.395065,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.641004,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.526874,0.000000
9491,15,False,0,0,-0.116071,0.750000,2019,11,7,27,0.298151,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.237211,0.0,0.277387,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.359451,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.108459,0.0,0.000000,0.0,0.469210,0.234105,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.218964,0.0,0.0,0.0,0.0,0.325292,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
9492,15,False,0,0,0.169697,0.349053,2019,11,7,41,0.000000,0.0,0.000000,0.288874,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.220618,0.165994,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.270379,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.172524,0.0,0.267093,0.0,0.000000,0.000000,0.000000,0.210205,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.00000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.284245,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000


In [495]:
prediction = model_with_tfidf.predict(to_generate)

In [496]:
predict_proba = model_with_tfidf.predict_proba(to_generate)[:,1]

In [497]:
trump_tweet_with_features_cleaned['trump_classification_binary'] = prediction
trump_tweet_with_features_cleaned['trump_classification_proba'] = predict_proba


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [498]:
trump_tweet_with_features_cleaned

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,trump_classification_binary,trump_classification_proba
0,11,False,0,0,0.450000,0.833333,2016,11,9,36,such a beautiful and important evening! the fo...,1,0.8
1,19,True,0,0,1.000000,1.000000,2016,11,10,31,happy 241st birthday to the u.s. marine corps!...,0,0.0
2,2,False,0,0,0.580000,0.676667,2016,11,11,10,a fantastic day in d.c. met with president oba...,1,1.0
3,2,False,0,0,0.009375,0.675000,2016,11,11,19,just had a very open and successful presidenti...,1,1.0
4,11,False,0,0,0.410000,0.563333,2016,11,11,14,love the fact that the small groups of protest...,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9489,15,False,0,0,-0.140625,0.458333,2019,11,7,16,it was just explained to me that for next week...,1,0.8
9490,15,True,1,0,0.000000,0.000000,2019,11,7,18,thank you! #maga https://t.co/e6dzshyfmv,0,0.0
9491,15,False,0,0,-0.116071,0.750000,2019,11,7,27,the amazon washington post and three lowlife r...,1,0.8
9492,15,False,0,0,0.169697,0.349053,2019,11,7,41,the radical left dems and lamestream media are...,1,1.0


# Adding Dummy Variables

In [499]:
word_list = ["stock", 'market', "agreement", "negotiator", "negotiation", 
             "trade", "china", "economy", "job", "tariff", "employment",
            "s&p", "auto", "farmer"]

wl_column_names = pd.Series(word_list).apply(lambda x: "exist_{}".format(x))

In [500]:
for num, word in enumerate(word_list):
    
    trump_tweet_with_features_cleaned["exist_{}".format(word)] = (trump_tweet_with_features_cleaned['text'].str.contains(word) == True).astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [501]:
trump_tweet_with_features_cleaned.head()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,trump_classification_binary,trump_classification_proba,exist_stock,exist_market,exist_agreement,exist_negotiator,exist_negotiation,exist_trade,exist_china,exist_economy,exist_job,exist_tariff,exist_employment,exist_s&p,exist_auto,exist_farmer
0,11,False,0,0,0.45,0.833333,2016,11,9,36,such a beautiful and important evening! the fo...,1,0.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19,True,0,0,1.0,1.0,2016,11,10,31,happy 241st birthday to the u.s. marine corps!...,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,False,0,0,0.58,0.676667,2016,11,11,10,a fantastic day in d.c. met with president oba...,1,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,False,0,0,0.009375,0.675,2016,11,11,19,just had a very open and successful presidenti...,1,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,11,False,0,0,0.41,0.563333,2016,11,11,14,love the fact that the small groups of protest...,1,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [502]:
trump_tweet_with_features_cleaned['word_mentions'] = trump_tweet_with_features_cleaned[wl_column_names].sum(axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [503]:
trump_tweet_with_features_cleaned.tail()


Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,trump_classification_binary,trump_classification_proba,exist_stock,exist_market,exist_agreement,exist_negotiator,exist_negotiation,exist_trade,exist_china,exist_economy,exist_job,exist_tariff,exist_employment,exist_s&p,exist_auto,exist_farmer,word_mentions
9489,15,False,0,0,-0.140625,0.458333,2019,11,7,16,it was just explained to me that for next week...,1,0.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9490,15,True,1,0,0.0,0.0,2019,11,7,18,thank you! #maga https://t.co/e6dzshyfmv,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9491,15,False,0,0,-0.116071,0.75,2019,11,7,27,the amazon washington post and three lowlife r...,1,0.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9492,15,False,0,0,0.169697,0.349053,2019,11,7,41,the radical left dems and lamestream media are...,1,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9493,15,False,0,0,0.212121,0.351515,2019,11,7,43,stock market up big today. a new record. enjoy!,1,0.7,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2


In [504]:
trump_tweet_with_features_cleaned.to_csv("Working_Prediction.csv")

# Adding `effective_date` Feature

In [505]:
trump_tweet_with_features_cleaned['created_datetime'] = trump_tweet_with_features['created_at']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [506]:
trump_tweet_with_features_cleaned.tail()

Unnamed: 0,hour,linked,hash,dot,polarity,subjectivity,year,month,day,minute,text,trump_classification_binary,trump_classification_proba,exist_stock,exist_market,exist_agreement,exist_negotiator,exist_negotiation,exist_trade,exist_china,exist_economy,exist_job,exist_tariff,exist_employment,exist_s&p,exist_auto,exist_farmer,word_mentions,created_datetime
9489,15,False,0,0,-0.140625,0.458333,2019,11,7,16,it was just explained to me that for next week...,1,0.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-07 15:16:00
9490,15,True,1,0,0.0,0.0,2019,11,7,18,thank you! #maga https://t.co/e6dzshyfmv,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-07 15:18:00
9491,15,False,0,0,-0.116071,0.75,2019,11,7,27,the amazon washington post and three lowlife r...,1,0.8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-07 15:27:00
9492,15,False,0,0,0.169697,0.349053,2019,11,7,41,the radical left dems and lamestream media are...,1,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-07 15:41:00
9493,15,False,0,0,0.212121,0.351515,2019,11,7,43,stock market up big today. a new record. enjoy!,1,0.7,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2019-11-07 15:43:00
