In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Notebook-wide plotting configuration.
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
# Apply seaborn's 'colorblind' palette and 'white' style to subsequent plots.
sns.set_palette('colorblind')
sns.set_style('white')



In [2]:
# Load each candidate's tweet export and tag every row with its author so the
# two sources stay distinguishable once they are combined.
trump_tweets = pd.read_csv('realdonaldtrump_tweets.csv')
clinton_tweets = pd.read_csv('hillaryclinton_tweets.csv')

trump_tweets["author"] = "Trump"
clinton_tweets["author"] = "Clinton"

# ignore_index=True renumbers the combined rows 0..n-1 in one step. This
# replaces the former reset_index() + drop("index", 1) pair, whose positional
# axis argument is rejected by pandas >= 2.0.
tweets = pd.concat([trump_tweets, clinton_tweets], ignore_index=True)
# .copy() makes the column subset an independent frame, so the assignment
# below does not write to a slice of the concatenated frame.
tweets = tweets[[ u'id', u'text', u'created_at', u'favorite_count', u'retweet_count', u'author']].copy()
# Tweet ids are identifiers, not quantities: keep them as strings so they are
# never aggregated or shown in scientific notation.
tweets["id"] = tweets["id"].astype("str")
# Preview only the first rows rather than rendering the whole frame.
tweets.head(10)

Unnamed: 0,id,text,created_at,favorite_count,retweet_count,author
0,783436108176629760,Thank you ARIZONA! This is a MOVEMENT like nob...,Tue Oct 04 22:38:18 +0000 2016,4229,1729,Trump
1,783393314309484544,My childcare plan makes a difference for worki...,Tue Oct 04 19:48:15 +0000 2016,11110,4719,Trump
2,783391423663964160,I will be watching the great Governor @Mike_Pe...,Tue Oct 04 19:40:44 +0000 2016,14211,4809,Trump
3,783390310969651200,"Join me in Reno, Nevada tomorrow at 3:30pm! #A...",Tue Oct 04 19:36:19 +0000 2016,5743,2053,Trump
4,783149570721144832,"Join me in Reno, Nevada on Wednesday at 3:30pm...",Tue Oct 04 03:39:42 +0000 2016,13895,6189,Trump
5,783129603363659776,Thank you Colorado! #MAGA\nhttps://t.co/3KWOl2...,Tue Oct 04 02:20:21 +0000 2016,20271,8390,Trump
6,783065029172088832,We must bring the truth directly to hard-worki...,Mon Oct 03 22:03:46 +0000 2016,22735,10633,Trump
7,783059449128759296,"Thank you Pueblo, Colorado! \n#TrumpRally #Ame...",Mon Oct 03 21:41:35 +0000 2016,15042,6178,Trump
8,782946021957599232,"Join me in Henderson, Nevada on Wednesday at 1...",Mon Oct 03 14:10:52 +0000 2016,14300,5837,Trump
9,782728287718150144,Just announced that Iraq (U.S.) is preparing f...,Sun Oct 02 23:45:40 +0000 2016,37654,13857,Trump


In [3]:
def percentInCaps(s):
    """Return the share of cased letters in ``s`` that are uppercase.

    Only characters for which ``isupper()`` or ``islower()`` is true are
    counted; digits, whitespace, punctuation and other uncased characters are
    ignored. Despite the name, the result is a fraction between 0.0 and 1.0,
    not a value on a 0-100 scale.

    Parameters:
        s: the text to inspect (iterated character by character).

    Returns:
        float: uppercase letters divided by all cased letters, or 0.0 when
        ``s`` contains no cased letters at all (for example an empty string or
        digits only), instead of raising ZeroDivisionError.
    """
    inCaps = sum(1 for c in s if c.isupper())
    inLowerCase = sum(1 for c in s if c.islower())
    casedLetters = inCaps + inLowerCase
    # No cased letters means the ratio is undefined; report 0.0 rather than
    # dividing by zero so one odd tweet cannot abort the whole apply().
    if casedLetters == 0:
        return 0.0
    return inCaps/float(casedLetters)

def num_hashtags(s):
    """Return how many '#' characters occur in the text ``s``."""
    return s.count("#")

def num_mentions(s):
    """Return how many '@' characters occur in the text ``s``."""
    return s.count("@")

# Characters counted as punctuation. '#' and '@' are not in this set, so
# hashtag and mention markers do not contribute to the punctuation share.
punctuation = '!"$%&\'()*+,-./:;<=>?[\\]^_`{|}~'
def percentPunctuation(s):
    """Return the share of characters in ``s`` that are punctuation.

    A character counts as punctuation when it appears in the module-level
    ``punctuation`` string. Despite the name, the result is a fraction between
    0.0 and 1.0, not a value on a 0-100 scale.

    Parameters:
        s: the text to inspect.

    Returns:
        float: punctuation characters divided by ``len(s)``, or 0.0 for an
        empty ``s`` instead of raising ZeroDivisionError.
    """
    totalChars = len(s)
    # An empty text has no characters to classify; avoid dividing by zero.
    if totalChars == 0:
        return 0.0
    return sum(1 for c in s if c in punctuation)/float(totalChars)
    
# Derive per-tweet text features; each helper takes the tweet text and returns
# a single number, so it can be passed to apply() directly.
tweets["percentPunctuation"] = tweets["text"].apply(percentPunctuation)
tweets["percentInCaps"] = tweets["text"].apply(percentInCaps)
# NOTE: "numHastags" is misspelled (sic); the key is kept unchanged because
# other cells may reference the column by this exact name.
tweets["numHastags"] = tweets["text"].apply(num_hashtags)
tweets["numMentions"] = tweets["text"].apply(num_mentions)

#Get a summary of Clinton vs. Trump Tweet attributes
# Average only the numeric columns, named explicitly: the frame also holds
# non-numeric columns (id, text, created_at), and pandas >= 2.0 raises a
# TypeError when mean() is asked to aggregate them.
tweets.groupby("author")[[
    "favorite_count",
    "retweet_count",
    "percentPunctuation",
    "percentInCaps",
    "numHastags",
    "numMentions",
]].mean()

Unnamed: 0_level_0,favorite_count,retweet_count,percentPunctuation,percentInCaps,numHastags,numMentions
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Clinton,5991.002167,3144.962229,0.063471,0.093032,0.129102,0.491022
Trump,16600.378303,5974.324837,0.05906,0.139253,0.478707,0.661486


In [4]:
#Extract month, weekday, and hour from the timestamp and append them to the data frame
# The raw created_at strings carry a +0000 offset, so every component below is
# expressed in UTC, not in the author's local time.
tweets["created_at"] = pd.to_datetime(tweets["created_at"])
# The vectorised .dt accessor replaces one Python lambda call per row.
tweets["hour"] = tweets["created_at"].dt.hour
# "day" holds the day of the week (0 = Monday ... 6 = Sunday), not the day of
# the month.
tweets["day"] = tweets["created_at"].dt.weekday
tweets["month"] = tweets["created_at"].dt.month

#Take a look at the new frame
tweets.head()

Unnamed: 0,id,text,created_at,favorite_count,retweet_count,author,percentPunctuation,percentInCaps,numHastags,numMentions,hour,day,month
0,783436108176629760,Thank you ARIZONA! This is a MOVEMENT like nob...,2016-10-04 22:38:18,4229,1729,Trump,0.056338,0.330275,0,0,22,1,10
1,783393314309484544,My childcare plan makes a difference for worki...,2016-10-04 19:48:15,11110,4719,Trump,0.060606,0.068627,1,0,19,1,10
2,783391423663964160,I will be watching the great Governor @Mike_Pe...,2016-10-04 19:40:44,14211,4809,Trump,0.033333,0.075269,0,1,19,1,10
3,783390310969651200,"Join me in Reno, Nevada tomorrow at 3:30pm! #A...",2016-10-04 19:36:19,5743,2053,Trump,0.09375,0.217391,2,0,19,1,10
4,783149570721144832,"Join me in Reno, Nevada on Wednesday at 3:30pm...",2016-10-04 03:39:42,13895,6189,Trump,0.079365,0.217391,1,0,3,1,10
