# Retweet Analysis

* Import data from Tweepy results

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the candidate tweet summary built from the Tweepy results.
df = pd.read_csv(filepath_or_buffer='tweet_average_count.csv')

In [3]:
# Preview the first five candidate rows.
df.head(n=5)

Unnamed: 0,name,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count,state,district_N,party
0,Aaron Swisher,3.163265,5.566327,620,1091,196,Idaho,2,Democratic
1,Abby Finkenauer,23.771795,77.407692,9271,30189,390,Iowa,1,Democratic
2,Abigail Spanberger,29.395604,75.64497,34775,89488,1183,Virginia,7,Democratic
3,Adam Kinzinger,10.333333,27.444444,279,741,27,Illinois,16,Republican
4,Adam Smith,1.52459,6.202186,279,1135,183,Washington,9,Democratic


In [4]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

name               object
ave_retweet       float64
ave_favorite      float64
retweet_count       int64
favorite_count      int64
tweet_count         int64
state              object
district_N          int64
party              object
dtype: object

In [5]:
# Number of candidates per party.
df['party'].value_counts()

Democratic    367
Republican    283
Name: party, dtype: int64

In [6]:
# Store district numbers as strings so they can be padded and concatenated later.
df['district_N'] = df['district_N'].astype(str)

In [7]:
# Left-pad single-digit district numbers with '0' (e.g. '2' -> '02') in one
# vectorized call instead of a row-by-row loop over positional column 7.
# astype(str) is a no-op when the column is already string-typed, so this
# cell is safe to re-run.
df['district_N'] = df['district_N'].astype(str).str.zfill(2)

In [8]:
# Preview the frame after normalising district numbers.
df.head(n=5)

Unnamed: 0,name,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count,state,district_N,party
0,Aaron Swisher,3.163265,5.566327,620,1091,196,Idaho,2,Democratic
1,Abby Finkenauer,23.771795,77.407692,9271,30189,390,Iowa,1,Democratic
2,Abigail Spanberger,29.395604,75.64497,34775,89488,1183,Virginia,7,Democratic
3,Adam Kinzinger,10.333333,27.444444,279,741,27,Illinois,16,Republican
4,Adam Smith,1.52459,6.202186,279,1135,183,Washington,9,Democratic


In [9]:
# Load the state-name -> abbreviation lookup table.
abb_df = pd.read_csv(filepath_or_buffer='states_abb.csv')

In [10]:
# Preview the first five lookup rows (columns: State, Abbreviation).
abb_df.head(n=5)

Unnamed: 0,State,Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [11]:
# Placeholder column for the state abbreviation. It is created with object
# dtype (still filled with NaN) because string values are written into it
# afterwards; writing strings into a float64 column is a deprecated
# incompatible-dtype assignment in recent pandas.
df['abbreviation'] = pd.Series(np.nan, index=df.index, dtype=object)

In [12]:
# Look up each candidate's state abbreviation with a single vectorized map
# instead of comparing every candidate row against every lookup row.
# keep='last' mirrors the original loop, where a later duplicate State row
# would overwrite an earlier match. States missing from abb_df stay NaN.
state_to_abbreviation = (
    abb_df.drop_duplicates(subset='State', keep='last')
          .set_index('State')['Abbreviation']
)
df['abbreviation'] = df['state'].map(state_to_abbreviation)

In [13]:
# Preview the frame with the new abbreviation column.
df.head(n=5)

Unnamed: 0,name,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count,state,district_N,party,abbreviation
0,Aaron Swisher,3.163265,5.566327,620,1091,196,Idaho,2,Democratic,ID
1,Abby Finkenauer,23.771795,77.407692,9271,30189,390,Iowa,1,Democratic,IA
2,Abigail Spanberger,29.395604,75.64497,34775,89488,1183,Virginia,7,Democratic,VA
3,Adam Kinzinger,10.333333,27.444444,279,741,27,Illinois,16,Republican,IL
4,Adam Smith,1.52459,6.202186,279,1135,183,Washington,9,Democratic,WA


In [14]:
# Build the district identifier '<state abbreviation>-<district number>', e.g. 'ID-02'.
df['district_key'] = df['abbreviation'] + '-' + df['district_N']

In [15]:
# Preview the frame with the district key added.
df.head(n=5)

Unnamed: 0,name,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count,state,district_N,party,abbreviation,district_key
0,Aaron Swisher,3.163265,5.566327,620,1091,196,Idaho,2,Democratic,ID,ID-02
1,Abby Finkenauer,23.771795,77.407692,9271,30189,390,Iowa,1,Democratic,IA,IA-01
2,Abigail Spanberger,29.395604,75.64497,34775,89488,1183,Virginia,7,Democratic,VA,VA-07
3,Adam Kinzinger,10.333333,27.444444,279,741,27,Illinois,16,Republican,IL,IL-16
4,Adam Smith,1.52459,6.202186,279,1135,183,Washington,9,Democratic,WA,WA-09


In [62]:
# Persist the enriched candidate table without the positional index.
df.to_csv(path_or_buf='candidates_summary.csv', index=False)

In [63]:
# Share of all collected tweets that were posted by Democratic candidates.
df.loc[df['party'] == 'Democratic', 'tweet_count'].sum() / df['tweet_count'].sum()

0.7341719409502461

In [64]:
# Average tweets per Democratic candidate (total tweets / non-null names).
(
    df.loc[df['party'] == 'Democratic', 'tweet_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
)

480.1144414168937

In [65]:
# Average tweets per Republican candidate (total tweets / non-null names).
(
    df.loc[df['party'] == 'Republican', 'tweet_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

225.43816254416961

In [66]:
# Democratic-to-Republican ratio of average tweets per candidate.
(
    df.loc[df['party'] == 'Democratic', 'tweet_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
) / (
    df.loc[df['party'] == 'Republican', 'tweet_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

2.129694617799353

In [67]:
# Average retweets received per Democratic candidate.
(
    df.loc[df['party'] == 'Democratic', 'retweet_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
)

32211.686648501363

In [68]:
# Average retweets received per Republican candidate.
(
    df.loc[df['party'] == 'Republican', 'retweet_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

6860.508833922261

In [69]:
# Democratic-to-Republican ratio of average retweets per candidate.
(
    df.loc[df['party'] == 'Democratic', 'retweet_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
) / (
    df.loc[df['party'] == 'Republican', 'retweet_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

4.6952328796996

In [70]:
# Average favorites received per Democratic candidate.
(
    df.loc[df['party'] == 'Democratic', 'favorite_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
)

91910.34332425069

In [71]:
# Average favorites received per Republican candidate.
(
    df.loc[df['party'] == 'Republican', 'favorite_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

15958.374558303887

In [72]:
# Democratic-to-Republican ratio of average favorites per candidate.
(
    df.loc[df['party'] == 'Democratic', 'favorite_count'].sum()
    / df.loc[df['party'] == 'Democratic', 'name'].count()
) / (
    df.loc[df['party'] == 'Republican', 'favorite_count'].sum()
    / df.loc[df['party'] == 'Republican', 'name'].count()
)

5.75938000380029

## Tweets by district

In [16]:
# Keep only Democratic candidates, renumbering rows from zero.
democratic_df = df.loc[df['party'] == 'Democratic'].reset_index(drop=True)

In [17]:
# Sum the numeric columns per district. numeric_only=True is required on
# pandas >= 2.0, where a plain .sum() concatenates the string columns (name,
# state, party, ...) instead of dropping them and so changes the column
# layout that later positional lookups depend on. The summed ave_* columns
# are not meaningful averages; they are recomputed from the totals afterwards.
democratic_df = (
    democratic_df.groupby(by='district_key')
                 .sum(numeric_only=True)
                 .reset_index()
)

In [18]:
# Preview the per-district Democratic totals.
democratic_df.head(n=5)

Unnamed: 0,district_key,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count
0,AK-00,6.543367,19.433673,2565,7618,392
1,AL-01,14.382716,21.595679,4660,6997,324
2,AL-02,17.929286,34.152769,26876,51195,1499
3,AL-03,10.682791,25.846947,13471,32593,1261
4,AL-04,15.183857,24.053812,3386,5364,223


In [19]:
# Recompute average retweets per tweet from the district totals.
democratic_df['ave_retweet'] = democratic_df['retweet_count'] / democratic_df['tweet_count']

In [20]:
# Recompute average favorites per tweet from the district totals.
democratic_df['ave_favorite'] = democratic_df['favorite_count'] / democratic_df['tweet_count']

In [21]:
# Preview the Democratic district table with recomputed averages.
democratic_df.head(n=5)

Unnamed: 0,district_key,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count
0,AK-00,6.543367,19.433673,2565,7618,392
1,AL-01,14.382716,21.595679,4660,6997,324
2,AL-02,17.929286,34.152769,26876,51195,1499
3,AL-03,10.682791,25.846947,13471,32593,1261
4,AL-04,15.183857,24.053812,3386,5364,223


In [22]:
# Keep only Republican candidates, renumbering rows from zero.
republican_df = df.loc[df['party'] == 'Republican'].reset_index(drop=True)

In [23]:
# Sum the numeric columns per district. numeric_only=True is required on
# pandas >= 2.0, where a plain .sum() concatenates the string columns (name,
# state, party, ...) instead of dropping them and so changes the column
# layout that later positional lookups depend on. The summed ave_* columns
# are not meaningful averages; they are recomputed from the totals afterwards.
republican_df = (
    republican_df.groupby(by='district_key')
                 .sum(numeric_only=True)
                 .reset_index()
)

In [24]:
# Preview the per-district Republican totals.
republican_df.head(n=5)

Unnamed: 0,district_key,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count
0,AK-00,0.666667,4.533333,10,68,15
1,AL-01,1.742424,5.318182,115,351,66
2,AL-03,2.166667,4.966667,130,298,60
3,AL-06,2.157895,7.473684,82,284,38
4,AR-01,2.238095,4.97619,94,209,42


In [25]:
# Recompute average retweets per tweet from the district totals.
republican_df['ave_retweet'] = republican_df['retweet_count'] / republican_df['tweet_count']

In [26]:
# Recompute average favorites per tweet from the district totals.
republican_df['ave_favorite'] = republican_df['favorite_count'] / republican_df['tweet_count']

In [27]:
# Preview the Republican district table with recomputed averages.
republican_df.head(n=5)

Unnamed: 0,district_key,ave_retweet,ave_favorite,retweet_count,favorite_count,tweet_count
0,AK-00,0.666667,4.533333,10,68,15
1,AL-01,1.742424,5.318182,115,351,66
2,AL-03,2.166667,4.966667,130,298,60
3,AL-06,2.157895,7.473684,82,284,38
4,AR-01,2.238095,4.97619,94,209,42


In [28]:
# Count candidate rows per district key and turn the result into a two-column frame.
district_df = df['district_key'].value_counts().reset_index()

In [29]:
# Give the two columns explicit names: the district key and its candidate count.
district_df.columns = pd.Index(['district_key', 'counts'])

In [30]:
# Preview the district counts.
district_df.head(n=5)

Unnamed: 0,district_key,counts
0,LA-03,4
1,LA-06,3
2,LA-01,3
3,NY-09,2
4,IL-15,2


In [31]:
# Append NaN-filled placeholder columns for each party's per-district value.
district_df = district_df.assign(democratic=np.nan, republican=np.nan)

In [32]:
# Preview the frame with the empty party columns.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican
0,LA-03,4,,
1,LA-06,3,,
2,LA-01,3,,
3,NY-09,2,,
4,IL-15,2,,


In [33]:
# Copy each district's Democratic average retweets per tweet into
# district_df['democratic'] with a vectorized lookup on named columns,
# instead of a nested loop over positional indices. district_key is unique in
# democratic_df after the groupby; districts with no Democratic row stay NaN.
district_df['democratic'] = district_df['district_key'].map(
    democratic_df.set_index('district_key')['ave_retweet']
)

In [34]:
# Copy each district's Republican average retweets per tweet into
# district_df['republican'] with a vectorized lookup on named columns,
# instead of a nested loop over positional indices. district_key is unique in
# republican_df after the groupby; districts with no Republican row stay NaN.
district_df['republican'] = district_df['district_key'].map(
    republican_df.set_index('district_key')['ave_retweet']
)

In [35]:
# Preview the per-party averages by district.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican
0,LA-03,4,7.030172,1.9
1,LA-06,3,8.34767,0.571429
2,LA-01,3,10.019749,2.5
3,NY-09,2,2.197044,19.278409
4,IL-15,2,1.828244,0.0


In [36]:
# Replace every remaining NaN in the frame with 0 (e.g. a party with no row for a district).
district_df = district_df.fillna(value=0)

In [37]:
# Placeholder column for the more-active party label. It is created with
# object dtype (still filled with NaN) because string labels are written into
# it afterwards; writing strings into a float64 column is a deprecated
# incompatible-dtype assignment in recent pandas.
district_df['active'] = pd.Series(np.nan, index=district_df.index, dtype=object)

In [38]:
# Label each district with the party whose value is strictly larger, using one
# vectorized comparison instead of a row-by-row loop over positional columns.
# As in the original loop, ties (including 0 vs 0) fall through to 'Republican'.
district_df['active'] = np.where(
    district_df['democratic'] > district_df['republican'],
    'Democratic',
    'Republican',
)

In [39]:
# Preview the frame with the active-party label.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican,active
0,LA-03,4,7.030172,1.9,Democratic
1,LA-06,3,8.34767,0.571429,Democratic
2,LA-01,3,10.019749,2.5,Democratic
3,NY-09,2,2.197044,19.278409,Republican
4,IL-15,2,1.828244,0.0,Democratic


In [40]:
# Number of districts labelled with each party as the more active one.
district_df['active'].value_counts()

Democratic    306
Republican     90
Name: active, dtype: int64

In [41]:
# Load the per-district rating table.
rating_df = pd.read_csv(filepath_or_buffer='Rating.csv')

In [42]:
# Preview the first five rating rows.
rating_df.head(n=5)

Unnamed: 0,District,Rating,Representative,STATEFP,CD115FP,AFFGEOID
0,AL-07,Solid Democratic,Terri Sewell,1,7,5001500US0107
1,AZ-03,Solid Democratic,Raúl Grijalva,4,3,5001500US0403
2,AZ-07,Solid Democratic,Ruben Gallego,4,7,5001500US0407
3,AZ-09,Solid Democratic,Open,4,9,5001500US0409
4,CA-02,Solid Democratic,Jared Huffman,6,2,5001500US0602


In [43]:
# Placeholder column for the district rating. It is created with object dtype
# (still filled with NaN) because string ratings are written into it
# afterwards; writing strings into a float64 column is a deprecated
# incompatible-dtype assignment in recent pandas.
district_df['rating'] = pd.Series(np.nan, index=district_df.index, dtype=object)

In [44]:
# Preview the frame with the empty rating column.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican,active,rating
0,LA-03,4,7.030172,1.9,Democratic,
1,LA-06,3,8.34767,0.571429,Democratic,
2,LA-01,3,10.019749,2.5,Democratic,
3,NY-09,2,2.197044,19.278409,Republican,
4,IL-15,2,1.828244,0.0,Democratic,


In [45]:
# Attach each district's rating with a vectorized lookup on named columns
# instead of a nested loop over positional indices. keep='last' mirrors the
# original loop, where a later duplicate District row would overwrite an
# earlier match. Districts missing from rating_df stay NaN.
district_to_rating = (
    rating_df.drop_duplicates(subset='District', keep='last')
             .set_index('District')['Rating']
)
district_df['rating'] = district_df['district_key'].map(district_to_rating)

In [46]:
# Preview the frame with ratings attached.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican,active,rating
0,LA-03,4,7.030172,1.9,Democratic,Solid Republican
1,LA-06,3,8.34767,0.571429,Democratic,Solid Republican
2,LA-01,3,10.019749,2.5,Democratic,Solid Republican
3,NY-09,2,2.197044,19.278409,Republican,Solid Democratic
4,IL-15,2,1.828244,0.0,Democratic,Solid Republican


In [47]:
# Number of districts in the table.
district_df.shape[0]

396

In [48]:
# Persist the district table without the positional index.
district_df.to_csv(path_or_buf='district.csv', index=False)

In [49]:
# Active-party breakdown within districts rated Solid/Likely/Lean Republican.
district_df.loc[
    district_df['rating'].isin(['Solid Republican', 'Likely Republican', 'Lean Republican']),
    'active',
].value_counts()

Democratic    154
Republican     36
Name: active, dtype: int64

In [50]:
# Active-party breakdown within districts rated Solid/Likely/Lean Democratic.
district_df.loc[
    district_df['rating'].isin(['Solid Democratic', 'Likely Democratic', 'Lean Democratic']),
    'active',
].value_counts()

Democratic    123
Republican     52
Name: active, dtype: int64

In [51]:
# Active-party breakdown within Toss-Up districts (either direction).
district_df.loc[
    district_df['rating'].isin(['Toss-Up Democratic', 'Toss-Up Republican']),
    'active',
].value_counts()

Democratic    29
Republican     2
Name: active, dtype: int64

### Adding GEOID

In [52]:
# Placeholder column for the district GEOID. It is created with object dtype
# (still filled with NaN) because string identifiers are written into it
# afterwards; writing strings into a float64 column is a deprecated
# incompatible-dtype assignment in recent pandas.
district_df['AFFGEOID'] = pd.Series(np.nan, index=district_df.index, dtype=object)

In [53]:
# Preview the frame with the empty AFFGEOID column.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican,active,rating,AFFGEOID
0,LA-03,4,7.030172,1.9,Democratic,Solid Republican,
1,LA-06,3,8.34767,0.571429,Democratic,Solid Republican,
2,LA-01,3,10.019749,2.5,Democratic,Solid Republican,
3,NY-09,2,2.197044,19.278409,Republican,Solid Democratic,
4,IL-15,2,1.828244,0.0,Democratic,Solid Republican,


In [54]:
# Attach each district's AFFGEOID with a vectorized lookup on named columns
# instead of a nested loop over positional indices. keep='last' mirrors the
# original loop, where a later duplicate District row would overwrite an
# earlier match. Districts missing from rating_df stay NaN.
district_to_geoid = (
    rating_df.drop_duplicates(subset='District', keep='last')
             .set_index('District')['AFFGEOID']
)
district_df['AFFGEOID'] = district_df['district_key'].map(district_to_geoid)

In [55]:
# Preview the frame with GEOIDs attached.
district_df.head(n=5)

Unnamed: 0,district_key,counts,democratic,republican,active,rating,AFFGEOID
0,LA-03,4,7.030172,1.9,Democratic,Solid Republican,5001500US2203
1,LA-06,3,8.34767,0.571429,Democratic,Solid Republican,5001500US2206
2,LA-01,3,10.019749,2.5,Democratic,Solid Republican,5001500US2201
3,NY-09,2,2.197044,19.278409,Republican,Solid Democratic,5001500US3609
4,IL-15,2,1.828244,0.0,Democratic,Solid Republican,5001500US1715


In [56]:
# Persist the district table, now including AFFGEOID, without the positional index.
district_df.to_csv(path_or_buf='twitter_count_geoid.csv', index=False)

## Pennsylvania

In [57]:
# Select Pennsylvania districts by matching the 'PA-' state prefix of the key.
# A prefix match is used rather than a substring search so the filter only
# ever keys on the state part of 'XX-NN'; na=False treats a missing key as a
# non-match instead of producing an invalid boolean mask.
pennsylvania_df = district_df[
    district_df['district_key'].str.startswith('PA-', na=False)
].reset_index(drop=True)

In [58]:
# Extract the two-digit district number from the key as an integer column.
# The pattern is a raw string because '\d' in a normal string literal is an
# invalid escape sequence (a warning on current Python versions).
# expand=False returns a Series rather than a one-column DataFrame.
pennsylvania_df['district'] = (
    pennsylvania_df['district_key'].str.extract(r'(\d\d)', expand=False).astype(int)
)

In [59]:
# Display the full Pennsylvania subset.
pennsylvania_df

Unnamed: 0,district_key,counts,democratic,republican,active,rating,AFFGEOID,district
0,PA-03,2,4.298472,0.563073,Democratic,Solid Democratic,5001500US4203,3
1,PA-16,2,11.379845,1.345679,Democratic,Lean Republican,5001500US4216,16
2,PA-01,2,20.915452,2.993506,Democratic,Toss-Up Republican,5001500US4201,1
3,PA-13,2,8.018868,3.804348,Democratic,Solid Republican,5001500US4213,13
4,PA-05,2,10.086705,2.585526,Democratic,Likely Democratic,5001500US4205,5
5,PA-09,2,4.694656,1.054662,Democratic,Solid Republican,5001500US4209,9
6,PA-15,2,6.752941,0.165179,Democratic,Solid Republican,5001500US4215,15
7,PA-06,2,49.795527,4.481013,Democratic,Likely Democratic,5001500US4206,6
8,PA-04,2,10.382743,1.353234,Democratic,Solid Democratic,5001500US4204,4
9,PA-14,2,11.824742,1.3,Democratic,Likely Republican,5001500US4214,14


In [60]:
# Persist the Pennsylvania subset without the positional index.
pennsylvania_df.to_csv(path_or_buf='pennsylvania.csv', index=False)

## Blank districts (rated districts with no entry in the tweet data)

In [61]:
# Export the rating rows whose District never appears among the tweet-derived district keys.
rating_df.loc[
    ~rating_df['District'].isin(district_df['district_key'])
].to_csv('blank.csv', index=False)