In [1]:
import json
import os
import glob
import re
import pandas as pd
from os import makedirs
from os.path import join, exists
from pathlib import Path
from datetime import datetime, date, timedelta
from pathlib import Path

In [2]:
def drop_df_columns(df):
    """Return ``df`` without the columns whose label contains ``'isPartial'``.

    Args:
        df: pandas DataFrame to filter. It is not modified.

    Returns:
        A new DataFrame holding, in their original order, every column of
        ``df`` whose label is not a string containing ``'isPartial'``.
    """
    # Select by position rather than dropping label by label:
    #  * non-string labels (e.g. integers) are simply kept instead of raising
    #    TypeError on the ``in`` test;
    #  * repeated 'isPartial' labels are all removed instead of raising
    #    KeyError when the second occurrence is dropped after the first drop
    #    already removed every column with that label.
    keep_positions = [
        pos
        for pos, label in enumerate(df.columns)
        if not (isinstance(label, str) and 'isPartial' in label)
    ]
    return df.iloc[:, keep_positions]

In [3]:
def _read_trend_frame(src_path, region):
    """Load ``<src_path>/<region>_trends.csv`` with the 'isPartial' columns removed."""
    return drop_df_columns(pd.read_csv(join(src_path, region + '_trends.csv')))


def get_and_save_trends(swing_states, src_path, target_path, fallback_ratio=0.5):
    """Compute one score per region and write the table to ``trends.csv``.

    For every region the file ``<src_path>/<region>_trends.csv`` is read, its
    'isPartial' columns are dropped, and the score is the mean of the last
    remaining column. A region whose file has no rows gets
    ``fallback_ratio`` times the mean computed from ``US_trends.csv``.

    Args:
        swing_states: Iterable of region codes used as file-name prefixes.
        src_path: Directory containing the ``*_trends.csv`` inputs, including
            ``US_trends.csv``.
        target_path: Directory that receives ``trends.csv``; created if it
            does not exist.
        fallback_ratio: Fraction of the US mean assigned to regions with no
            rows. The default of 0.5 reproduces the previous ``us_mean / 2``.

    Returns:
        The DataFrame with columns ``state`` and ``score`` that was printed
        and written to disk.
    """
    makedirs(target_path, exist_ok=True)

    # National baseline, used only for regions whose file has no rows.
    us_mean = _read_trend_frame(src_path, 'US').iloc[:, -1].mean()

    records = []
    for st in swing_states:
        df = _read_trend_frame(src_path, st)
        if len(df) == 0:
            score = us_mean * fallback_ratio
        else:
            score = df.iloc[:, -1].mean()
        records.append({'state': st, 'score': score})

    # Fixing the columns keeps the header present even when no region is given.
    result = pd.DataFrame(records, columns=['state', 'score'])
    print(result)

    # The index column is still written, so the on-disk layout is unchanged.
    result.to_csv(join(target_path, 'trends.csv'))
    return result
    

In [4]:
def get_candidates(yr):
    """Look up the pair of candidate names used for an election year.

    Args:
        yr: Election year; compared against the strings '2012' and '2016'.

    Returns:
        A 2-tuple of candidate names. Any value other than '2012' or '2016'
        yields ('biden', 'trump').
    """
    known_races = (
        ('2012', ('obama', 'romney')),
        ('2016', ('hillary', 'trump')),
    )
    for race_year, pair in known_races:
        if yr == race_year:
            return pair
    return ('biden', 'trump')

In [5]:
def agg_all(states, yr, topics=None):
    """Run ``get_and_save_trends`` for every topic and both candidates of a year.

    Inputs are read from ``cand-state/<topic>/<yr>/<candidate>`` and results
    are written to ``cand-state-agg/<topic>/<yr>/<candidate>``.

    Args:
        states: Region codes forwarded to ``get_and_save_trends``.
        yr: Election year as a string; used both as a path component and to
            pick the candidate pair via ``get_candidates``.
        topics: Optional iterable of topic directory names. Defaults to
            economy, environment, health and immigration.
    """
    if topics is None:
        topics = ['economy', 'environment', 'health', 'immigration']

    # The candidate pair depends only on the year, so resolve it once.
    candidates = get_candidates(yr)

    for tp in topics:
        src_path = join('cand-state', tp, yr)
        target_path = join('cand-state-agg', tp, yr)

        for cand in candidates:
            get_and_save_trends(states, join(src_path, cand), join(target_path, cand))

In [6]:
# Region codes passed to agg_all: the national 'US' entry followed by
# 51 'US-XX' codes. Each code is used as a file-name prefix.
states = [
    'US',
    'US-AL', 'US-AK', 'US-AZ', 'US-AR', 'US-CA', 'US-CO', 'US-CT', 'US-DE', 'US-DC', 'US-FL',
    'US-GA', 'US-HI', 'US-ID', 'US-IL', 'US-IN', 'US-IA', 'US-KS', 'US-KY', 'US-LA', 'US-ME',
    'US-MD', 'US-MA', 'US-MI', 'US-MN', 'US-MS', 'US-MO', 'US-MT', 'US-NE', 'US-NV', 'US-NH',
    'US-NJ', 'US-NM', 'US-NY', 'US-NC', 'US-ND', 'US-OH', 'US-OK', 'US-OR', 'US-PA', 'US-RI',
    'US-SC', 'US-SD', 'US-TN', 'US-TX', 'US-UT', 'US-VT', 'US-VA', 'US-WA', 'US-WV', 'US-WI',
    'US-WY',
]


In [7]:

# Aggregate the '2020' directories for every topic and both candidates;
# each underlying get_and_save_trends call prints one state/score table.
agg_all(states, '2020')

    state      score
0      US  52.142857
1   US-AL  26.071429
2   US-AK  26.071429
3   US-AZ   5.457143
4   US-AR  26.071429
5   US-CA  40.142857
6   US-CO  10.771429
7   US-CT   9.714286
8   US-DE  26.071429
9   US-DC  26.071429
10  US-FL  10.400000
11  US-GA  10.342857
12  US-HI  26.071429
13  US-ID  26.071429
14  US-IL   2.857143
15  US-IN   2.857143
16  US-IA  26.071429
17  US-KS  26.071429
18  US-KY  26.071429
19  US-LA  26.071429
20  US-ME  26.071429
21  US-MD   8.457143
22  US-MA   5.600000
23  US-MI   2.857143
24  US-MN   2.857143
25  US-MS  26.071429
26  US-MO   2.857143
27  US-MT  26.071429
28  US-NE  26.071429
29  US-NV   5.600000
30  US-NH  26.071429
31  US-NJ   7.828571
32  US-NM  26.071429
33  US-NY  35.000000
34  US-NC   8.057143
35  US-ND  26.071429
36  US-OH   2.857143
37  US-OK   2.857143
38  US-OR  26.071429
39  US-PA   7.885714
40  US-RI  26.071429
41  US-SC   5.714286
42  US-SD  26.071429
43  US-TN  26.071429
44  US-TX  12.485714
45  US-UT  26.071429
46  US-VT  26

In [8]:

# Aggregate the '2016' directories for every topic and both candidates;
# each underlying get_and_save_trends call prints one state/score table.
agg_all(states, '2016')

    state      score
0      US  26.542553
1   US-AL  13.271277
2   US-AK  13.271277
3   US-AZ  13.271277
4   US-AR  13.271277
5   US-CA  27.691489
6   US-CO  13.271277
7   US-CT  13.271277
8   US-DE  13.271277
9   US-DC  13.271277
10  US-FL  25.936170
11  US-GA  13.271277
12  US-HI  13.271277
13  US-ID  13.271277
14  US-IL  22.680851
15  US-IN  13.271277
16  US-IA  13.271277
17  US-KS  13.271277
18  US-KY  13.271277
19  US-LA  13.271277
20  US-ME  13.271277
21  US-MD  13.271277
22  US-MA  13.271277
23  US-MI  13.271277
24  US-MN  13.271277
25  US-MS  13.271277
26  US-MO  13.271277
27  US-MT  13.271277
28  US-NE  13.271277
29  US-NV  13.271277
30  US-NH  13.271277
31  US-NJ  20.148936
32  US-NM  13.271277
33  US-NY  28.021277
34  US-NC  13.271277
35  US-ND  13.271277
36  US-OH  13.271277
37  US-OK  13.271277
38  US-OR  13.271277
39  US-PA  21.085106
40  US-RI  13.271277
41  US-SC  13.271277
42  US-SD  13.271277
43  US-TN  13.271277
44  US-TX  19.500000
45  US-UT  13.271277
46  US-VT  13

In [9]:
# Aggregate the '2012' directories for every topic and both candidates;
# each underlying get_and_save_trends call prints one state/score table.
agg_all(states, '2012')

    state      score
0      US  33.655914
1   US-AL   8.043011
2   US-AK   1.075269
3   US-AZ  12.064516
4   US-AR   7.881720
5   US-CA  33.881720
6   US-CO  13.612903
7   US-CT  16.333333
8   US-DE   2.075269
9   US-DC  13.311828
10  US-FL  20.698925
11  US-GA  22.741935
12  US-HI   9.107527
13  US-ID   1.827957
14  US-IL  19.924731
15  US-IN   8.000000
16  US-IA  10.451613
17  US-KS  10.096774
18  US-KY  13.483871
19  US-LA   7.215054
20  US-ME   1.075269
21  US-MD  10.172043
22  US-MA  23.182796
23  US-MI  12.268817
24  US-MN   7.494624
25  US-MS   2.774194
26  US-MO  13.505376
27  US-MT   1.075269
28  US-NE   7.526882
29  US-NV   4.677419
30  US-NH   5.021505
31  US-NJ  26.516129
32  US-NM   8.182796
33  US-NY  34.741935
34  US-NC  20.591398
35  US-ND   3.838710
36  US-OH  13.440860
37  US-OK   6.462366
38  US-OR  10.795699
39  US-PA  16.301075
40  US-RI   2.000000
41  US-SC   9.569892
42  US-SD   1.075269
43  US-TN  11.494624
44  US-TX  23.129032
45  US-UT   6.043011
46  US-VT   3