In [2]:
import json
import os
import glob
import re
import pandas as pd
from os import makedirs
from os.path import join, exists
from pathlib import Path
from datetime import datetime, date, timedelta
from pathlib import Path

In [3]:
def drop_df_columns(df):
    cols = df.columns
    for c in cols:
        if 'isPartial' in c:
            df = df.drop([c], axis=1)
            
    return df

In [10]:
def get_and_save_trends(swing_states, src_path, target_path):
    
    makedirs(target_path, exist_ok=True)
    result_df = []

    for st in swing_states:
        file_path = join(src_path, st + '_trends.csv')
        df = pd.read_csv(file_path)
        
        df = drop_df_columns(df)
        
        if len(df) == 0:
            continue
        
        if len(result_df) > 0:
            result_df = pd.merge(df, result_df, how='inner', on=['date', 'date'], 
                                 suffixes=('_' + st + '1', '_y' + st + '2'))
        else:
            result_df = df
       
    print(result_df.columns)
    
    col = result_df.loc[: , result_df.columns[1]: result_df.columns[len(result_df.columns) - 1]]
    
    result_df['score'] = col.mean(axis=1)
    
    result_df_min = result_df[['date', 'score']]
    result_df_min.to_csv(join(target_path, 'trends.csv'))
    

In [11]:
def get_candidates(yr):
    if yr == '2012':
        return 'obama', 'romney'
    elif yr == '2016':
        return 'hillary', 'trump'
    else:
        return 'biden', 'trump'

In [14]:
def agg_all(states, yr):
    topics = ['candidate', 'economy', 'environment', 'health', 'immigration']
    years = ['2012', '2016', '2020']
    
    for tp in topics:
        cand1, cand2 = get_candidates(yr)

        src_path = join('data-set-raw-3', tp, yr)
        target_path = join('data-aggregated', tp, yr)

        get_and_save_trends(states, join(src_path, cand1), join(target_path, cand1)) 

        get_and_save_trends(states, join(src_path, cand2), join(target_path, cand2)) 

In [15]:
sws_20 = ['US', 'US-PA', 'US-FL', 'US-OH', 'US-VA', 'US-AZ', 'US-WI', 'US-NV', 'US-ME', 'US-MI', 'US-IA']
agg_all(sws_20, '2020')

Index(['date', 'vote biden', 'vote biden_US-MI1', 'vote biden_yUS-MI2',
       'vote biden_US-NV1', 'vote biden_yUS-NV2', 'vote biden_US-AZ1',
       'vote biden_yUS-AZ2', 'vote biden_US-OH1', 'vote biden_yUS-OH2',
       'vote biden_US-PA1', 'vote biden_yUS-PA2'],
      dtype='object')
Index(['date', 'vote trump', 'vote trump_US-MI1', 'vote trump_yUS-MI2',
       'vote trump_US-NV1', 'vote trump_yUS-NV2', 'vote trump_US-AZ1',
       'vote trump_yUS-AZ2', 'vote trump_US-OH1', 'vote trump_yUS-OH2',
       'vote trump_US-PA1', 'vote trump_yUS-PA2'],
      dtype='object')
Index(['date', 'biden immigration', 'biden immigration_US-WI1',
       'biden immigration_yUS-WI2', 'biden immigration_US-VA1',
       'biden immigration_yUS-VA2', 'biden immigration_US-PA1',
       'biden immigration_yUS-PA2'],
      dtype='object')
Index(['date', 'trump immigration_US-IA1', 'trump immigration_yUS-IA2',
       'trump immigration_US-NV1', 'trump immigration_yUS-NV2',
       'trump immigration_US-AZ1', 't

In [16]:
sws_16 = ['US', 'US-CO', 'US-FL', 'US-IA', 'US-MI', 'US-MS', 'US-NV', 'US-NH', 'US-OH', 'US-NC', \
          'US-PA', 'US-VA', 'US-WI']
agg_all(sws_16, '2016')

Index(['date', 'vote hillary', 'vote hillary_US-VA1', 'vote hillary_yUS-VA2',
       'vote hillary_US-NC1', 'vote hillary_yUS-NC2', 'vote hillary_US-NH1',
       'vote hillary_yUS-NH2', 'vote hillary_US-MS1', 'vote hillary_yUS-MS2',
       'vote hillary_US-IA1', 'vote hillary_yUS-IA2', 'vote hillary_US-CO1',
       'vote hillary_yUS-CO2'],
      dtype='object')
Index(['date', 'vote trump', 'vote trump_US-VA1', 'vote trump_yUS-VA2',
       'vote trump_US-NC1', 'vote trump_yUS-NC2', 'vote trump_US-NH1',
       'vote trump_yUS-NH2', 'vote trump_US-MS1', 'vote trump_yUS-MS2',
       'vote trump_US-IA1', 'vote trump_yUS-IA2', 'vote trump_US-CO1',
       'vote trump_yUS-CO2'],
      dtype='object')
Index(['date', 'hillary immigration', 'hillary immigration_US-FL1',
       'hillary immigration_yUS-FL2'],
      dtype='object')
Index(['date', 'trump immigration', 'trump immigration_US-VA1',
       'trump immigration_yUS-VA2', 'trump immigration_US-NC1',
       'trump immigration_yUS-NC2', 'trum

In [17]:
sws_12 = ['US', 'US-CO', 'US-FL', 'US-IA', 'US-NV', 'US-NC', 'US-OH', 'US-VA', 'US-WI', 'US-PA', \
          'US-MI', 'US-MN', 'US-NM']
agg_all(sws_12, '2012')

Index(['date', 'vote obama', 'vote obama_US-MN1', 'vote obama_yUS-MN2',
       'vote obama_US-PA1', 'vote obama_yUS-PA2', 'vote obama_US-VA1',
       'vote obama_yUS-VA2', 'vote obama_US-NC1', 'vote obama_yUS-NC2',
       'vote obama_US-IA1', 'vote obama_yUS-IA2', 'vote obama_US-CO1',
       'vote obama_yUS-CO2'],
      dtype='object')
Index(['date', 'vote romney_US-MN1', 'vote romney_yUS-MN2',
       'vote romney_US-PA1', 'vote romney_yUS-PA2', 'vote romney_US-VA1',
       'vote romney_yUS-VA2', 'vote romney_US-NC1', 'vote romney_yUS-NC2',
       'vote romney_US-IA1', 'vote romney_yUS-IA2', 'vote romney_US-CO1',
       'vote romney_yUS-CO2'],
      dtype='object')
Index(['date', 'obama immigration', 'obama immigration_US-MN1',
       'obama immigration_yUS-MN2', 'obama immigration_US-PA1',
       'obama immigration_yUS-PA2', 'obama immigration_US-VA1',
       'obama immigration_yUS-VA2', 'obama immigration_US-NC1',
       'obama immigration_yUS-NC2', 'obama immigration_US-IA1',
      