In [2]:
import os
import pandas as pd
import yaml
import configparser

# Functions

In [11]:
# generate a list of legislators that have twitter handles
def parse_twitter_handles(config):
    yaml_doc_path = config.get('data', 'path') + '/' + config.get('data', 'twitter_yaml')
    if not os.path.isfile(yaml_doc_path):
        fetch_twitter_handles()
    yaml_doc = yaml.load(open(yaml_doc_path, 'r'))
    return [d for d in yaml_doc if 'twitter' in d['social']]

# create a dict where the key is the date and the value is a 
# list of lists [[moc, score],[moc, score],...] for that day
def parse_moc_scores(config, score_files):
    moc_scores = {}
    for f in score_files:
        score_path = config.get('data', 'path') + '/' + config.get('data', 'scores') + '/' + f
        if '.1.' in f:
            components = f.split('.')
            date = components[0]
            if components[2] == 'moc':
                with open(score_path) as score_file:
                    scores = score_file.readlines()
                moc_scores[date] = scores
    return moc_scores

# create a dict of scores for each legislator
# on each date for which scores are available
def read_scores(legislators, scores):
    leg_scores = {}
    for l in legislators:
        handle = l['social']['twitter'].lower()
        leg_scores[handle] = {}

    for date in scores:
        for score_item in scores[date]:
            handle, score = score_item.split()
            leg_scores[handle][date] = score
    return leg_scores

# main()

In [12]:
config = configparser.ConfigParser()
config.read('settings.cfg')
score_path = config.get('data', 'path') + '/' + config.get('data', 'scores') + '/'
score_files = os.listdir(score_path)
moc_scores = parse_moc_scores(config, score_files)
mocs_with_twitter = parse_twitter_handles(config)
moc_scores_by_date = read_scores(mocs_with_twitter, moc_scores)
df = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in moc_scores_by_date.items()])).transpose().dropna(how='all')
data_file = config.get('data', 'path') + '/' + config.get('data', 'moc_scores')
df.to_csv(data_file, sep='\t')

Now read data file saved from AWS server and do next steps for analysis.

In [4]:
full_df = pd.read_csv('moc_scores_by_date.csv', sep = '\t')

Labor Day 2016 - Monday September 5. Tuesday, September 6 is 9 weeks before election. 2016-09-

In [7]:
all_dates = list(full_df.columns.values)
len(all_dates)

366

In [10]:
last_date = all_dates[365]
last_date

'2016-11-08'

In [14]:
import datetime
date_parts = last_date.strip('-')

start_date = datetime.date(2016, 9, 6)
end_date = datetime.date(2016, 11, 8)

In [15]:
date_parts

'2016-11-08'

In [18]:
len(last_date)

10

In [27]:
year = int(last_date[:4])
month = int(last_date[5:7])
day = int(last_date[8:])

In [35]:
interesting_dates = []
for date in all_dates:
    try:
        year = int(date[:4])
        month = int(date[5:7])
        day = int(date[8:])
    except:
        continue
    
    date_in_question = datetime.date(year, month, day)
    if start_date <= date_in_question <= end_date:
        interesting_dates.append(date)

In [36]:
interesting_dates

['2016-09-06',
 '2016-09-07',
 '2016-09-08',
 '2016-09-09',
 '2016-09-10',
 '2016-09-11',
 '2016-09-12',
 '2016-09-13',
 '2016-09-14',
 '2016-09-15',
 '2016-09-16',
 '2016-09-17',
 '2016-09-18',
 '2016-09-19',
 '2016-09-20',
 '2016-09-21',
 '2016-09-22',
 '2016-09-23',
 '2016-09-24',
 '2016-09-25',
 '2016-09-26',
 '2016-09-27',
 '2016-09-28',
 '2016-09-29',
 '2016-09-30',
 '2016-10-01',
 '2016-10-02',
 '2016-10-03',
 '2016-10-04',
 '2016-10-05',
 '2016-10-06',
 '2016-10-07',
 '2016-10-08',
 '2016-10-09',
 '2016-10-10',
 '2016-10-11',
 '2016-10-12',
 '2016-10-13',
 '2016-10-14',
 '2016-10-15',
 '2016-10-16',
 '2016-10-17',
 '2016-10-18',
 '2016-10-19',
 '2016-10-20',
 '2016-10-21',
 '2016-10-22',
 '2016-10-23',
 '2016-10-24',
 '2016-10-25',
 '2016-10-26',
 '2016-10-27',
 '2016-10-28',
 '2016-10-29',
 '2016-10-30',
 '2016-10-31',
 '2016-11-01',
 '2016-11-02',
 '2016-11-03',
 '2016-11-04',
 '2016-11-05',
 '2016-11-06',
 '2016-11-07',
 '2016-11-08']

In [40]:
interesting_df = full_df.filter(interesting_dates)

In [41]:
interesting_df

Unnamed: 0,2016-09-06,2016-09-07,2016-09-08,2016-09-09,2016-09-10,2016-09-11,2016-09-12,2016-09-13,2016-09-14,2016-09-15,...,2016-10-30,2016-10-31,2016-11-01,2016-11-02,2016-11-03,2016-11-04,2016-11-05,2016-11-06,2016-11-07,2016-11-08
0,,,,,,,,,7.803470,2.060600,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,-2.094980,-52.704000,-76.581500,-83.872500,-68.092100,,,-103.656000,-55.075900,-308.280000,...,,,-90.838000,-61.343500,-40.482000,,-33.451300,,,
3,,,-2.871430,,,,,-2.303890,-2.306660,,...,,,,-17.954200,,,,,,
4,,,,,,,1.379970,4.426230,,14.770600,...,,,,,,,,,,
5,,,,,,,,,,-1.941180,...,,,,-0.748092,,,,,,
6,-1.372290,-0.976000,-1.914290,,-2.346460,,0.046126,,-2.306670,,...,,,,-6.732820,,,,,,
7,,,,1.934210,1.422180,,,,,,...,,,,,,,,,,
8,,,20.608200,13.539500,3.218790,0.884615,1.379970,2.696860,,-5.163230,...,7.822220,3.75758,3.747290,34.176600,17.556800,11.632700,1.159570,0.607843,5,13.455900
9,-1.372290,-71.281500,-4.785720,-81.804500,-75.348400,,-2.013730,-3.388890,-55.075900,-231.233000,...,,,-1.368150,-44.137400,-4.391300,-7.221050,-35.176100,,,-4.459010


In [42]:
interesting_df.to_csv('purpletag_moc_scores_election_2016.csv', sep='\t')