In [None]:
import os
import pandas as pd
import yaml
import configparser

# Functions

In [None]:
# generate a list of legislators that have twitter handles
def parse_twitter_handles(config):
    yaml_doc_path = config.get('data', 'path') + '/' + config.get('data', 'twitter_yaml')
    if not os.path.isfile(yaml_doc_path):
        fetch_twitter_handles()
    yaml_doc = yaml.load(open(yaml_doc_path, 'r'))
    return [d for d in yaml_doc if 'twitter' in d['social']]

# create a dict where the key is the date and the value is a 
# list of lists [[moc, score],[moc, score],...] for that day
def parse_moc_scores(config, score_files):
    moc_scores = {}
    for f in score_files:
        score_path = config.get('data', 'path') + '/' + config.get('data', 'scores') + '/' + f
        if '.1.' in f:
            components = f.split('.')
            date = components[0]
            if components[2] == 'moc':
                with open(score_path) as score_file:
                    scores = score_file.readlines()
                moc_scores[date] = scores
    return moc_scores

# create a dict of scores for each legislator
# on each date for which scores are available
def read_scores(legislators, scores):
    leg_scores = {}
    for l in legislators:
        handle = l['social']['twitter'].lower()
        leg_scores[handle] = {}

    for date in scores:
        for score_item in scores[date]:
            handle, score = score_item.split()
            leg_scores[handle][date] = score
    return leg_scores

# main()

In [None]:
config = configparser.ConfigParser()
config.read('settings.cfg')
score_path = config.get('data', 'path') + '/' + config.get('data', 'scores') + '/'
score_files = os.listdir(score_path)
moc_scores = parse_moc_scores(config, score_files)
mocs_with_twitter = parse_twitter_handles(config)
moc_scores_by_date = read_scores(mocs_with_twitter, moc_scores)
df = pd.DataFrame(dict([(k, pd.Series(v)) for k,v in moc_scores_by_date.items()])).transpose().dropna(how='all')
data_file = config.get('data', 'path') + '/' + config.get('data', 'moc_scores')
df.to_csv(data_file, sep='\t')

Now read data file saved from AWS server and do next steps for analysis.

In [None]:
full_df = pd.read_csv('moc_scores_by_date.csv', sep = '\t')

Labor Day 2016 - Monday September 5. Tuesday, September 6 is 9 weeks before election. 2016-09-

In [None]:
all_dates = list(full_df.columns.values)
len(all_dates)

In [None]:
last_date = all_dates[365]
last_date

In [None]:
import datetime
date_parts = last_date.strip('-')

start_date = datetime.date(2016, 9, 6)
end_date = datetime.date(2016, 11, 8)

In [None]:
date_parts

In [None]:
len(last_date)

In [None]:
year = int(last_date[:4])
month = int(last_date[5:7])
day = int(last_date[8:])

In [None]:
interesting_dates = []
for date in all_dates:
    try:
        year = int(date[:4])
        month = int(date[5:7])
        day = int(date[8:])
    except:
        continue
    
    date_in_question = datetime.date(year, month, day)
    if start_date <= date_in_question <= end_date:
        interesting_dates.append(date)

In [None]:
interesting_df = full_df.filter(interesting_dates)
interesting_df.to_csv('purpletag_moc_scores_election_2016.csv', sep='\t')

# Legislator Info

In [1]:
import yaml
yaml_doc = yaml.load(open('legislators-current.yaml', 'r'))
#return [d for d in yaml_doc if 'twitter' in d['social']]

In [24]:
#yaml_doc[0]

In [22]:
name = yaml_doc[0]['name']['official_full']
chamber = yaml_doc[0]['terms'][-1]['type']
state = yaml_doc[0]['terms'][-1]['state']

if yaml_doc[-1]['terms'][-1]['type'] == 'rep':
    district = yaml_doc[-1]['terms'][-1]['district']
else:
    district = 99

for x in reversed(range(len(yaml_doc[0]['terms']))):
    if yaml_doc[0]['terms'][x]['type'] == chamber:
        start = yaml_doc[0]['terms'][x]['start']
        
party = yaml_doc[0]['terms'][-1]['party']
url = yaml_doc[0]['terms'][-1]['url']

legislator = {'name': name, 'chamber': chamber, 'state': state, 'district': district, 'start_date': start, 'party': party, 'url': url}

In [23]:
legislator

{'chamber': 'sen',
 'district': 2,
 'name': 'Sherrod Brown',
 'party': 'Democrat',
 'start_date': '2007-01-04',
 'state': 'OH',
 'url': 'https://www.brown.senate.gov'}

In [26]:
all_reps = []
for item in yaml_doc:
    name = item['name']['official_full']
    chamber = item['terms'][-1]['type']
    state = item['terms'][-1]['state']
    # only house has district, senators are assigned district 99
    if item['terms'][-1]['type'] == 'rep':
        district = item['terms'][-1]['district']
    else:
        district = 99
    # start date of first term in current chamber
    for x in reversed(range(len(item['terms']))):
        if item['terms'][x]['type'] == chamber:
            start = item['terms'][x]['start']
    party = item['terms'][-1]['party']
    # not all reps have a url
    if 'url' in item['terms'][-1]:
        url = item['terms'][-1]['url']
    else:
        url = 'none'
    legislator = {'name': name, 'chamber': chamber, 'state': state, 'district': district, 'start_date': start, 'party': party, 'url': url}
    all_reps.append(legislator)

need to map this list of legislators to a list of twitter handles, not every rep uses twitter