In [21]:
from tabulate import tabulate
from collections import OrderedDict
import pandas as pd
import os

if not os.path.exists('../content/pages'):
    os.makedirs('../content/pages/')
list_ids = pd.read_csv('../data/list_ids.csv', index_col=0)
info = pd.read_csv('../data/congressperson_all_info.csv', index_col=0)

In [22]:
# Helper function for formatting
def capitalize_first_letters(string):
    if not isinstance(string, str):
        return string
    words = [ii.capitalize() for ii in string.split(' ')]
    return ' '.join(words)

In [23]:
capitalize_columns = ['State']
for col in capitalize_columns:
    list_ids[col] = list_ids[col].map(capitalize_first_letters)

# Create lists page

In [27]:
# Prepare data
meta = OrderedDict(
    Title='Twitter Lists',
    Date='2017-01-29 10:20',
    Modified='2017-01-29 10:20',
    Tags='politics, accountability',
    Category='',
    Slug='twitter-lists',
    Authors='Chris Holdgraf',
    Summary=('A collection of twitter lists of politician accounts,'
                     'broken down by state'))

header = '\n'.join(['{}: {}'.format(key, val) for key, val in meta.items()])

s = 'Find the data and code [here](https://github.com/choldgraf/twitter-politics)\n\n'
s += tabulate(list_ids, headers=list_ids.columns, tablefmt='pipe', showindex='never')
with open('../content/pages/lists.md', 'w') as f:
    f.writelines(header + '\n\n' + s)

# Create full handles page

In [28]:
# Prepare data
use_columns = OrderedDict()
insert_values = [('name', 'Name',), ('state', 'State'), ('body', 'Body',),
                 ('party', 'Party'), ('yrs', 'Years Served'),
                 ('district', 'District'), ('handle', 'Handle')]
for key, val in insert_values:
    use_columns[key] = val
    
# Capitalize letters
capitalize_columns = ['name', 'state']
for col in capitalize_columns:
    info[col] = info[col].map(capitalize_first_letters)

# Subset columns and print    
print_df = info[list(use_columns.keys())]
print_df.columns = use_columns.values()
mask_missing = pd.isnull(print_df['Handle'])
print_df.loc[mask_missing, 'Handle'] = "Missing"
print_df['Link'] = print_df['Handle'].map(
    lambda a: '<a href="http://twitter.com/{}">Link</a>'.format(a))
print_df.loc[mask_missing, 'Handle'] = 'Missing'
print_df.loc[pd.isnull(print_df['District']), 'District'] = 'None'
print_df = print_df.sort_values(['State', 'Body', 'Party', 'Years Served'], ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [29]:
# Generate the handles page
meta = OrderedDict(
    Title='Twitter Handles',
    Date='2017-01-29 10:20',
    Modified='2017-01-29 10:20',
    Tags='politics, accountability',
    Category='',
    Slug='twitter-handles',
    Authors='Chris Holdgraf',
    Summary=('A collection of twitter handles of politician accounts'))
header = '\n'.join(['{}: {}'.format(key, val) for key, val in meta.items()])

s = 'Find the data and code [here](https://github.com/choldgraf/twitter-politics)'
for state, values in print_df.groupby('State'):
    s += '\n\n##{}\n'.format(state)
    values = values.sort_values('Body')
    s += tabulate(values,
                  headers=print_df.columns,
                  tablefmt='pipe', showindex='never')
with open('../content/pages/handles.md', 'w') as f:
    f.writelines(header + '\n\n' + s)