## Upload CSV Files

In [117]:
import pandas as pd
import sys
import os, os.path
from collections import defaultdict

data_dir = os.path.join(os.getcwd(), 'data/')

rep_and_district_info_filename = 'Names_Districts_Counties.csv'
county_asthma_info_filename = 'Asthma_Data_ALA_6.26.2019.csv'
county_polling_info_filename = 'Yale_Polling.csv'
voting_history_filename = 'vote_history.csv'
daily_kos_path = os.path.join(data_dir, 'daily-kos/congressional-districts-to-counties/')

rep_and_district_info = pd.read_csv(data_dir + rep_and_district_info_filename, encoding = "ISO-8859-1")
county_asthma_info = pd.read_csv(data_dir + county_asthma_info_filename, encoding = "ISO-8859-1")
county_polling_info = pd.read_csv(data_dir + county_polling_info_filename, encoding = "ISO-8859-1")
voting_history = pd.read_csv(data_dir + voting_history_filename, encoding = "ISO-8859-1")

district_county_info = {}
for filename in os.listdir(daily_kos_path):
    state_abr = os.path.splitext(filename)[0]
    file_path = os.path.join(daily_kos_path, filename)
    district_county_info[state_abr] = pd.read_csv(file_path, encoding = "ISO-8859-1")


In [118]:
# Make sure the data was read correctly and is what we expect

print(rep_and_district_info.head(3))
print("-"*40)
print(county_asthma_info.head(3))
print("-"*40)
print(county_polling_info.head(3))
print("-"*40)
print(voting_history.head(3))
print("-"*40)
print(district_county_info["MA"].head(3))

   Branch First Name Last Name District Party  \
0  Senate     George    Barker       39     D   
1  Senate    Richard     Black       13     R   
2  Senate   Jennifer    Boysko       33     D   

                                            Counties  
0  Fairfax County, Prince William County, Alexand...  
1            Loudoun County, Prince William County,   
2                   Fairfax County, Loudoun County,   
----------------------------------------
            County Total Pop Under 18 65 & Over Pediatric Asthma Adult Asthma  \
0         Accomack    32,545    6,756     7,402              536        2,245   
1        Albemarle   107,702   21,636    19,153            1,719        7,565   
2  Alexandria City   160,035   28,866    17,715            2,293       11,685   

    COPD Lung Cancer CV Disease Diabetes Poverty Estimate  
0  2,045          18      2,723    3,222            5,715  
1  6,096          59      7,726    9,287            8,015  
2  8,058          88      9,353   11,

## Generate District to County Info.


In [119]:
# rep_and_district_info_counties = rep_and_district_info['Counties']
# rep_and_district_info_districts = rep_and_district_info['District']

# #Clean up district values
# rep_and_district_info_counties = pd.Series(rep_and_district_info_counties.str.split(pat=","))

# all_counties = []
# county_lists = []
# for district_to_counties_list in rep_and_district_info_counties:
#     sanitized_counties = []
#     for county in district_to_counties_list:
#         sanitized_county_name = county.replace(' County','').strip()
#         if sanitized_county_name:
#             all_counties.append(sanitized_county_name)
#             sanitized_counties.append(sanitized_county_name)
#     county_lists.append(sanitized_counties)    
            
# districts = list(rep_and_district_info['District'])
# for i in range(len(districts)):        
#     if "th" in districts[i] or "st" in districts[i] or "nd" in districts[i] or "rd" in districts[i]:
#             districts[i] = districts[i][:-2]
            
# district_to_counties = dict(zip(districts, county_lists))

# all_counties = list(set(all_counties))
# all_counties = list([i for i in all_counties if i])

district_to_counties_by_state = {}
for state, state_info in district_county_info.items():
    district_to_counties = {}
    for idx, row in state_info.iterrows():
        cd_num = row.get("CD #.1")
        county = row.get("County.1")
        county_pop_in_district = row.get("County Pop.\nin CD")
        percent_of_cd_in_county = row.get("% of CD\nin County")

        if not cd_num or not county:
            continue
            
        cd_key = str(cd_num)
            
        if cd_key not in district_to_counties:
            district_to_counties[cd_key] = []
        
        district_to_counties[cd_key].append((
            county.lower(), 
            county, 
            county_pop_in_district, 
            percent_of_cd_in_county))
    district_to_counties_by_state[state] = district_to_counties   

In [120]:
# Sanity check the outputs

district_to_counties_by_state["VA"]

{'1': [('caroline', 'Caroline', 28545, 0.039),
  ('essex', 'Essex', 11151, 0.015),
  ('fauquier', 'Fauquier', 14762, 0.02),
  ('gloucester', 'Gloucester', 36858, 0.051),
  ('hanover', 'Hanover', 99863, 0.13699999999999998),
  ('james city', 'James City', 50453, 0.069),
  ('king and queen', 'King and Queen', 6945, 0.01),
  ('king george', 'King George', 23584, 0.032),
  ('king william', 'King William', 15935, 0.022000000000000002),
  ('lancaster', 'Lancaster', 11391, 0.016),
  ('mathews', 'Mathews', 8978, 0.012),
  ('middlesex', 'Middlesex', 10959, 0.015),
  ('new kent', 'New Kent', 18429, 0.025),
  ('northumberland', 'Northumberland', 12330, 0.017),
  ('prince william', 'Prince William', 167068, 0.23),
  ('richmond', 'Richmond', 9254, 0.013000000000000001),
  ('spotsylvania', 'Spotsylvania', 30160, 0.040999999999999995),
  ('stafford', 'Stafford', 128961, 0.177),
  ('westmoreland', 'Westmoreland', 17454, 0.024),
  ('fredericksburg city', 'Fredericksburg city', 24286, 0.033)],
 '2': [('

In [121]:
# Drop labels 133 to get rid of "total" column
asthma_info_counties = [name.lower() for name in county_asthma_info['County'].drop(labels=133)]
asthma_info_children = list(county_asthma_info['Pediatric Asthma'].drop(labels=133))
asthma_info_adults = list(county_asthma_info['Adult Asthma'].drop(labels=133))

asthma_info_children = [int(x.replace(',', '')) for x in asthma_info_children]
asthma_info_adults = [int(x.replace(',', '')) for x in asthma_info_adults]

county_to_asthma_children = dict(zip(asthma_info_counties, asthma_info_children))
county_to_asthma_adults = dict(zip(asthma_info_counties, asthma_info_adults))

In [122]:
county_to_asthma_adults

{'accomack': 2245,
 'albemarle': 7565,
 'alexandria city': 11685,
 'alleghany': 1064,
 'amelia': 904,
 'amherst': 2209,
 'appomattox': 1076,
 'arlington': 17210,
 'augusta': 5311,
 'bath': 315,
 'bedford': 5461,
 'bland': 465,
 'botetourt': 2349,
 'bristol city': 1167,
 'brunswick': 1174,
 'buchanan': 1549,
 'buckingham': 1230,
 'buena vista city': 450,
 'campbell': 3871,
 'caroline': 2056,
 'carroll': 2104,
 'charles city': 516,
 'charlotte': 830,
 'charlottesville city': 3592,
 'chesapeake city': 16141,
 'chesterfield': 23149,
 'clarke': 1013,
 'colonial heights city': 1184,
 'covington city': 393,
 'craig': 360,
 'culpeper': 3391,
 'cumberland': 686,
 'danville city': 2799,
 'dickenson': 1032,
 'dinwiddie': 1987,
 'emporia city': 355,
 'essex': 775,
 'fairfax': 78010,
 'fairfax city': 1622,
 'falls church city': 968,
 'fauquier': 4695,
 'floyd': 1097,
 'fluvanna': 1851,
 'franklin': 3967,
 'franklin city': 544,
 'frederick': 5844,
 'fredericksburg city': 2000,
 'galax city': 452,
 '

# Asthma

## Sum childhood, adult, and total asthma for the counties that make up each district

Text: In the four counties that make up House District 100, 10,000 kids and 20,000 adults live with asthma.

In [123]:
# Make a mapping of istrict to asthma totals, # of counts of district.

district_to_asthma_text = {}
for district, county_info in district_to_counties_by_state["VA"].items():    
    child_asthma_count = 0
    adult_asthma_count = 0
    for (county_key, county, population, percent_of_district) in county_info:
        child_asthma_count += county_to_asthma_children[county_key]
        adult_asthma_count += county_to_asthma_adults[county_key]
    
    if len(county_info) > 1:
        district_to_asthma_text[district] =  """In the %d counties that make up House District %s, %d kids and %d adults live with asthma.""" % (len(county_info), district, child_asthma_count, adult_asthma_count)
    else:
        district_to_asthma_text[district] =  """In the House District %s, %d kids and %d adults live with asthma.""" % (district, child_asthma_count, adult_asthma_count)

In [124]:
district_to_asthma_text

{'1': 'In the 20 counties that make up House District 1, 24087 kids and 83052 adults live with asthma.',
 '2': 'In the 10 counties that make up House District 2, 20918 kids and 84956 adults live with asthma.',
 '3': 'In the 8 counties that make up House District 3, 18344 kids and 70715 adults live with asthma.',
 '4': 'In the 16 counties that make up House District 4, 25365 kids and 98004 adults live with asthma.',
 '5': 'In the 24 counties that make up House District 5, 13653 kids and 60324 adults live with asthma.',
 '6': 'In the 19 counties that make up House District 6, 13172 kids and 57676 adults live with asthma.',
 '7': 'In the 10 counties that make up House District 7, 18512 kids and 68391 adults live with asthma.',
 '8': 'In the 4 counties that make up House District 8, 27296 kids and 107873 adults live with asthma.',
 '9': 'In the 29 counties that make up House District 9, 11758 kids and 56375 adults live with asthma.',
 '10': 'In the 8 counties that make up House District 10

# Polling

## Create a weighted average of polling results for the counties that make up each district

Text: 

In the counties that make up this district, [weighted average]% know that global warming is happening, and [weighted average] are somewhat or very worried about it. [weighted average]% support regulating CO2 as a pollutant, and [weighted average] want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels. 


- Number of people who know that global warming is happening: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Percent who are somewhat or very worried about climate change: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Support regulating CO2 as a pollutant: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]


In [125]:
county_polling_info_counties = [name.lower() for name in county_polling_info['GeoName']]
# county_polling_info_pop = dict(zip(county_polling_info_counties,list(county_polling_info['TotalPop'])))
county_polling_info_happening = dict(zip(county_polling_info_counties,list(county_polling_info['happening'])))
county_polling_info_worried = dict(zip(county_polling_info_counties,list(county_polling_info['worried'])))
county_polling_info_regulate = dict(zip(county_polling_info_counties,list(county_polling_info['regulate'])))
county_polling_info_rebates = dict(zip(county_polling_info_counties,list(county_polling_info['rebates'])))

In [126]:
district_to_overview = {}
for district,county_info in district_to_counties_by_state["VA"].items():    
    total_pop, know_num, worried_num, regulate_num, rebates_num = 0.0,0.0,0.0,0.0,0.0
    
    for (county_key, county, population, percent_of_district) in county_info:
        total_pop += population
        know_num += county_polling_info_happening[county_key] * population
        worried_num += county_polling_info_worried[county_key] * population
        regulate_num += county_polling_info_regulate[county_key] * population
        rebates_num += county_polling_info_rebates[county_key] * population
        
    district_to_overview[district] =  ("In the counties that make up"
                                       " this district, {0:.2f}% know that"
                                       " global warming is happening,"
                                       " and {1:.2f}% are somewhat or very"
                                       " worried about it. {2:.2f}% support"
                                       " regulating CO2 as a pollutant,"
                                       " and {3:.2f}% want to provide tax"
                                       " rebates for people who purchase"
                                       " energy-efficient vehicles or "
                                       "solar panels.").format(know_num/total_pop, worried_num/total_pop, regulate_num/total_pop, rebates_num/total_pop)

In [127]:
print(district_to_overview)

{'1': 'In the counties that make up this district, 69.52% know that global warming is happening, and 58.79% are somewhat or very worried about it. 76.55% support regulating CO2 as a pollutant, and 81.48% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '2': 'In the counties that make up this district, 70.15% know that global warming is happening, and 58.99% are somewhat or very worried about it. 76.95% support regulating CO2 as a pollutant, and 81.56% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '3': 'In the counties that make up this district, 73.88% know that global warming is happening, and 63.24% are somewhat or very worried about it. 79.78% support regulating CO2 as a pollutant, and 83.16% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '4': 'In the counties that make up this district, 72.44% know that global warming is happening, and

In [128]:
district_to_specific = {}
for district, county_info in district_to_counties_by_state["VA"].items():
    
    # Format: (county_key, county, population, percent_of_district)
    max_pop_county = max(county_info, key=lambda x:x[2]) # by population
    min_pop_county = min(county_info, key=lambda x:x[2]) # by population
    
    if not max_pop_county or not min_pop_county:
        continue
    min_pop_happening = county_polling_info_happening[min_pop_county[0]]
    min_pop_worried = county_polling_info_worried[min_pop_county[0]]
    min_pop_regulate = county_polling_info_regulate[min_pop_county[0]]
    min_pop_rebates = county_polling_info_rebates[min_pop_county[0]]
    max_pop_happening = county_polling_info_happening[max_pop_county[0]]
    max_pop_worried = county_polling_info_worried[max_pop_county[0]]
    max_pop_regulate = county_polling_info_regulate[max_pop_county[0]]
    max_pop_rebates = county_polling_info_rebates[max_pop_county[0]]
    
    district_to_specific[district] = ("- Number of people who know that global warming is happening:" +
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_happening, min_pop_county[1], max_pop_happening, max_pop_county[1])
    district_to_specific[district] += (
    "\n- Percent who are somewhat or very worried about climate change: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_worried, min_pop_county[1], max_pop_worried, max_pop_county[1])

    district_to_specific[district] += (
    "\n- Support regulating CO2 as a pollutant: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}"
    ).format(min_pop_regulate, min_pop_county[1], max_pop_regulate, max_pop_county[1])

    district_to_specific[district] += (
    "\n- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_rebates, min_pop_county, max_pop_rebates, max_pop_county)

In [129]:
print(district_to_specific)

{'1': "- Number of people who know that global warming is happening:\n\t- 65.16% in King and Queen, and 73.28% in Prince William\n- Percent who are somewhat or very worried about climate change: \n\t- 54.63% in King and Queen, and 65.14% in Prince William\n- Support regulating CO2 as a pollutant: \n\t- 74.52% in King and Queen, and 78.52% in Prince William\n- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: \n\t- 79.88% in ('king and queen', 'King and Queen', 6945, 0.01), and 83.82% in ('prince william', 'Prince William', 167068, 0.23)", '2': "- Number of people who know that global warming is happening:\n\t- 76.53% in Newport News city, and 69.45% in Virginia Beach city\n- Percent who are somewhat or very worried about climate change: \n\t- 65.33% in Newport News city, and 57.52% in Virginia Beach city\n- Support regulating CO2 as a pollutant: \n\t- 80.18% in Newport News city, and 76.40% in Virginia Beach city\n- Support tax rebates for people wh

# Opposition Research

1. Build out an excel file with the following:
   - Name of incumbent
   - Important bills, and a vote (positive/negative)
   - 2 descriptions for each bill: positive and negative
   - The correct description to use, based on the vote history

2. Match incumbent names into the correct description, then add all descriptions together to create an opposition research paragraph that looks like this:

When it comes to common sense clean energy reforms, [[incumbent]] has a mixed record. They voted to block Virginia from entering regional collaborative efforts to combat climate change, specifically by voting to prevent Virginia from entering the Regional Greenhouse Gas Initiative. States already in this program have seen lower electricity bills and less pollution – joining it is a no-brainer. [[He/She]] also voted against integrating environmental education into Virginia's classrooms. 

However, [[incumbent]]’s record is not all bad. [[incumbent last name]] voted to ensure that the State Corporation Commission cannot reject or cut climate-forward legislation without reason. They also voted to establish a Clean Energy Advisory Board, which will run a pilot program to allow low-to-moderate income households to receive rebates for solar panels. Finally, [[Incumbent]] voted to ensure that coal plants dispose of coal ash– one of the largest types of industrial waste generated in the United States– safely and sustainably.



In [130]:
rep_and_district_info_districts = list(rep_and_district_info['District'])
rep_and_district_info_candidate_first = list(rep_and_district_info['First Name'])
rep_and_district_info_candidate_last = list(rep_and_district_info['Last Name'])

sanitized_candidate_names = []
for candidate_name in zip(rep_and_district_info_candidate_first,rep_and_district_info_candidate_last):
    sanitized_candidate_names.append(candidate_name[0] + " " + candidate_name[1])
    
candidate_to_district = {}
for candidate_district_pair in zip(sanitized_candidate_names, rep_and_district_info_districts):
    candidate = candidate_district_pair[0]
    district = candidate_district_pair[1]
    if "th" in district or "st" in district or "nd" in district or "rd" in district:
        candidate_to_district[candidate] = district[:-2]
    else:
        candidate_to_district[candidate] = district

In [131]:
candidate_to_voting_record_sentences = defaultdict(list)

for row in voting_history.values:
    candidate_name = row[1] + " " + row[2]
    for i in range(4, len(row)):
        candidate_to_voting_record_sentences[candidate_name].append(row[i].replace("Candidate", candidate_name))            

In [132]:
candidate_to_voting_record_sentences

defaultdict(list,
            {'George Barker': ['-',
              '-',
              '-',
              '-',
              'George Barker voted to make it more difficult for Virginia to enter regional collaborative efforts to combat climate change in the transportation sector (via the Transportation and Climate Initiative).',
              '-',
              '-',
              'George Barker voted to make it more difficult for Virginia to enter into a regional cap and trade program (RGGI). States in this program have seen lower electricity bills and more clean job growth - joining it makes common sense.',
              '-',
              '-',
              'George Barker voted to ensure that coal plants dispose of coal ash- one of the largest types of industrial waste generated in the United States-Â\xa0safely and sustainably.',
              '-',
              '-',
              "George Barker voted to improve monitoring of elecric utilities' energy efficienty programs, which promot

In [133]:
# Parse the voting record
    # We have positive and negative votes, but I think we really just need the sentences
    # Drop first x columns
candidate_to_voting_record_text = {}
for candidate,voting_record_sentences in candidate_to_voting_record_sentences.items():
    voting_record_text = ""
    for sentence in voting_record_sentences:
        if sentence != "-":
            voting_record_text += sentence + " "
    candidate_to_voting_record_text[candidate] = voting_record_text  

# Final Printing

In [135]:
# Write output to file
with open('output.txt', 'a') as outfile: 
    for candidate in sanitized_candidate_names:
        d = candidate_to_district[candidate]
        if d == '39':
            print(d, candidate, district_to_specific.keys())
        district_text = (
            district_to_asthma_text[d] + "\n\n" + 
            district_to_overview[d] + "\n\n" + 
            district_to_specific[d] + "\n\n" + 
            candidate_to_voting_record_text[candidate]
        )
        
        outfile.write("_" * 80 + "\n")
        outfile.write(district_text + "\n")
        outfile.write("_" * 80 + "\n")

# Print output for convenience
with open('output.txt', 'r') as outfile:
    for line in outfile:
        print(line)

39 George Barker dict_keys(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'])


KeyError: '39'