## Upload CSV Files

In [114]:
import pandas as pd
import sys
import os, os.path
from collections import defaultdict

data_dir = os.path.join(os.getcwd(), 'data/')

rep_and_district_info_filename = 'Names_Districts_Counties.csv'
county_asthma_info_filename = 'Asthma_Data_ALA_6.26.2019.csv'
county_polling_info_filename = 'Yale_Polling.csv'
voting_history_filename = 'vote_history.csv'

rep_and_district_info = pd.read_csv(data_dir + rep_and_district_info_filename, encoding = "ISO-8859-1")
county_asthma_info = pd.read_csv(data_dir + county_asthma_info_filename, encoding = "ISO-8859-1")
county_polling_info = pd.read_csv(data_dir + county_polling_info_filename, encoding = "ISO-8859-1")
voting_history = pd.read_csv(data_dir + voting_history_filename, encoding = "ISO-8859-1")

In [115]:
# Make sure the data was read correctly and is what we expect

print(rep_and_district_info.head(3))
print(county_asthma_info.head(3))
print(county_polling_info.head(3))
print(voting_history.head(3))

   Branch First Name Last Name District Party  \
0  Senate     George    Barker       39     D   
1  Senate    Richard     Black       13     R   
2  Senate   Jennifer    Boysko       33     D   

                                            Counties  
0  Fairfax County, Prince William County, Alexand...  
1            Loudoun County, Prince William County,   
2                   Fairfax County, Loudoun County,   
            County Total Pop Under 18 65 & Over Pediatric Asthma Adult Asthma  \
0         Accomack    32,545    6,756     7,402              536        2,245   
1        Albemarle   107,702   21,636    19,153            1,719        7,565   
2  Alexandria City   160,035   28,866    17,715            2,293       11,685   

    COPD Lung Cancer CV Disease Diabetes Poverty Estimate  
0  2,045          18      2,723    3,222            5,715  
1  6,096          59      7,726    9,287            8,015  
2  8,058          88      9,353   11,706           15,922  
  GeoType    GeoNa

## Clean County Lists


In [116]:
rep_and_district_info_counties = rep_and_district_info['Counties']
rep_and_district_info_districts = rep_and_district_info['District']

#Clean up district values
rep_and_district_info_counties = pd.Series(rep_and_district_info_counties.str.split(pat=","))

all_counties = []
county_lists = []
for district_to_counties_list in rep_and_district_info_counties:
    sanitized_counties = []
    for county in district_to_counties_list:
        sanitized_county_name = county.replace(' County','').strip()
        if sanitized_county_name:
            all_counties.append(sanitized_county_name)
            sanitized_counties.append(sanitized_county_name)
    county_lists.append(sanitized_counties)    
            
districts = list(rep_and_district_info['District'])
for i in range(len(districts)):        
    if "th" in districts[i] or "st" in districts[i] or "nd" in districts[i] or "rd" in districts[i]:
            districts[i] = districts[i][:-2]
            
district_to_counties = dict(zip(districts, county_lists))

all_counties = list(set(all_counties))
all_counties = list([i for i in all_counties if i])

In [117]:
# Sanity check the outputs

district_to_counties

{'39': ['Fairfax'],
 '13': ['Prince William'],
 '33': ['Clarke', 'Frederick', 'Loudoun'],
 '40': ['Fairfax', 'Prince William'],
 '38': ['Fairfax'],
 '11': ['Roanoke City'],
 '14': ['Henry', 'Pittsylvania'],
 '16': ['Henry', 'Pittsylvania'],
 '8': ['Craig', 'Montgomery', 'Roanoke'],
 '25': ['Albemarle', 'Augusta', 'Rockingham'],
 '12': ['Giles', 'Montgomery', 'Pulaski'],
 '30': ['Culpeper', 'Madison', 'Orange'],
 '21': ['Chesapeake City', 'Virginia Beach City'],
 '31': ['Fauquier', 'Prince William'],
 '24': ['Amherst', 'Augusta', 'Bath', 'Rockbridge'],
 '32': ['Loudoun'],
 '6': ['Carroll', 'Smyth', 'Wythe'],
 '2': ['Prince William', 'Stafford'],
 '18': ['Culpeper', 'Fauquier', 'Rappahannock', 'Warren'],
 '37': ['Fairfax'],
 '1': ['Lee', 'Scott', 'Wise'],
 '9': ['Franklin', 'Henry', 'Patrick'],
 '4': ['Dickenson', 'Russell', 'Washington', 'Wise'],
 '29': ['Frederick', 'Warren'],
 '23': ['Amherst', 'Bedford'],
 '3': ['Bland', 'Buchanan', 'Russell', 'Tazewell'],
 '26': ['Rockingham'],
 '22

In [118]:
# Drop labels 133 to get rid of "total" column
asthma_info_counties = list(county_asthma_info['County'].drop(labels=133))
asthma_info_children = list(county_asthma_info['Pediatric Asthma'].drop(labels=133))
asthma_info_adults = list(county_asthma_info['Adult Asthma'].drop(labels=133))

asthma_info_children = [int(x.replace(',', '')) for x in asthma_info_children]
asthma_info_adults = [int(x.replace(',', '')) for x in asthma_info_adults]

county_to_asthma_children = dict(zip(asthma_info_counties, asthma_info_children))
county_to_asthma_adults = dict(zip(asthma_info_counties, asthma_info_adults))

In [119]:
county_to_asthma_adults

{'Accomack': 2245,
 'Albemarle': 7565,
 'Alexandria City': 11685,
 'Alleghany': 1064,
 'Amelia': 904,
 'Amherst': 2209,
 'Appomattox': 1076,
 'Arlington': 17210,
 'Augusta': 5311,
 'Bath': 315,
 'Bedford': 5461,
 'Bland': 465,
 'Botetourt': 2349,
 'Bristol City': 1167,
 'Brunswick': 1174,
 'Buchanan': 1549,
 'Buckingham': 1230,
 'Buena Vista City': 450,
 'Campbell': 3871,
 'Caroline': 2056,
 'Carroll': 2104,
 'Charles City': 516,
 'Charlotte': 830,
 'Charlottesville City': 3592,
 'Chesapeake City': 16141,
 'Chesterfield': 23149,
 'Clarke': 1013,
 'Colonial Heights City': 1184,
 'Covington City': 393,
 'Craig': 360,
 'Culpeper': 3391,
 'Cumberland': 686,
 'Danville City': 2799,
 'Dickenson': 1032,
 'Dinwiddie': 1987,
 'Emporia City': 355,
 'Essex': 775,
 'Fairfax': 78010,
 'Fairfax City': 1622,
 'Falls Church City': 968,
 'Fauquier': 4695,
 'Floyd': 1097,
 'Fluvanna': 1851,
 'Franklin': 3967,
 'Franklin City': 544,
 'Frederick': 5844,
 'Fredericksburg City': 2000,
 'Galax City': 452,
 '

# Asthma

## Sum childhood, adult, and total asthma for the counties that make up each district

Text: In the four counties that make up House District 100, 10,000 kids and 20,000 adults live with asthma.

In [120]:
# Make a mapping of istrict to asthma totals, # of counts of district.

district_to_asthma_text = {}
for district,counties in district_to_counties.items():    
    child_asthma_count = 0
    adult_asthma_count = 0
    for county in counties:
        child_asthma_count += county_to_asthma_children[county]
        adult_asthma_count += county_to_asthma_adults[county]
    
    if len(counties) > 1:
        district_to_asthma_text[district] =  """In the %d counties that make up House District %s, %d kids and %d adults live with asthma.""" % (len(counties), district, child_asthma_count, adult_asthma_count)
    else:
        district_to_asthma_text[district] =  """In the House District %s, %d kids and %d adults live with asthma.""" % (district, child_asthma_count, adult_asthma_count)

In [121]:
district_to_asthma_text

{'39': 'In the House District 39, 21376 kids and 78010 adults live with asthma.',
 '13': 'In the House District 13, 10040 kids and 30053 adults live with asthma.',
 '33': 'In the 3 counties that make up House District 33, 10832 kids and 32306 adults live with asthma.',
 '40': 'In the 2 counties that make up House District 40, 31416 kids and 108063 adults live with asthma.',
 '38': 'In the House District 38, 21376 kids and 78010 adults live with asthma.',
 '11': 'In the House District 11, 1784 kids and 6820 adults live with asthma.',
 '14': 'In the 2 counties that make up House District 14, 1729 kids and 7908 adults live with asthma.',
 '16': 'In the 2 counties that make up House District 16, 1729 kids and 7908 adults live with asthma.',
 '8': 'In the 3 counties that make up House District 8, 2797 kids and 14292 adults live with asthma.',
 '25': 'In the 3 counties that make up House District 25, 4274 kids and 18363 adults live with asthma.',
 '12': 'In the 3 counties that make up House 

# Polling

## Create a weighted average of polling results for the counties that make up each district

Text: 

In the counties that make up this district, [weighted average]% know that global warming is happening, and [weighted average] are somewhat or very worried about it. [weighted average]% support regulating CO2 as a pollutant, and [weighted average] want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels. 


- Number of people who know that global warming is happening: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Percent who are somewhat or very worried about climate change: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Support regulating CO2 as a pollutant: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]
- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: 
    - [[x%]] in [[least populous county]], and [[y%]] in [[most populous county]]


In [122]:
county_polling_info_counties = list(county_polling_info['GeoName'])
county_polling_info_pop = dict(zip(county_polling_info_counties,list(county_polling_info['TotalPop'])))
county_polling_info_happening = dict(zip(county_polling_info_counties,list(county_polling_info['happening'])))
county_polling_info_worried = dict(zip(county_polling_info_counties,list(county_polling_info['worried'])))
county_polling_info_regulate = dict(zip(county_polling_info_counties,list(county_polling_info['regulate'])))
county_polling_info_rebates = dict(zip(county_polling_info_counties,list(county_polling_info['rebates'])))

In [123]:
district_to_overview = {}
for district,counties in district_to_counties.items():    
    total_pop, know_num, worried_num, regulate_num, rebates_num = 0.0,0.0,0.0,0.0,0.0
    
    for county in counties:
        total_pop += county_polling_info_pop[county]
        know_num += county_polling_info_happening[county] * county_polling_info_pop[county]
        worried_num += county_polling_info_worried[county] * county_polling_info_pop[county]
        regulate_num += county_polling_info_regulate[county] * county_polling_info_pop[county]
        rebates_num += county_polling_info_rebates[county] * county_polling_info_pop[county]
        
    district_to_overview[district] =  ("In the counties that make up"
                                       " this district, {0:.2f}% know that"
                                       " global warming is happening,"
                                       " and {1:.2f}% are somewhat or very"
                                       " worried about it. {2:.2f}% support"
                                       " regulating CO2 as a pollutant,"
                                       " and {3:.2f}% want to provide tax"
                                       " rebates for people who purchase"
                                       " energy-efficient vehicles or "
                                       "solar panels.").format(know_num/total_pop, worried_num/total_pop, regulate_num/total_pop, rebates_num/total_pop)

In [124]:
print(district_to_overview)

{'39': 'In the counties that make up this district, 77.25% know that global warming is happening, and 69.92% are somewhat or very worried about it. 78.79% support regulating CO2 as a pollutant, and 84.10% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '13': 'In the counties that make up this district, 73.28% know that global warming is happening, and 65.14% are somewhat or very worried about it. 78.52% support regulating CO2 as a pollutant, and 83.82% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '33': 'In the counties that make up this district, 72.54% know that global warming is happening, and 63.58% are somewhat or very worried about it. 78.58% support regulating CO2 as a pollutant, and 83.30% want to provide tax rebates for people who purchase energy-efficient vehicles or solar panels.', '40': 'In the counties that make up this district, 76.19% know that global warming is happening,

In [125]:
district_to_specific = {}
for district,counties in district_to_counties.items():
    max_pop_county, min_pop_county = '',''
    max_pop = -1
    min_pop = sys.maxsize

    
    for county in counties:
        if county_polling_info_pop[county] > max_pop:
            max_pop = county_polling_info_pop[county]
            max_pop_county = county
        if county_polling_info_pop[county] < min_pop:
            min_pop = county_polling_info_pop[county]
            min_pop_county = county                        
    
    if not max_pop_county or not min_pop_county:
        continue
    min_pop_happening = county_polling_info_happening[min_pop_county]
    min_pop_worried = county_polling_info_worried[min_pop_county]
    min_pop_regulate = county_polling_info_regulate[min_pop_county]
    min_pop_rebates = county_polling_info_rebates[min_pop_county]
    max_pop_happening = county_polling_info_happening[max_pop_county]
    max_pop_worried = county_polling_info_worried[max_pop_county]
    max_pop_regulate = county_polling_info_regulate[max_pop_county]
    max_pop_rebates = county_polling_info_rebates[max_pop_county]
    
    district_to_specific[district] = ("- Number of people who know that global warming is happening:" +
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_happening, min_pop_county, max_pop_happening, max_pop_county)
    district_to_specific[district] += (
    "\n- Percent who are somewhat or very worried about climate change: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_worried, min_pop_county, max_pop_worried, max_pop_county)

    district_to_specific[district] += (
    "\n- Support regulating CO2 as a pollutant: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}"
    ).format(min_pop_regulate, min_pop_county, max_pop_regulate, max_pop_county)

    district_to_specific[district] += (
    "\n- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: "
        "\n\t- {0:.2f}% in {1}, and {2:.2f}% in {3}").format(min_pop_rebates, min_pop_county, max_pop_rebates, max_pop_county)

In [126]:
print(district_to_specific)

{'39': '- Number of people who know that global warming is happening:\n\t- 77.25% in Fairfax, and 77.25% in Fairfax\n- Percent who are somewhat or very worried about climate change: \n\t- 69.92% in Fairfax, and 69.92% in Fairfax\n- Support regulating CO2 as a pollutant: \n\t- 78.79% in Fairfax, and 78.79% in Fairfax\n- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: \n\t- 84.10% in Fairfax, and 84.10% in Fairfax', '13': '- Number of people who know that global warming is happening:\n\t- 73.28% in Prince William, and 73.28% in Prince William\n- Percent who are somewhat or very worried about climate change: \n\t- 65.14% in Prince William, and 65.14% in Prince William\n- Support regulating CO2 as a pollutant: \n\t- 78.52% in Prince William, and 78.52% in Prince William\n- Support tax rebates for people who purchase energy-efficient vehicles or solar panels: \n\t- 83.82% in Prince William, and 83.82% in Prince William', '33': '- Number of people who k

# Opposition Research

1. Build out an excel file with the following:
   - Name of incumbent
   - Important bills, and a vote (positive/negative)
   - 2 descriptions for each bill: positive and negative
   - The correct description to use, based on the vote history

2. Match incumbent names into the correct description, then add all descriptions together to create an opposition research paragraph that looks like this:

When it comes to common sense clean energy reforms, [[incumbent]] has a mixed record. They voted to block Virginia from entering regional collaborative efforts to combat climate change, specifically by voting to prevent Virginia from entering the Regional Greenhouse Gas Initiative. States already in this program have seen lower electricity bills and less pollution – joining it is a no-brainer. [[He/She]] also voted against integrating environmental education into Virginia's classrooms. 

However, [[incumbent]]’s record is not all bad. [[incumbent last name]] voted to ensure that the State Corporation Commission cannot reject or cut climate-forward legislation without reason. They also voted to establish a Clean Energy Advisory Board, which will run a pilot program to allow low-to-moderate income households to receive rebates for solar panels. Finally, [[Incumbent]] voted to ensure that coal plants dispose of coal ash– one of the largest types of industrial waste generated in the United States– safely and sustainably.



In [127]:
rep_and_district_info_districts = list(rep_and_district_info['District'])
rep_and_district_info_candidate_first = list(rep_and_district_info['First Name'])
rep_and_district_info_candidate_last = list(rep_and_district_info['Last Name'])

sanitized_candidate_names = []
for candidate_name in zip(rep_and_district_info_candidate_first,rep_and_district_info_candidate_last):
    sanitized_candidate_names.append(candidate_name[0] + " " + candidate_name[1])
    
candidate_to_district = {}
for candidate_district_pair in zip(sanitized_candidate_names, rep_and_district_info_districts):
    candidate = candidate_district_pair[0]
    district = candidate_district_pair[1]
    if "th" in district or "st" in district or "nd" in district or "rd" in district:
        candidate_to_district[candidate] = district[:-2]
    else:
        candidate_to_district[candidate] = district

In [128]:
candidate_to_voting_record_sentences = defaultdict(list)

for row in voting_history.values:
    candidate_name = row[1] + " " + row[2]
    for i in range(4, len(row)):
        candidate_to_voting_record_sentences[candidate_name].append(row[i].replace("Candidate", candidate_name))            

In [129]:
candidate_to_voting_record_sentences

defaultdict(list,
            {'George Barker': ['-',
              '-',
              '-',
              '-',
              'George Barker voted to make it more difficult for Virginia to enter regional collaborative efforts to combat climate change in the transportation sector (via the Transportation and Climate Initiative).',
              '-',
              '-',
              'George Barker voted to make it more difficult for Virginia to enter into a regional cap and trade program (RGGI). States in this program have seen lower electricity bills and more clean job growth - joining it makes common sense.',
              '-',
              '-',
              'George Barker voted to ensure that coal plants dispose of coal ash- one of the largest types of industrial waste generated in the United States-Â\xa0safely and sustainably.',
              '-',
              '-',
              "George Barker voted to improve monitoring of elecric utilities' energy efficienty programs, which promot

In [130]:
# Parse the voting record
    # We have positive and negative votes, but I think we really just need the sentences
    # Drop first x columns
candidate_to_voting_record_text = {}
for candidate,voting_record_sentences in candidate_to_voting_record_sentences.items():
    voting_record_text = ""
    for sentence in voting_record_sentences:
        if sentence != "-":
            voting_record_text += sentence + " "
    candidate_to_voting_record_text[candidate] = voting_record_text  

# Final Printing

In [131]:
# Write output to file
with open('output.txt', 'a') as outfile: 
    for candidate in sanitized_candidate_names:
        d = candidate_to_district[candidate]
        district_text = (
            district_to_asthma_text[d] + "\n\n" + 
            district_to_overview[d] + "\n\n" + 
            district_to_specific[d] + "\n\n" + 
            candidate_to_voting_record_text[candidate]
        )
        
        outfile.write("_" * 80 + "\n")
        outfile.write(district_text + "\n")
        outfile.write("_" * 80 + "\n")

# Print output for convenience
# with open('output.txt', 'r') as outfile:
#     for line in outfile:
#         print(line)