In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import math
import geopandas as gpd
import random
from bs4 import BeautifulSoup
import html5lib
import requests
import regex as re
from matplotlib import colors

In [None]:
# Folder that contains the unzipped `tl_2021_us_state` and `tl_2021_us_cd116`
# shapefile directories. Kept in one place so the notebook can be pointed at
# another machine's data by editing a single line.
DATA_DIR = '/Users/gmoos19/Downloads'

# State polygons and congressional-district polygons
# (presumably the 116th Congress, going by the `cd116` file name).
states = gpd.read_file(f'{DATA_DIR}/tl_2021_us_state/tl_2021_us_state.shp')
districts = gpd.read_file(f'{DATA_DIR}/tl_2021_us_cd116/tl_2021_us_cd116.shp')

In [None]:
# Lookup table pairing a region code (kept as a string: '1'..'4') with its name.
# A later cell joins it to the `REGION` column of `states` via the `Index` column.
regions = dict(
    Index=[str(code) for code in range(1, 5)],
    Region=['Northeast', 'Midwest', 'South', 'West'],
)
r_df = pd.DataFrame(data=regions)

In [None]:
# Scrape the state name / postal code / FIPS table from the USDA NRCS site
# into `state_fps_df` (one column per header cell, one row per table row).
r = requests.get(
    "https://www.nrcs.usda.gov/wps/portal/nrcs/detail/?cid=nrcs143_013696",
    timeout=30,  # requests waits indefinitely by default; fail instead of hanging the kernel
)
# Surface an HTTP error here rather than as an opaque IndexError on the
# tag lookups below.
r.raise_for_status()
# NOTE(review): no parser is named, so bs4 picks whichever one is installed.
# The positional offsets below depend on how the page is parsed; pin the
# parser once the intended one is confirmed.
soup = BeautifulSoup(r.content)

# Literal fragments removed from the stringified tags to leave the bare text.
_TH_FRAGMENTS = ('<th scope="col">', '</th>', '\r\n\t\t\t\t')
_TD_FRAGMENTS = ("</td>", '<td>\r\n\t\t\t\t')


def _strip_fragments(tag, fragments):
    """Return ``str(tag)`` with each literal in ``fragments`` removed, in order."""
    text = str(tag)
    for fragment in fragments:
        text = text.replace(fragment, "")
    return text


# The first three <th> cells supply the column headers. Later cells rely on
# them being 'Name', 'Postal Code' and 'FIPS'.
header_cells = soup.find_all('th')
names = [_strip_fragments(header_cells[0], _TH_FRAGMENTS)]
postal_codes = [_strip_fragments(header_cells[1], _TH_FRAGMENTS)]
FIPS = [_strip_fragments(header_cells[2], _TH_FRAGMENTS)]

# Data cells are consumed three at a time (name, postal code, FIPS).
# NOTE(review): the 30..194 window is tied to the page layout at the time of
# writing; confirm it still brackets the table if the page changes.
fp_list = soup.find_all('td')
for i in range(30, 195, 3):
    names.append(_strip_fragments(fp_list[i], _TD_FRAGMENTS))
    postal_codes.append(_strip_fragments(fp_list[i + 1], _TD_FRAGMENTS))
    FIPS.append(_strip_fragments(fp_list[i + 2], _TD_FRAGMENTS))

# Element 0 of each list is the header text; the remainder is the column data.
state_fps = {names[0]: names[1:], postal_codes[0]: postal_codes[1:], FIPS[0]: FIPS[1:]}
state_fps_df = pd.DataFrame(state_fps)

In [None]:
# Step 1: attach the scraped state name / postal code to each district by FIPS code.
# Note: `districts` is rebound here, so re-running this cell merges onto the
# already-merged frame.
districts = (
    districts
    .merge(state_fps_df, left_on='STATEFP', right_on='FIPS', how='left')
    .drop(columns='FIPS')
)

# Step 2: postal code -> region name, built from the state shapefile attributes.
region_state_df = (
    states[['REGION', 'STUSPS']]
    .merge(r_df, left_on='REGION', right_on='Index', how='left')
    .drop(columns=['Index', 'REGION'])
)

# Step 3: attach the region name to each district through its postal code.
districts = (
    districts
    .merge(region_state_df, left_on='Postal Code', right_on='STUSPS', how='left')
    .drop(columns='STUSPS')
)

In [None]:
# Scrape https://www.house.gov/representatives into `states_df`, one row per
# listed member with columns State / Party / Districts / Full Names.
congressional = requests.get('https://www.house.gov/representatives', timeout=30)
# Stop on an HTTP error page instead of regex-matching its markup below.
congressional.raise_for_status()
# NOTE(review): no parser is named, so bs4 picks whichever one is installed.
# The regexes below match the re-serialised markup, so pin the parser once
# the intended one is confirmed.
soup1 = BeautifulSoup(congressional.content)
congress_people = soup1.find_all('table')
state_leg_dict = {'State': [], 'Party': [], 'Districts': [], 'Full Names': []}

for state_leg in congress_people:
    state_leg = str(state_leg)
    state = re.search('>\n(.*)\n                </caption>', state_leg)
    if state is None:
        # No caption in the expected layout: there is no state to attribute
        # the rows to, so skip the table instead of crashing on `.group`.
        continue
    state_name = state.group(1).strip()
    if len(state_name) <= 1:
        # Captions of at most one character are not state tables
        # (presumably the alphabetical index tables; TODO confirm).
        continue

    district_lst = re.findall('class="views-field views-field-value-2"(.*)        </td>', state_leg)
    full_names = re.findall('.house.gov[/]{0,1}">(.*)</a> </td>', state_leg)
    party = re.findall('class="views-field views-field-value-7"(.*)        </td>', state_leg)

    # Members the name pattern above does not capture, re-inserted at fixed
    # positions. NOTE(review): names and positions are tied to the roster at
    # the time of writing; re-check them against the live page.
    if state_name == 'Minnesota':
        full_names.insert(0, 'Hagedorn, Jim')
    if state_name == 'Massachusetts':
        full_names.insert(4, 'Clark, Katherine')
    if state_name == 'California':
        full_names.insert(21, 'Nunes, Devin')

    # The three lists are zipped into rows purely by position, so a mismatch
    # in one state would silently shift every later row. Fail here, naming
    # the state, instead of at DataFrame construction (or not at all).
    if not (len(party) == len(district_lst) == len(full_names)):
        raise ValueError(
            f"{state_name}: scraped {len(party)} party cells, "
            f"{len(district_lst)} district cells and {len(full_names)} names; "
            "the page layout or the hard-coded name inserts need updating"
        )

    for party_cell in party:
        # The party letter is the last character of the captured cell.
        # NOTE(review): a cell ending in a space is recorded as 'R'; the
        # reason is not evident from this notebook, so confirm.
        if party_cell[-1] == ' ':
            state_leg_dict['Party'].append('R')
        else:
            state_leg_dict['Party'].append(party_cell[-1])
        state_leg_dict['State'].append(state_name)

    for dist in district_lst:
        dist_code = dist.split('>')[-1]
        if dist_code == 'At Large':
            dist_code = '00'
        else:
            # Drop the last two characters (presumably an ordinal suffix such as 'st' / 'th').
            dist_code = dist_code[:-2]
        if len(dist_code) == 1:
            # Left-pad to two characters to line up with `CD116FP` in the later merge.
            dist_code = '0' + dist_code
        state_leg_dict['Districts'].append(dist_code)

    state_leg_dict['Full Names'].extend(full_names)

states_df = pd.DataFrame(state_leg_dict)

In [None]:
# Delegations that are not among the 50 states are dropped from the member table.
lst_to_remove = ['American Samoa', 'District of Columbia', 'Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands']
states_df = states_df[~states_df['State'].isin(lst_to_remove)]

# Left join: every district shape is kept. Shapes with no matching member row
# get NaN in the member columns (`State`, `Party`, ...).
full_mapping = (
    districts
    .merge(states_df, how='left', left_on=['Name', 'CD116FP'], right_on=['State', 'Districts'])
    .drop(columns=['Name'])
)

# `isin` evaluates to False for NaN, so negating it alone would keep every
# unmatched shape (the delegations removed above, and any other unmatched row)
# in `lower_48`. Require a matched state explicitly.
lower_48 = full_mapping[
    full_mapping['State'].notna() & ~full_mapping['State'].isin(['Hawaii', 'Alaska'])
]


In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

# NOTE(review): despite the name, this holds the Florida rows only, and no
# visible cell reads it.
South = full_mapping[full_mapping['State'] == 'Florida']

# Two-colour map for the Party column (presumably 'D' -> blue and 'R' -> red;
# a third party value would break that pairing, TODO confirm).
cmap = colors.ListedColormap(['blue', 'red'])
fig, ax = plt.subplots(1, 1)
# NOTE(review): `divider` is created but not used in the visible cells.
divider = make_axes_locatable(ax)

# The former `figsize=(200, 2000)` argument was dropped: GeoPandas ignores
# `figsize` whenever an explicit `ax` is supplied, so it had no effect.
# To resize the figure, pass `figsize` to `plt.subplots` above.
lower_48.plot(column='Party', cmap=cmap, ax=ax, legend=True, edgecolor='black')

In [None]:
# Keep only rows labelled with one of the four named regions; rows whose
# `Region` is missing or anything else are dropped.
in_named_region = full_mapping['Region'].isin(['Northeast', 'Midwest', 'West', 'South'])
test_mapping = full_mapping[in_named_region]

In [None]:
# NOTE(review): `create_map` is defined in a later cell of this notebook, so on
# a fresh kernel this cell raises NameError unless that cell has been run first.
# Build the map for the region-filtered rows and write it to an HTML file.
test_map = create_map(test_mapping)
test_map.save('congressional_mapping.html')

In [None]:
def target_state(df, state=None, is_region=False, region=None):
    """Return the rows of ``df`` belonging to one state or one region.

    Parameters
    ----------
    df : DataFrame with ``State`` and ``Region`` columns.
    state : value compared against ``df['State']``; used when ``is_region`` is falsy.
    is_region : when truthy, filter on ``Region`` instead of ``State``.
    region : value compared against ``df['Region']``; used when ``is_region`` is truthy.

    Returns
    -------
    The subset of ``df`` whose selected column equals the requested value.
    """
    # Choose the column / value pair once, then apply a single equality filter.
    column, wanted = ('Region', region) if is_region else ('State', state)
    return df[df[column] == wanted]
    
def create_map(df):
    """Build a map of ``df`` coloured by its ``Party`` column.

    Parameters
    ----------
    df : object exposing ``explore(**kwargs)`` (presumably a GeoDataFrame).

    Returns
    -------
    Whatever ``df.explore`` returns for the options below.
    """
    explore_options = {
        'column': 'Party',                 # colour each shape by its Party value
        'tooltip': 'Party',                # show the Party value on hover
        'popup': True,                     # show all values on click
        'tiles': 'CartoDB positron',       # background tile set
        'cmap': ['blue', 'red'],           # two explicit colours for the Party values
        'style_kwds': {'color': 'black'},  # black outlines
        'width': '50%',
        'height': '50%',
    }
    return df.explore(**explore_options)



# Map the 'South' region only.
test_mapping_1 = target_state(test_mapping, None, True, 'South')
mapped_test = create_map(test_mapping_1)

# Corner points for the initial view, taken from the per-row interior points.
# The columns are cast to float first: if they arrive from the shapefile as
# text (presumably signed strings such as '+27.6' / '-081.5'; TODO confirm),
# min()/max() would compare them lexicographically and the bounds would be
# passed on as strings. The cast is a no-op when they are already numeric.
interior_points = test_mapping_1[['INTPTLAT', 'INTPTLON']].astype(float)
sw = interior_points.min().values.tolist()
ne = interior_points.max().values.tolist()

mapped_test.fit_bounds([sw, ne])
mapped_test