## Setup


In [19]:
#imports
import os, sys, json  # Provides OS-dependent functionality, system-specific parameters, JSON handling
import pandas as pd             # Provides data structures and data analysis tools
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import requests
import time
import xlsxwriter
from tqdm import tqdm
from datetime import date #date/time manipulation

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'
from IPython.display import display_markdown
# %%

#### Definitions

In [20]:
#Initial legislator data cleaning
def make_initial_df(df, state, chamber):
    """Select the rows of ``df`` that belong to one state's legislative chamber.

    Args:
        df: DataFrame with 'State Abbreviation' and 'Chamber' columns.
        state: Value compared against the 'State Abbreviation' column.
        chamber: Value compared against the 'Chamber' column (this notebook
            passes "House" or "Senate").

    Returns:
        The matching rows of ``df`` with every column and the original index.
    """
    state_matches = df['State Abbreviation'] == state
    chamber_matches = df['Chamber'] == chamber
    return df[state_matches & chamber_matches]

In [21]:
#get legislator details such as district, tenure, party, etc
def calc_leg_details(state_chamber):
    """Build the tracked-column view of a chamber roster, adding tenure and leader.

    Args:
        state_chamber: DataFrame holding at least the columns listed in
            ``track_cols`` below.

    Returns:
        A new DataFrame with the tracked columns plus:
          * 'tenure' -- the current calendar year minus 'Date Assumed Office'.
          * 'leader' -- the text before the first '|' in 'Committee List',
            or NaN when the entry contains no '|'.
        The frame passed in is left untouched.

    Raises:
        KeyError: if a tracked column is missing from ``state_chamber``.
    """
    track_cols = ['State Abbreviation', 'Chamber', 'Title', 'First Name',
        'Last Name', 'Party', 'District', 'Date Assumed Office', 'Committee List',
        ]
    # Explicit copy so the new columns are added to our own frame rather than
    # to a slice of the caller's data (no reliance on silenced pandas warnings).
    details = state_chamber[track_cols].copy()

    # Calculate tenure
    # assumes 'Date Assumed Office' holds numeric years -- TODO confirm against the source sheet
    current_year = date.today().year
    details['tenure'] = current_year - details['Date Assumed Office']

    def _leader_title(committee_entry):
        # The entry is stringified first, so NaN/None simply contain no '|'.
        head, sep, _rest = str(committee_entry).partition('|')
        return head if sep else np.nan

    # Pre-fill so the column exists (as NaN) even if the extraction below fails.
    details['leader'] = np.nan
    try:
        details['leader'] = details['Committee List'].apply(_leader_title)
    except Exception as exc:
        # Best effort: keep the NaN column, but report why extraction failed.
        print(f"something wrong with leader column creation: {exc}")
    return details

In [22]:
# get committees definition
def get_comms(df, state, chamber):
    """List the committee names recorded for one state's legislative chamber.

    Args:
        df: DataFrame with 'state', 'branch' and 'committee' columns.
        state: Value compared against the 'state' column.
        chamber: Value compared against the 'branch' column.

    Returns:
        A plain list of the 'committee' values from the matching rows, in
        row order (empty when nothing matches).
    """
    in_chamber = (df['state'] == state) & (df['branch'] == chamber)
    return df.loc[in_chamber, 'committee'].tolist()

In [23]:
#create final state leg file
def create_state_leg_files(df, state_list, base_dir=None):
    """Write one dated Excel workbook of legislator details per state.

    For every state in ``state_list`` the House and Senate rows are selected
    with ``make_initial_df``, enriched with ``calc_leg_details`` and written to
    ``<base_dir>/<state>/<state>_legislators_<YYYY_MM_DD>.xlsx`` as two sheets
    named ``<state>_house`` and ``<state>_senate``.

    Processing stops at the first state whose House or Senate frame is missing
    or empty; workbooks already written for earlier states are kept.

    Args:
        df: Legislator roster in the form ``make_initial_df`` expects.
        state_list: Iterable of state abbreviations.
        base_dir: Folder holding the per-state output folders. Defaults to the
            legislator data folder this notebook has always written to.

    Returns:
        None.

    Side effects:
        Creates the state folder when it does not exist and leaves the process
        working directory set to the last state folder that was written.
    """
    if base_dir is None:
        base_dir = r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data'
    # Resolve once up front: the loop changes the working directory, so a
    # relative base would otherwise drift from one state to the next.
    base_dir = os.path.abspath(base_dir)
    current_date = str(date.today()).replace('-', '_')

    for state in state_list:
        # Normalise once so both chambers are filtered with the same value.
        state = str(state)
        house = make_initial_df(df, state, "House")
        house_df = calc_leg_details(house)
        senate = make_initial_df(df, state, "Senate")
        senate_df = calc_leg_details(senate)

        # Defensive guards in case the detail step ever yields no frame.
        if senate_df is None:
            print(f'something wrong with {state} senate')
            print(senate.to_string())
            return None
        if house_df is None:
            print(f'something wrong with {state} house')
            print(house.to_string())
            return None

        # An empty chamber aborts the whole run rather than writing a blank sheet.
        if len(senate_df) == 0:
            print('zero length')
            print(state)
            return None
        if len(house_df) == 0:
            print(state)
            print('zero length')
            return None

        new_path = os.path.join(base_dir, state)
        os.makedirs(new_path, exist_ok=True)
        # Kept from the original behaviour: cells run afterwards may rely on
        # the working directory pointing at the state folder.
        os.chdir(new_path)

        file_name = f'{state}_legislators_{current_date}.xlsx'
        # The context manager saves and closes the workbook even when writing
        # one of the sheets raises.
        with pd.ExcelWriter(os.path.join(new_path, file_name), engine='xlsxwriter') as writer:
            # Write each dataframe to a different worksheet.
            house_df.to_excel(writer, sheet_name=f'{state}_house', index=False)
            senate_df.to_excel(writer, sheet_name=f'{state}_senate', index=False)

### Data Gathering

In [24]:
# State abbreviations whose legislator workbooks get generated.
states = [
    'ND', 'NM', 'OH', 'OK', 'VA', 'WV', 'AL',
    'CT', 'IL', 'IN', 'KS', 'MO', 'NC',
]

In [25]:
# Officials roster downloaded from Quorum Sheets; the current export is the
# data_tracking_v2 sheet.
officials_file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\data_tracking_v2.xlsx"
officials = pd.read_excel(io=officials_file)

In [26]:
# Committee listing for each state legislative chamber, loaded from the
# committees_data workbook.
comms_file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\committees_data.xlsx"
comms = pd.read_excel(io=comms_file)

In [27]:
# Final call
# Full run over every entry in `states` (left disabled):
# create_state_leg_files(officials, states)
# %% one off creations
create_state_leg_files(df=officials, state_list=['WV'])

# Individual Calls

### NC 
#### House


In [None]:
# NC House roster: filter the officials, add tenure/leader details, then export
# to the cleaned NC folder.
nc_house = make_initial_df(officials, "NC", "House")
nc_house_df = calc_leg_details(nc_house)
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nc\cleaned')
# index=False keeps the pandas row index out of the CSV, matching the NC Senate export.
nc_house_df.to_csv('nc_house_officials.csv', index=False)


#### Senate


In [None]:

# NC Senate roster: filter the officials, add tenure/leader details, then
# export to the cleaned NC folder without the row index.
nc_senate = make_initial_df(df=officials, state="NC", chamber="Senate")
nc_senate_df = calc_leg_details(state_chamber=nc_senate)
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nc\cleaned')
nc_senate_df.to_csv(path_or_buf='nc_senate_officials.csv', index=False)

### Illinois

In [None]:
# House (Illinois): filter the officials, then add tenure/leader details.
il_house = make_initial_df(df=officials, state="IL", chamber="House")
il_house_df = calc_leg_details(state_chamber=il_house)

In [None]:
# House (North Dakota): filter the officials, then add tenure/leader details.
# NOTE(review): this cell sits under the Illinois heading and the same frames
# are rebuilt in the North Dakota section below.
nd_house = make_initial_df(df=officials, state="ND", chamber="House")
nd_house_df = calc_leg_details(state_chamber=nd_house)

In [None]:
# Senate (North Dakota): filter the officials, then add tenure/leader details.
# NOTE(review): this cell sits under the Illinois heading and the same frames
# are rebuilt in the North Dakota section below.
nd_senate = make_initial_df(df=officials, state="ND", chamber="Senate")
nd_senate_df = calc_leg_details(state_chamber=nd_senate)

In [None]:
# Work from the ND output folder so the relative workbook name below lands there.
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nd')
# Open the ND workbook with XlsxWriter as the engine. The sheets are written,
# and the writer closed, in the "writing to excel for ND" cell further down.
writer = pd.ExcelWriter(path='ND_legislators.xlsx', engine='xlsxwriter')

### North Dakota
#### House

In [None]:
# ND House roster: filter the officials, then add tenure/leader details.
nd_house = make_initial_df(df=officials, state="ND", chamber="House")
nd_house_df = calc_leg_details(state_chamber=nd_house)

#### Senate

In [None]:
# ND Senate roster: filter the officials, then add tenure/leader details.
nd_senate = make_initial_df(df=officials, state="ND", chamber="Senate")
nd_senate_df = calc_leg_details(state_chamber=nd_senate)

In [None]:
#writing to excel for ND
# `writer` is the ND_legislators.xlsx ExcelWriter opened in an earlier cell.
try:
    # Write each dataframe to a different worksheet.
    nd_house_df.to_excel(writer, sheet_name='nd_house', index=False)
    nd_senate_df.to_excel(writer, sheet_name='nd_senate', index=False)
finally:
    # Close the Pandas Excel writer and output the Excel file, even when a
    # sheet fails to write, so the file handle is never left open.
    writer.close()
# The workbook is complete once the writer is closed, so nothing is reopened
# here: a second writer on the same file would never be closed, and `state`
# is not defined at notebook scope.

### New Mexico

In [None]:
# House (New Mexico): filter the officials, then add tenure/leader details.
nm_house = make_initial_df(df=officials, state="NM", chamber="House")
nm_house_df = calc_leg_details(state_chamber=nm_house)

In [None]:
# Senate (New Mexico): filter the officials, then add tenure/leader details.
nm_senate = make_initial_df(df=officials, state="NM", chamber="Senate")
nm_senate_df = calc_leg_details(state_chamber=nm_senate)