In [1]:
import os, sys, json, re, xlrd  # Provides OS-dependent functionality, system-specific parameters, JSON handling, and date/time manipulation
from datetime import date
import pandas as pd             # Provides data structures and data analysis tools
from openpyxl import Workbook
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import requests
import glob
import time
from tqdm import tqdm
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'
from IPython.display import display_markdown

from cprl_functions.state_capture import thi_states,state_ref, state_coding, state_pat, state_abv_pat
from cprl_functions.text_printing import bordered
from cprl_functions.defined_functions import create_pk, add_seats, get_key

### Definitions

In [2]:
def bordered(text):
    """Return ``text`` framed in a box drawn with Unicode box-drawing characters.

    Parameters
    ----------
    text : object
        Value to frame. It is converted with ``str()``; multi-line strings get
        one boxed row per line, padded on the right to the longest line.

    Returns
    -------
    str
        The boxed text, rows joined with newlines. An empty string yields a box
        with a single empty row.
    """
    # NOTE(review): this definition shadows the `bordered` imported from
    # cprl_functions.text_printing in the import cell -- confirm which one is wanted.

    # Converting up front lets one code path serve str, int and anything else;
    # the old fallback branch concatenated the raw object with ' ' and crashed.
    lines = str(text).splitlines() or ['']
    width = max(len(s) for s in lines)

    res = ['┌' + '─' * width + '┐']
    res.extend('│' + s.ljust(width) + '│' for s in lines)
    res.append('└' + '─' * width + '┘')
    return '\n'.join(res)



In [3]:
def is_majority_party(list, x):
    """Tell whether party ``x`` is the majority party among ``list``.

    Parameters
    ----------
    list : iterable
        Party labels, one per legislator. Each entry is converted with ``str()``
        and counted as Republican if it contains "Republican" and as Democrat
        if it contains "Democrat".
    x : str
        Party label to test. It is compared for equality with "Republican" or
        "Democrat", whichever has the higher count.

    Returns
    -------
    bool
        True when ``x`` equals the majority label. False otherwise, including
        when the two counts are tied (a message is printed in that case).
    """
    # NOTE(review): the parameter name shadows the builtin `list`; it is kept so
    # keyword callers keep working.
    labels = [str(entry) for entry in list]
    rep_count = sum("Republican" in label for label in labels)
    dem_count = sum("Democrat" in label for label in labels)

    if rep_count > dem_count:
        maj_party = "Republican"
    elif dem_count > rep_count:
        maj_party = "Democrat"
    else:
        # A tie used to leave maj_party unbound and raise UnboundLocalError below.
        print('somehow they are equal')
        return False

    return bool(maj_party == x)
    


### Data Gathering
Gather and clean the legislator data.

In [4]:

# Gather all legislator workbooks from the "done" folder.
# Committee data should be updated before pulling this.

os.chdir(r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\done')

# glob returns matches in filesystem-dependent order, so sort for a reproducible
# run. Also skip the "~$name.xlsx" lock files Excel creates next to an open
# workbook: they match *.xlsx but cannot be read as workbooks.
legislator_files = sorted(
    name for name in glob.glob('*.xlsx') if not name.startswith('~$')
)



In [5]:
# Compile every sheet of every legislator workbook into one dict,
# keyed by sheet name, with the sheet's DataFrame as the value.
dfs = {}
for file in legislator_files:
    # sheet_name=None already returns {sheet name: DataFrame} for the whole
    # workbook, so each file is parsed once instead of once per sheet.
    sheets_dict = pd.read_excel(file, engine="openpyxl", sheet_name=None)
    for sheet_name, sheet_df in sheets_dict.items():
        if sheet_name in dfs:
            # Same key in two workbooks: the later one still wins (as before),
            # but say so instead of replacing it silently.
            print(f'warning: sheet "{sheet_name}" from {file} replaces an earlier sheet with the same name')
        dfs[f'{sheet_name}'] = sheet_df




### Pulling all files together

In [6]:

# NOTE: this combined frame may not even be used further down.
# Keep only the first nine columns of every sheet (intended to leave out the
# committee columns) and collect the trimmed frames.
compiling = [sheet_df.iloc[:, :9] for sheet_df in dfs.values()]

# Stack the trimmed frames into one table with a fresh 0..n-1 index.
all_legs_files = pd.concat(compiling).reset_index(drop=True)

# Optional export of the combined table (disabled):
# os.chdir(r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024')
# all_legs_files.to_csv(f'all_legs_files_{str(date.today()).replace('-','_')}.csv', index=False)




## Key Lookup from "Key_Creation.py"
Pull in ref key 

In [7]:
# Legislator reference file; it is produced outside this notebook.
leg_lookup = r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\connectors\leg_lookup_df.csv'
leg_lookup_ref = pd.read_csv(leg_lookup)

# full_pk -> last name for every row of the reference file.
leg_lookup_dict = leg_lookup_ref[['full_pk', 'Last Name']].set_index('full_pk')['Last Name'].to_dict()

# Rows whose full_pk does not end in '00'
# (presumably the multi-seat districts -- TODO confirm against Key_Creation.py).
ends_in_00 = leg_lookup_ref['full_pk'].astype(str).str.endswith('00')
ms_legs = leg_lookup_ref[~ends_in_00]

# full_pk -> last name, restricted to those rows.
ms_legs_lookup = ms_legs[['full_pk', 'Last Name']].set_index('full_pk')['Last Name'].to_dict()

ms_legs

Unnamed: 0,full_pk,primary_key,First Name,Last Name
1866,43000101,430001,Patrick,Hatlestad
1867,43000102,430001,David,Richter
1868,43000201,430002,Bert,Anderson
1869,43000202,430002,Donald,Longmuir
1870,43000301,430003,Jeff,Hoverson
...,...,...,...,...
1987,57101402,571014,Jay,Taylor
1989,57101601,571016,Jason,Barrett
1990,57101602,571016,Patricia,Rucker
1991,57101701,571017,Eric,Nelson


## All Leg Files
Not sure why I'm keeping this one, as it doesn't really go into anything.

In [8]:
# Build one keyed frame per sheet and store it in dfs_w_pk[sheet name].
#   * create_pk splits a sheet into `cleaned_df` and `dupes`.
#   * cleaned_df gets its seats from add_seats.
#   * each dupes row is matched, by last name and district code, to a full_pk
#     from ms_legs_lookup; the seat number is the last two digits of that key.
dfs_w_pk = {}

for k,v in dfs.items():
    print(type(v))
    if v.empty:
        print('1')
        print('intitial is empty?')
        # Skip just this sheet. The previous `break` silently dropped every
        # sheet that came after the empty one.
        continue

    #call function to get pk
    cleaned_df, dupes = create_pk(v,'district', 'Chamber')

    #initialize list to concat
    dfs_to_concat = []

    #is cleaned empty? if not, add seats and queue it for the concat
    if cleaned_df.empty:
        print('normals are empty (from loop)')
    else:
        # NOTE(review): the frame appended is cleaned_df, not the return value,
        # so this relies on add_seats filling cleaned_df in place -- confirm.
        non_dupes = add_seats(df = cleaned_df)
        dfs_to_concat.append(cleaned_df)

    #are dupes empty? if not, resolve their keys and queue them for the concat
    if dupes.empty:
        print('dupes are empty (from loop)')
    else:
        display_markdown(f'## {k}', raw = True)
        # A 0..n-1 index makes the enumerate position below a valid .loc label.
        dupes = dupes.reset_index(drop=True)
        # object dtype: the columns receive strings, not floats.
        dupes['full_pk'] = pd.Series(np.nan, index=dupes.index, dtype=object)
        dupes['seat_num'] = pd.Series(np.nan, index=dupes.index, dtype=object)
        print('*****************')

        for i,j in enumerate(dupes['district_code']):
            name = dupes.loc[i,'Last Name']
            for p in get_key(name, ms_legs_lookup):
                p_str = str(p).strip()
                # characters 4-6 of the key are the district code
                # (e.g. 43000101 -> '001')
                match = re.search(r'(?<=^\d{3})\d{3}(?=\d{2})', p_str)
                if match is None:
                    # key does not have the expected layout; try the next one
                    continue
                if j == match.group(0):
                    dupes.loc[i,'full_pk'] = p_str
                    seat_match = re.search(r'\d{2}$', p_str)
                    if seat_match is not None:
                        dupes.loc[i,'seat_num'] = seat_match.group(0)
                    break
            else:
                # no candidate key matched this row's district
                print(f'no full_pk match for {name} (district {j}) in {k}')

        new_dupes = dupes
        dfs_to_concat.append(new_dupes)

    # Move the key columns to fixed positions:
    # full_pk -> 0, state_code -> 2, chamber_code -> 3, seat_num -> 5.
    for y in dfs_to_concat:
        moved = {col: y.pop(col) for col in ('full_pk', 'state_code', 'chamber_code', 'seat_num')}
        for position, col in zip((0, 2, 3, 5), moved):
            y.insert(position, col, moved[col])

    if not dfs_to_concat:
        # Nothing usable in this sheet; do not store a stale d_new from an
        # earlier iteration under this key.
        print('all are empty')
        continue

    # ignore_index gives unique 0..n-1 labels even when both parts are present.
    d_new = pd.concat(dfs_to_concat, ignore_index=True)

    print(d_new.columns)
    dfs_w_pk[k] = d_new


# print(leg_files_fpk.iloc[:3,:4].to_string)



<class 'pandas.core.frame.DataFrame'>
duplicate df is empty (from function)
dupes are empty (from loop)
Index(['full_pk', 'primary_key', 'state_code', 'chamber_code', 'district_code',
       'seat_num', 'State Abbreviation', 'Chamber', 'full title', 'First Name',
       'Last Name', 'Party', 'district', 'tenure', 'leader',
       'Education Policy', 'Ways and Means Education',
       'Ways and Means General Fund', 'Children and Senior Advocacy',
       'Health'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
duplicate df is empty (from function)
dupes are empty (from loop)
Index(['full_pk', 'primary_key', 'state_code', 'chamber_code', 'district_code',
       'seat_num', 'State Abbreviation', 'Chamber', 'full title', 'First Name',
       'Last Name', 'Party', 'district', 'tenure', 'leader',
       'Education Policy', 'Finance and Taxation Education',
       'Finance and Taxation General Fund', 'Children and Youth Health',
       'Healthcare'],
      dtype='object')
<class 

## ND_house

*****************
Index(['full_pk', 'primary_key', 'state_code', 'chamber_code', 'district_code',
       'seat_num', 'State Abbreviation', 'Chamber', 'full title', 'First Name',
       'Last Name', 'Party', 'district', 'seat', 'tenure', 'leader',
       'Appropriations (Education and Environment)', 'Education',
       'Finance & Taxation', 'Human Services'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
duplicate df is empty (from function)
dupes are empty (from loop)
Index(['full_pk', 'primary_key', 'state_code', 'chamber_code', 'district_code',
       'seat_num', 'State Abbreviation', 'Chamber', 'full title', 'First Name',
       'Last Name', 'Party', 'district', 'seat', 'tenure', 'leader',
       'Appropriations (Education and Environment)', 'Education',
       'Finance & Taxation', 'Human Services'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
duplicate df is empty (from function)
dupes are empty (from loop)
Index(['full_pk', 'primary_key', 'state_code

## WV_senate

*****************
Index(['full_pk', 'primary_key', 'state_code', 'chamber_code', 'district_code',
       'seat_num', 'State Abbreviation', 'Chamber', 'full title', 'First Name',
       'Last Name', 'Party', 'district', 'tenure', 'leader', 'Education',
       'Finance', 'Health & Human Resources', 'School Choice',
       'Education (Joint)', 'Children & Families (Joint)'],
      dtype='object')


In [9]:
# Spot check of one keyed frame. To preview all of them instead:
# for k,v in dfs_w_pk.items():
#     print(k)
#     print(v.head(2))

INSPECT_KEY = "AL_house"
value = dfs_w_pk.get(INSPECT_KEY)

# plain-text dump of every row, followed by the rich display of the same frame
print(value.to_string())
value

      full_pk primary_key state_code chamber_code district_code seat_num State Abbreviation Chamber                                  full title  First Name    Last Name       Party  district  tenure                             leader         Education Policy Ways and Means Education Ways and Means General Fund Children and Senior Advocacy      Health
0    10000100      100001         10            0           001       00                 AL   House       Alabama Representative Phillip Pettus     Phillip       Pettus  Republican         1      10                                NaN                      NaN                      NaN                         NaN                          NaN         NaN
1    10001000      100010         10            0           010       00                 AL   House        Alabama Representative Marilyn Lands     Marilyn        Lands    Democrat        10       0                                NaN                      NaN                      NaN           

Unnamed: 0,full_pk,primary_key,state_code,chamber_code,district_code,seat_num,State Abbreviation,Chamber,full title,First Name,Last Name,Party,district,tenure,leader,Education Policy,Ways and Means Education,Ways and Means General Fund,Children and Senior Advocacy,Health
0,10000100,100001,10,0,001,00,AL,House,Alabama Representative Phillip Pettus,Phillip,Pettus,Republican,1,10,,,,,,
1,10001000,100010,10,0,010,00,AL,House,Alabama Representative Marilyn Lands,Marilyn,Lands,Democrat,10,0,,,,,,
2,10010000,100100,10,0,100,00,AL,House,Alabama Representative Mark Shirey,Mark,Shirey,Republican,100,2,,,,,,Member
3,10010100,100101,10,0,101,00,AL,House,Alabama Representative Chris Pringle,Chris,Pringle,Republican,101,10,Speaker Pro Tempore of the House,,,,,
4,10010200,100102,10,0,102,00,AL,House,Alabama Representative Shane Stringer,Shane,Stringer,Republican,102,6,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,10009500,100095,10,0,095,00,AL,House,Alabama Representative Frances Holk-Jones,Frances,Holk-Jones,Republican,95,2,,Member,,,,Member
100,10009600,100096,10,0,096,00,AL,House,Alabama Representative Matt Simpson,Matt,Simpson,Republican,96,6,,,,,,
101,10009700,100097,10,0,097,00,AL,House,Alabama Representative Adline Clarke,Adline,Clarke,Democrat,97,11,,,,,,
102,10009800,100098,10,0,098,00,AL,House,Alabama Representative Napoleon Bracy,Napoleon,Bracy,Democrat,98,14,,,,Member,,


### Manual Leadership Files

In [None]:
# Build leadership_dict: full_pk -> value of the 'leader' column, from the
# manually maintained rankings file.
leadership_positions_file = r"C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\all_legs_files_w_rankings.csv"
leaders_lookup = pd.read_csv(leadership_positions_file)

#create primary key for leadership file
infl_rankings, rankings_dupes = create_pk(leaders_lookup, 'district', 'Chamber')

infl_rankings = infl_rankings.dropna(axis = 0, subset='district')
infl_rankings.reset_index(inplace = True, drop = True)

#fill in dupes seats and full pk
# A 0..n-1 index makes the enumerate position below a valid .loc label. The
# old loop read the name by position but wrote by label, which can write to
# the wrong row (or append a new one) when the index is not already 0..n-1.
rankings_dupes = rankings_dupes.reset_index(drop=True)
# object dtype: the columns receive strings, not floats.
rankings_dupes['full_pk'] = pd.Series(np.nan, index=rankings_dupes.index, dtype=object)
if 'seat_num' in rankings_dupes.columns:
    rankings_dupes['seat_num'] = rankings_dupes['seat_num'].astype(object)
else:
    # Without this the column only existed after a successful match, and the
    # column selection further down raised KeyError otherwise.
    rankings_dupes['seat_num'] = pd.Series(np.nan, index=rankings_dupes.index, dtype=object)

for i,j in enumerate(rankings_dupes['district_code']):
    name = rankings_dupes.loc[i,'Last Name']
    for p in get_key(name, ms_legs_lookup):
        p_str = str(p).strip()
        # characters 4-6 of the key are the district code (e.g. 43000101 -> '001')
        match = re.search(r'(?<=^\d{3})\d{3}(?=\d{2})', p_str)
        if match is None:
            # key does not have the expected layout; try the next one
            continue
        if j == match.group(0):
            rankings_dupes.loc[i,'full_pk'] = p_str
            seat_match = re.search(r'\d{2}$', p_str)
            if seat_match is not None:
                rankings_dupes.loc[i,'seat_num'] = seat_match.group(0)
            break
    else:
        # no candidate key matched this row's district
        print(f'no full_pk match for {name} (district {j}) in leadership file')


infl_non_dupes = add_seats(df = infl_rankings)

# Keep the same six columns from both parts, then stack them.
leadership_columns = ['full_pk', 'primary_key', 'seat_num', 'First Name', 'Last Name', 'leader']
infl_non_dupes = infl_non_dupes.loc[:, leadership_columns]
rankings_dupes = rankings_dupes.loc[:, leadership_columns]

leadership_files = pd.concat([infl_non_dupes, rankings_dupes])
leadership_files.reset_index(inplace=True, drop=True)

leadership_dict = (leadership_files.loc[:,['full_pk', 'leader']]).set_index('full_pk')['leader'].to_dict()

# for k,v in leadership_dict.items():
#     print(k,v)
# ms_legs_lookup = (ms_legs.loc[:,['full_pk', 'Last Name']]).set_index('full_pk')['Last Name'].to_dict()
# infl_non_dupes
# rankings_dupes
# leadership_files


### Influence Score calculation
Pulls in committee data, leadership values, and tenure to calculate the influence score.

In [None]:
# value = dfs_w_pk.get('ND_house')
# print(value.to_string())

In [12]:

# Influence score per legislator, built from three inputs:
#   * leadership tier   - the leadership_dict value for the legislator's full_pk
#                         ('[1]' = first tier, '[2]' = second tier, any other
#                         non-empty value = other leadership)
#   * committee roles   - the cells of every column to the right of 'leader'
#   * tenure            - adds 1 / 2 / 3 points above 2 / 6 / 10
# Scores are capped at 20. A legislator left at the base score of 1 gets no
# score (NaN).
from collections import Counter


def _committee_roles(assignments):
    """Return (is_chair, is_vice, member, minority_mem) for one legislator's committee cells."""
    is_chair = is_vice = member = minority_mem = False
    for role in assignments:
        if pd.isna(role):
            # empty cell: not on that committee
            continue
        role = str(role)
        if re.search(r'^[Cc]hair', role):
            is_chair = True
        elif re.search(r'[Vv]ice-?\s?[Cc]hair', role):
            is_vice = True
        elif re.search(r'[Mm]ember', role):
            member = True
        elif re.search(r'[Mm]inority', role):
            minority_mem = True
    return is_chair, is_vice, member, minority_mem


def _base_influence_score(in_maj_party, first_tier, second_tier, other_tier,
                          is_chair, is_vice, member, minority_mem):
    """Return the score before the tenure bonus; 1 means no qualifying role."""
    if in_maj_party:
        if first_tier:
            return 20               # speaker / top majority leader
        if second_tier or is_chair:
            return 15               # other tagged majority leader, or committee chair
        if is_vice or other_tier:
            return 10               # vice chair, or other majority leadership
        if member:
            return 5                # committee member
        return 1
    if first_tier or is_chair:
        return 15                   # minority leader, or committee chair
    if second_tier or is_vice:
        return 10
    if minority_mem or member or other_tier:
        return 5                    # ranking minority member, member, other leadership
    return 1


def _tenure_bonus(tenure):
    """Return the tenure modifier; comparisons with NaN are False, so NaN adds 0."""
    if tenure > 10:
        return 3
    if tenure > 6:
        return 2
    if tenure > 2:
        return 1
    return 0


in_process = []
influence_scores = []
for k,v in dfs_w_pk.items():
    display_markdown(f' # {k}', raw = True)
    df = v

    #Connecticut is all in one file since their committees are all joint.
    #This splits it by chamber; every other sheet becomes a list of one.
    if re.search(r'^CT', str(k)):
        house = df[df['Chamber'] == "House"]
        senate = df[df['Chamber'] == "Senate"]
        dfs_temp = [house, senate]
    else:
        dfs_temp = [df]
    print(k)

    for d in dfs_temp:
        print('###########')

        # Work on a copy with a 0..n-1 index. The row position is used as the
        # row label below, and the frames stored in dfs_w_pk stay untouched,
        # so the cell can be re-run safely.
        d = d.reset_index(drop=True)

        # Committee columns are everything to the right of 'leader'.
        col_list = d.columns.to_list()
        index_start = None
        for ic,col in enumerate(col_list):
            if re.search(r'^leader', str(col)):
                index_start = ic+1
                break
        if index_start is None:
            # the old code would have reused index_start from the previous frame
            print(f"no 'leader' column in {k}; skipping")
            continue

        # The old slice stopped at d.shape[1]-1 and so left out the LAST
        # committee column. Select by name, skipping the helper columns this
        # cell creates itself.
        committee_cols = [c for c in col_list[index_start:]
                          if c not in ('influence_score', 'leaders')]
        committee_rows = d[committee_cols].values.tolist()

        party_list_uc = d['Party'].to_list()
        tenures = pd.to_numeric(d['tenure'], errors='coerce')
        majority_by_party = {}      # party label -> is it the chamber's majority?

        leaders = []
        scores = []
        for i,hv in enumerate(d['full_pk']):

            # leadership tier
            value = None if pd.isna(hv) else leadership_dict.get(hv)
            leaders.append(value)
            value_str = str(value)
            tagged = bool(re.search(r'\[\d\]', value_str))
            first_tier = bool(re.search(r'\[1\]', value_str))
            second_tier = (not first_tier) and bool(re.search(r'\[2\]', value_str))
            # NOTE(review): other_tier used to be True for EVERY legislator
            # without a '[n]' tag, including those with no leadership entry,
            # which gave every majority-party member at least 10. It is now
            # limited to non-empty untagged entries -- confirm this is intended.
            has_leadership = (value is not None and not pd.isna(value)
                              and value_str.strip() != '')
            other_tier = has_leadership and not tagged

            # majority party (evaluated once per distinct party label)
            party = str(d['Party'].iloc[i])
            if party not in majority_by_party:
                majority_by_party[party] = bool(is_majority_party(party_list_uc, party))
            in_maj_party = majority_by_party[party]

            # committee roles
            is_chair, is_vice, member, minority_mem = _committee_roles(committee_rows[i])

            # scoring
            score = _base_influence_score(in_maj_party, first_tier, second_tier, other_tier,
                                          is_chair, is_vice, member, minority_mem)
            score += _tenure_bonus(tenures.iloc[i])

            #make sure 20 is max score
            if score > 20:
                score = 20

            # a bare base score of 1 is left unscored
            scores.append(np.nan if score == 1 else score)

        d['leaders'] = leaders
        d['influence_score'] = scores

        #df creation and appending to list of dfs
        final_df = d.loc[:,['full_pk', 'First Name', 'Last Name', 'influence_score']]
        influence_scores.append(final_df)



 # AL_house

AL_house
###########


 # AL_senate

AL_senate
###########


 # CT_joint_coms

CT_joint_coms
###########
###########


 # IL_house

IL_house
###########


 # IL_senate

IL_senate
###########


 # IN_Senate

IN_Senate
###########


 # IN_House

IN_House
###########


 # KS_house

KS_house
###########


 # KS_senate

KS_senate
###########


 # MO_house

MO_house
###########


 # MO_senate

MO_senate
###########


 # NC_house

NC_house
###########


 # NC_senate

NC_senate
###########


 # ND_house

ND_house
###########


 # ND_senate

ND_senate
###########


 # NM_house

NM_house
###########


 # NM_senate

NM_senate
###########


 # OH_house

OH_house
###########


 # OH_senate

OH_senate
###########


 # OK_house

OK_house
###########


 # OK_senate

OK_senate
###########


 # VA_house

VA_house
###########


 # VA_senate

VA_senate
###########


 # WV_house

WV_house
###########


 # WV_senate

WV_senate
###########


In [None]:
# full_df = pd.concat(in_process)
# full_df

In [13]:

# Stack the per-chamber score tables, drop rows that have no full_pk,
# and renumber the rows 0..n-1 ready for export.
leg_infl_df = (
    pd.concat(influence_scores)
    .dropna(subset='full_pk')
    .reset_index(drop=True)
)


In [14]:

# Switch to the "build files" folder and write the scores there, without the index column.
build_files_dir = r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files'
os.chdir(build_files_dir)
leg_infl_df.to_csv("leg_infl_df.csv", index=False)
        
# leg_infl_df


### Defunct
The chunk below is vestigial, left over from using the rankings list from the ncls website.

Cell below is an older chunk that looked through the raw legislator files, cell above contains the same information

In [31]:
# leader_dfs = []
# for i,j in enumerate(leader_rankings_df['position']):
#     if re.search(r'[Ss]peaker', str(j)):
#         continue
#     elif re.search(r'[Mm]ajority|[Mm]inority', str(j)):
#         # #print(j)
#         continue
#     else:
#         # #print('***not found***')
#         # #print(j)
#         # #print("**************")
#         # #print(leader_rankings_df.iloc[i,:].to_string())
#         df2 = pd.DataFrame(columns=['state', 'position', 'chamber'])
#         df2 = df2._append(leader_rankings_df.iloc[i], ignore_index=True)
#         # #print(type(df))
#         leader_dfs.append(df2)
#         # #print('\n')

# outliers = pd.concat(leader_dfs)


In [None]:
# ranking_file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\leader_rankings.csv"
# rankings = pd.read_csv(ranking_file)


# file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\leadership_ranking.xlsx"
# leader_rankings_df = pd.read_excel(file)
# #print(*leader_rankings_df.columns)
# leader_rankings_df['state'] = leader_rankings_df['state'].fillna(method="ffill")

# n = len(leader_rankings_df)
# break_point = False

# for i,j in enumerate(leader_rankings_df['state']):
#     if "Wyoming" in str(j) and "Alabama" in leader_rankings_df['state'].iloc[i+1]:
#         index_stop = i + 1
#         break_point = True


#     else:
#         continue

#     if break_point == True:
#         house_list = ['House']*index_stop
#         senate_list = ['Senate']*(n-index_stop)
#         full_list = house_list + senate_list
#         leader_rankings_df['chamber'] = full_list
#         leader_rankings_df.dropna(inplace=True)
#         break

# os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data')
# leader_rankings_df.to_csv('leader_rankings.csv', index_label= False, index=False)

# #print(leader_rankings_df[leader_rankings_df['state'].str.contains('Connecticut')].to_string())

