**Created by:** Revekka Gershovich 
**When:** Nov 19, 2024 
**Purpose:** Build a dataset that pairs the federal level (FD) with every state for each year, so that it can be appended to the dataset held in memory in Stata in clean_for_reg.do, right before merging with the polstate1 and polstate2 datasets (i.e., the political similarity datasets).

In [1]:
import os
import os.path as path
import pandas as pd
import numpy as np

In [2]:
# Project root. The default below is machine-specific; set the STATELAWS_DIR
# environment variable to point at a different checkout without editing this cell.
# parent_dir = os.path.abspath("/Users/rivka666/Dropbox (MIT)/StateLaws")
parent_dir = os.path.abspath(
    os.environ.get("STATELAWS_DIR") or "/Users/revekkagershovich/Dropbox (MIT)/StateLaws"
)

# Validate BEFORE changing directory: os.chdir raises FileNotFoundError on a
# missing path, so an assert placed after it could never report the problem.
assert os.path.exists(parent_dir), "parent_dir does not exist"
os.chdir(parent_dir)

# Relative to parent_dir (the working directory set just above).
data_dir = "./2_data/2_intermediate/political_data"
assert os.path.exists(data_dir), "Data directory does not exist"

In [3]:
# Every second year starting at 1833; the stop value 2021 is exclusive, so the
# last year generated is 2019.
years = range(1833, 2021, 2)

# Two-letter postal abbreviations of the 50 states.
states = (
    "AL AK AZ AR CA CO CT DE FL GA "
    "HI ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY"
).split()

fd = ['FD']  # Representing the Federal Level

# For each (year, state) emit two rows, in this order:
#   1. FD as state1 and the state as state2
#   2. the state as state1 and FD as state2
data = [
    pair
    for yr in years
    for abbr in states
    for pair in (
        {'year': yr, 'state1': 'FD', 'state2': abbr},
        {'year': yr, 'state1': abbr, 'state2': 'FD'},
    )
]

# One row per generated pair; columns are year, state1, state2.
df = pd.DataFrame(data)

In [4]:
# Rename 'year' to 'year1', then attach the placeholder columns and a 'year2'
# copy of 'year1'. Renaming first keeps the cell safe to re-run: once 'year'
# is gone the rename is a no-op and 'year2' is still derived from 'year1'.
df = df.rename(columns={'year': 'year1'}).assign(
    censussimilarity=np.nan,
    distance=np.nan,
    rank_dist=np.nan,
    share_border_state1=np.nan,
    # NOTE(review): share_border_state1 is NaN while share_border_state2 is 0 —
    # confirm this asymmetry is intended.
    share_border_state2=0,
    expanded=0,
    year2=lambda frame: frame['year1'],
)

In [5]:
# Display or save the DataFrame
# Preview ten random rows as a sanity check (nothing is saved here).
# random_state pins the draw so a Restart & Run All shows the same rows every
# time; leaving the expression as the cell's last line lets Jupyter render the
# frame as a table instead of a line-wrapped plain-text print.
df.sample(10, random_state=0)

      year1 state1 state2  censussimilarity  distance  rank_dist  \
5761   1947     NM     FD               NaN       NaN        NaN   
1650   1865     FD     MT               NaN       NaN        NaN   
5078   1933     FD     SC               NaN       NaN        NaN   
5547   1943     MS     FD               NaN       NaN        NaN   
950    1851     FD     MT               NaN       NaN        NaN   
6857   1969     NH     FD               NaN       NaN        NaN   
8727   2007     IN     FD               NaN       NaN        NaN   
1449   1861     MO     FD               NaN       NaN        NaN   
1988   1871     FD     VT               NaN       NaN        NaN   
8368   1999     FD     OH               NaN       NaN        NaN   

      share_border_state1  share_border_state2  expanded  year2  
5761                  NaN                    0         0   1947  
1650                  NaN                    0         0   1865  
5078                  NaN                    0       

In [6]:
# Lookup from two-letter code to full name; 'FD' is included so the federal
# pseudo-state resolves to "Federal".
state_name_map = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona',
    'AR': 'Arkansas', 'CA': 'California', 'CO': 'Colorado',
    'CT': 'Connecticut', 'DE': 'Delaware', 'FL': 'Florida',
    'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa',
    'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana',
    'ME': 'Maine', 'MD': 'Maryland', 'MA': 'Massachusetts',
    'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska',
    'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey',
    'NM': 'New Mexico', 'NY': 'New York', 'NC': 'North Carolina',
    'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee',
    'TX': 'Texas', 'UT': 'Utah', 'VT': 'Vermont',
    'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming',
    'FD': 'Federal',  # federal level
}

# Translate each code column into its full-name counterpart
# (state1 -> statename1, then state2 -> statename2).
for code_col, name_col in (('state1', 'statename1'), ('state2', 'statename2')):
    df[name_col] = df[code_col].map(state_name_map)


In [7]:
# Show the first five rows to check the final column layout before export.
df.head(n=5)

Unnamed: 0,year1,state1,state2,censussimilarity,distance,rank_dist,share_border_state1,share_border_state2,expanded,year2,statename1,statename2
0,1833,FD,AL,,,,,0,0,1833,Federal,Alabama
1,1833,AL,FD,,,,,0,0,1833,Alabama,Federal
2,1833,FD,AK,,,,,0,0,1833,Federal,Alaska
3,1833,AK,FD,,,,,0,0,1833,Alaska,Federal
4,1833,FD,AZ,,,,,0,0,1833,Federal,Arizona


In [8]:
# Write the FD–state pair table into data_dir, without the DataFrame index.
federal_matching_path = path.join(data_dir, 'federal_matching.csv')
df.to_csv(federal_matching_path, index=False)