# Import Libraries and Read in Files

In [2]:
import pandas as pd
import numpy as np

# Cleaning Comprehensive Housing Affordability Strategy (CHAS) Dataset

In [3]:
chas = pd.read_csv('assets/ACS_5YR_CHAS_Estimate_Data_by_County_-4190529618979970776.csv')
chas.shape

(3221, 408)

In [4]:
# After reading the data dictionary, these are the columns I found to be most relevant to our work

# We use this to join data
# GEOID - Geographic Identifier - fully concatenated geographic code (State FIPS and County FIPS)
# STATE - Census 2-digit FIPS State Code
# COUNTY - 3-digit County FIPS code
# NAME - COUNTY NAME

# T2_EST1 - Total Households

# Cost-burden county by Department of house and urban Development(HUD)
# T8_EST69 , T8_EST82, T8_EST95 , T8_EST108 - Contain cost-burdened household for =< 30% , 30-50% ,50-80% and all 

# already made percentage 
# T8_CB_PCT - Percent of all households with cost burden over 30% 
# T8_CB50_PCT - Percent of all households with cost burden over 30%
# T8_LE30_CB_PCT, T8 LE30_CB50_PCT - Percent of extremely low income households (household income less than or equal to 30% HAMFI) with cost burden over 30%
# T8_LE50_CB_PCT, T8_LE50_CB50_PCT
# T8_LE80_CB_PCT, T8_LE80_CB_50_PCT 

# renter / owner break-outs - to see who's hurting
# T7_SF_CB_R - Renter occupied AND household type is small family (2 persons, neither person 62 years or over, or 3 or 4 persons) AND housing cost burden is greater than 30%
#  T7_LF_CB_R  - Renter occupied AND household type is large family (5 or more persons) AND housing cost burden is greater than 30%
# T7_SF_CB_O - Owner occupied AND household type is small family (2 persons, neither person 62 years or over, or 3 or 4 persons) AND housing cost burden is greater than 30%
#  T7_LF_CB_O - Owner occupied AND household type is large family (5 or more persons) AND housing cost burden is greater than 30%


columns_to_keep = [
    "GEOID", "STATE", "COUNTY", "NAME",
    "T2_EST1",        
    "T8_EST69",      
    "T8_EST82",           
    "T8_EST95",         
    "T8_EST108",    
    "T8_LE30_CB_PCT", "T8_LE30_CB50_PCT",
    "T8_LE50_CB_PCT", "T8_LE50_CB50_PCT",
    "T8_LE80_CB_PCT", "T8_LE80_CB50_PCT",
    "T8_CB_PCT", "T8_CB50_PCT",
    "T7_SF_CB_R", "T7_LF_CB_R", "T7_SF_CB_O", "T7_LF_CB_O",
]
chas = chas[columns_to_keep].copy()


In [5]:
#check for missing value
subset = chas[columns_to_keep]
missing_counts   = subset.isna().sum()   

missing_counts

GEOID               0
STATE               0
COUNTY              0
NAME                0
T2_EST1             0
T8_EST69            0
T8_EST82            0
T8_EST95            0
T8_EST108           0
T8_LE30_CB_PCT      1
T8_LE30_CB50_PCT    1
T8_LE50_CB_PCT      1
T8_LE50_CB50_PCT    1
T8_LE80_CB_PCT      0
T8_LE80_CB50_PCT    0
T8_CB_PCT           0
T8_CB50_PCT         0
T7_SF_CB_R          0
T7_LF_CB_R          0
T7_SF_CB_O          0
T7_LF_CB_O          0
dtype: int64

In [6]:
colums_renames = {

    "GEOID"        : "GEOID",
    "STATE"        : "STATE",
    "COUNTY"       : "COUNTY",
    "NAME"         : "NAME",

    "T2_EST1"      : "total_households",
    "T8_EST69"     : "cost_burdened_households_leq_30_percent",
    "T8_EST82"     : "cost_burdened_households_30_to_50_percent",
    "T8_EST95"     : "cost_burdened_households_50_to_80_percent",
    "T8_EST108"    : "cost_burdened_households_all_income_levels_count",

    "T8_CB_PCT"        : "Percent_of_all_with_cb_over30_percent",
    "T8_CB50_PCT"      : "Percent_of_all_with_cb_over50_percent",
    
    "T8_LE30_CB_PCT"   : "Percent_of_low_income_cb_over30_percent",
    "T8_LE30_CB50_PCT" : "Percent_of_low_income_cb_over50_percent",

    "T8_LE50_CB_PCT"   : "Percent_of_medi_income_cb_over30_percent",
    "T8_LE50_CB50_PCT" : "Percent_of_medi_income_cb_over50_percent",

    "T8_LE80_CB_PCT"   : "Percent_of_high_income_cb_over30_percent",
    "T8_LE80_CB50_PCT" : "Percent_of_high_income_cb_over50_percent",

    "T7_SF_CB_R"   : "Percent_renter_small_family_count_cb_over30",
    "T7_LF_CB_R"   : "Percent_renter_large_family_count_cb_over50",
    "T7_SF_CB_O"   : "Percent_owner_small_family_count_cb_over30",
    "T7_LF_CB_O"   : "Percent_owner_large_family_count_cb_over50",
}

chas.rename(columns= colums_renames, inplace=True)

In [7]:
print(chas.isna().sum().sort_values(ascending=False).head(10))
print(chas.describe().T[['min','max','mean']])

Percent_of_medi_income_cb_over50_percent            1
Percent_of_low_income_cb_over30_percent             1
Percent_of_low_income_cb_over50_percent             1
Percent_of_medi_income_cb_over30_percent            1
GEOID                                               0
total_households                                    0
NAME                                                0
COUNTY                                              0
STATE                                               0
cost_burdened_households_all_income_levels_count    0
dtype: int64
                                                     min         max  \
GEOID                                             1001.0    72153.00   
STATE                                                1.0       72.00   
COUNTY                                               1.0      840.00   
total_households                                    60.0  3332505.00   
cost_burdened_households_leq_30_percent              0.0   531195.00   
cost_burdened_h

In [8]:
chas.to_csv('clean_data/clean_chas.csv', index= False)

# Cleaning Socioeconomic Estimate Dataset

In [9]:
soc = pd.read_csv('assets/ACS_5YR_Socioeconomic_Estimate_Data_by_County_-8265857031681572420.csv')
soc.shape

(3221, 146)

In [10]:
soc.columns

Index(['OBJECTID_1', 'GEOID', 'STATE', 'STATE_NAME', 'COUNTY', 'NAME',
       'B08013EST1', 'B08013_AVG_TTW', 'B08303EST1', 'B08303_30MINUS_TTW',
       ...
       'B23006EST29', 'B23006EST29_PCT', 'B24021EST2', 'B24021EST17',
       'B24021EST25', 'B24021EST28', 'B24021EST29', 'B24021EST32',
       'Shape__Area', 'Shape__Length'],
      dtype='object', length=146)

In [11]:
# After reading the data dictionary, I found that these columns are most relevant to our work

# 4 Primary key  'GEOID', 'STATE',  'COUNTY', 'NAME'

# Household information - - ability to pay 
# 'B19013EST1' = household income/poverty   
# 'B19113EST1' = family income 
# 'B19202EST1' = median single income

# 'B17021EST2_PCT' = POVERTY RATE
# 'B23001_UE_PCT' = unempolyment rate


# 'B25106_CB_LT35' = cost-burdened < 35%
# 'B25106_CB_GT35' - cost-burdened >= 35%

# 'B25014_CROWD_PCT' = households with more than 1 person or room

# 'B17019_RENT' =  Renter-occupied 
# 'B17019_OWN'  =  Owner-occupied 



In [12]:
rename_soc = {
    'B19013EST1':'median_household_income',
    'B19113EST1':'median_family_income',
    'B19202EST1':'median_nonfamily_income',
    'B17021EST2_PCT':'poverty_rate_pct',
    'B23001_UE_PCT':'unemployment_rate_pct',
    'B25106_CB_R_LT35':'renters_cb_lt35_cnt',
    'B25106_CB_R_GT35':'renters_cb_ge35_cnt',
    'B25106_CB_O_LT35':'owners_cb_lt35_cnt',
    'B25106_CB_O_GT35':'owners_cb_ge35_cnt',
    'B25014_CROWD_PCT':'crowded_hh_pct',
    'B17019_RENT':'renter_hh_cnt',
    'B17019_OWN':'owner_hh_cnt'
}


needed = ['GEOID','STATE','COUNTY','NAME', *rename_soc.keys()]
soc = soc[needed].rename(columns=rename_soc)


In [13]:
columns_soc = [
    'GEOID','STATE','COUNTY','NAME',
    'median_household_income',
    'median_family_income',
    'median_nonfamily_income',
    'poverty_rate_pct',
    'unemployment_rate_pct',
    'renters_cb_lt35_cnt',
    'renters_cb_ge35_cnt',
    'owners_cb_lt35_cnt',
    'owners_cb_ge35_cnt',
    'crowded_hh_pct',
    'renter_hh_cnt',
    'owner_hh_cnt'
]

subset = soc[columns_soc]
missing_counts = subset.isna().sum()   

missing_counts
# Result show there are 13 missing row on the median_nonfamily_income
# As this is <1% of the data, it won't impact much of the analysis if we leave it in

GEOID                       0
STATE                       0
COUNTY                      0
NAME                        0
median_household_income     1
median_family_income        1
median_nonfamily_income    13
poverty_rate_pct            0
unemployment_rate_pct       0
renters_cb_lt35_cnt         0
renters_cb_ge35_cnt         0
owners_cb_lt35_cnt          0
owners_cb_ge35_cnt          0
crowded_hh_pct              0
renter_hh_cnt               0
owner_hh_cnt                0
dtype: int64

In [14]:
print(soc.isna().sum().sort_values(ascending=False).head(10))
print(soc.describe().T[['min','max','mean']])

median_nonfamily_income    13
median_household_income     1
median_family_income        1
GEOID                       0
NAME                        0
COUNTY                      0
STATE                       0
poverty_rate_pct            0
unemployment_rate_pct       0
renters_cb_lt35_cnt         0
dtype: int64
                             min        max          mean
GEOID                     1001.0   72153.00  31384.438684
STATE                        1.0      72.00     31.281590
COUNTY                       1.0     840.00    102.849115
median_household_income  12283.0  147111.00  54171.514596
median_family_income     15478.0  182567.00  67463.473602
median_nonfamily_income   4783.0   96421.00  30964.596322
poverty_rate_pct             0.0      66.19     15.380863
unemployment_rate_pct        0.0      34.85      5.450699
renters_cb_lt35_cnt          0.0  528543.00   4329.934492
renters_cb_ge35_cnt          0.0  449504.00   1878.503881
owners_cb_lt35_cnt           0.0  156577.00   263

In [15]:
soc.to_csv('clean_data/clean_Soc.csv', index =False)

# Merge CHAS and Socioeconomic Estimate Dataset

In [16]:
chas = pd.read_csv('clean_data/clean_chas.csv', dtype = {'GEOID':'string'})
soc = pd.read_csv('clean_data/clean_Soc.csv', dtype = {'GEOID':'string','STATE':'string', 'COUNTY':'string'})

In [17]:
soc_chas = (
    chas.merge(soc, on='GEOID', how='left', validate='one_to_one')
        .astype({'STATE_x':'string'})  
        .drop(columns=['STATE_y','COUNTY_y','NAME_y'])
        .rename(columns={'STATE_x':'STATE','COUNTY_x':'COUNTY','NAME_x':'NAME'})
)
soc_chas

Unnamed: 0,GEOID,STATE,COUNTY,NAME,total_households,cost_burdened_households_leq_30_percent,cost_burdened_households_30_to_50_percent,cost_burdened_households_50_to_80_percent,cost_burdened_households_all_income_levels_count,Percent_of_low_income_cb_over30_percent,...,median_nonfamily_income,poverty_rate_pct,unemployment_rate_pct,renters_cb_lt35_cnt,renters_cb_ge35_cnt,owners_cb_lt35_cnt,owners_cb_ge35_cnt,crowded_hh_pct,renter_hh_cnt,owner_hh_cnt
0,1043,1,43,Cullman,31735,1630,1605,1810,640,65.91,...,25506.0,12.69,5.01,2657,183,2487,1033,1.94,1099,1046
1,1045,1,45,Dale,19405,1605,1760,1530,700,63.00,...,26615.0,18.38,9.02,2769,350,1321,252,0.88,1455,437
2,1047,1,47,Dallas,15410,2455,1485,930,790,59.38,...,18816.0,29.22,10.38,2960,66,1234,249,1.23,1377,564
3,1049,1,49,DeKalb,26365,1810,1660,1570,580,62.91,...,21406.0,20.71,4.45,2271,67,2457,693,2.95,1406,1555
4,1051,1,51,Elmore,29795,1510,1880,1350,855,75.58,...,35766.0,10.58,4.07,2917,576,1907,1356,1.31,1286,599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3216,51650,51,650,Hampton,54845,6095,3765,5545,2720,81.51,...,40306.0,15.29,6.12,8380,3688,3950,4347,1.46,2651,863
3217,51660,51,660,Harrisonburg,16750,2730,1740,1835,1135,77.50,...,30732.0,25.55,4.74,3731,695,496,648,3.56,812,115
3218,51670,51,670,Hopewell,9360,1995,1145,720,250,76.17,...,25772.0,24.70,8.41,1984,253,616,355,3.13,1105,147
3219,51678,51,678,Lexington,2065,315,125,100,105,75.76,...,25323.0,22.14,3.36,361,56,157,69,0.00,32,0


In [18]:
print(soc_chas.isna().sum().sort_values(ascending=False))

median_nonfamily_income                             13
median_household_income                              1
Percent_of_medi_income_cb_over50_percent             1
Percent_of_low_income_cb_over50_percent              1
Percent_of_medi_income_cb_over30_percent             1
Percent_of_low_income_cb_over30_percent              1
median_family_income                                 1
GEOID                                                0
STATE                                                0
cost_burdened_households_all_income_levels_count     0
cost_burdened_households_50_to_80_percent            0
cost_burdened_households_30_to_50_percent            0
cost_burdened_households_leq_30_percent              0
Percent_of_high_income_cb_over30_percent             0
COUNTY                                               0
NAME                                                 0
total_households                                     0
Percent_of_all_with_cb_over50_percent                0
Percent_of

# Clean Redfin Dataset

In [19]:
redfin_county_data = pd.read_csv('assets\county_market_tracker.tsv000.gz', sep="\t", header=0, dtype=str)

In [20]:
redfin_county_data['PERIOD_BEGIN'] = pd.to_datetime(redfin_county_data['PERIOD_BEGIN'])
redfin_county_data['PERIOD_END'] = pd.to_datetime(redfin_county_data['PERIOD_END'])
redfin_county_data['MEDIAN_SALE_PRICE'] = redfin_county_data['MEDIAN_SALE_PRICE'].astype('float')
redfin_county_data['MEDIAN_LIST_PRICE'] = redfin_county_data['MEDIAN_LIST_PRICE'].astype('float')

In [21]:
redfin_county_data.head()

Unnamed: 0,PERIOD_BEGIN,PERIOD_END,PERIOD_DURATION,REGION_TYPE,REGION_TYPE_ID,TABLE_ID,IS_SEASONALLY_ADJUSTED,REGION,CITY,STATE,...,SOLD_ABOVE_LIST_YOY,PRICE_DROPS,PRICE_DROPS_MOM,PRICE_DROPS_YOY,OFF_MARKET_IN_TWO_WEEKS,OFF_MARKET_IN_TWO_WEEKS_MOM,OFF_MARKET_IN_TWO_WEEKS_YOY,PARENT_METRO_REGION,PARENT_METRO_REGION_METRO_CODE,LAST_UPDATED
0,2023-02-01,2023-02-28,30,county,5,1454,False,"Freeborn County, MN",,Minnesota,...,,,,,1.0,,,"Albert Lea, MN",10660.0,2025-05-12 14:39:59.302 Z
1,2012-09-01,2012-09-30,30,county,5,3224,False,"Trempealeau County, WI",,Wisconsin,...,0.1,,,,0.0,0.0,0.0,Wisconsin nonmetropolitan area,,2025-05-12 14:39:59.302 Z
2,2020-10-01,2020-10-31,30,county,5,2474,False,"Spartanburg County, SC",,South Carolina,...,0.0,0.1111111111,-0.1111111111,-0.2222222222,0.3333333333,,-0.2666666667,"Spartanburg, SC",43900.0,2025-05-12 14:39:59.302 Z
3,2016-09-01,2016-09-30,30,county,5,3045,False,"Fredericksburg, VA",,Virginia,...,0.0,0.1818181818,,0.01515151515,,,,"Washington, DC",47894.0,2025-05-12 14:39:59.302 Z
4,2023-09-01,2023-09-30,30,county,5,477,False,"Marion County, FL",,Florida,...,-0.0362325888,0.297182555,-0.01216087935,-0.0188841702,0.2881906826,-0.03354026758,-0.06981902618,"Ocala, FL",36100.0,2025-05-12 14:39:59.302 Z


In [22]:
redfin_county_data_2016_2020 = redfin_county_data[(redfin_county_data['PERIOD_BEGIN'] >= '2016-01-01') & (redfin_county_data['PERIOD_BEGIN'] <= '2020-12-31')].copy()

In [23]:
def extract_county_name(row): 
    full_county_name = row['REGION']
    if "County" in full_county_name: 
        return full_county_name.split(" County")[0]
    else: 
        return full_county_name.split(" Parish")[0]

In [24]:
redfin_county_data_2016_2020['COUNTY_NAME'] = redfin_county_data_2016_2020.apply(func=extract_county_name, axis=1)

In [25]:
redfin_county_data_2016_2020['COUNTY_NAME']

2                 Spartanburg
3          Fredericksburg, VA
5                    Lawrence
9                     Johnson
10                       Hall
                  ...        
1238351                 Sharp
1238354               McClain
1238356          Pend Oreille
1238362                 Boone
1238373               Norfolk
Name: COUNTY_NAME, Length: 482562, dtype: object

In [26]:
redfin_county_data_2016_2020.columns

Index(['PERIOD_BEGIN', 'PERIOD_END', 'PERIOD_DURATION', 'REGION_TYPE',
       'REGION_TYPE_ID', 'TABLE_ID', 'IS_SEASONALLY_ADJUSTED', 'REGION',
       'CITY', 'STATE', 'STATE_CODE', 'PROPERTY_TYPE', 'PROPERTY_TYPE_ID',
       'MEDIAN_SALE_PRICE', 'MEDIAN_SALE_PRICE_MOM', 'MEDIAN_SALE_PRICE_YOY',
       'MEDIAN_LIST_PRICE', 'MEDIAN_LIST_PRICE_MOM', 'MEDIAN_LIST_PRICE_YOY',
       'MEDIAN_PPSF', 'MEDIAN_PPSF_MOM', 'MEDIAN_PPSF_YOY', 'MEDIAN_LIST_PPSF',
       'MEDIAN_LIST_PPSF_MOM', 'MEDIAN_LIST_PPSF_YOY', 'HOMES_SOLD',
       'HOMES_SOLD_MOM', 'HOMES_SOLD_YOY', 'PENDING_SALES',
       'PENDING_SALES_MOM', 'PENDING_SALES_YOY', 'NEW_LISTINGS',
       'NEW_LISTINGS_MOM', 'NEW_LISTINGS_YOY', 'INVENTORY', 'INVENTORY_MOM',
       'INVENTORY_YOY', 'MONTHS_OF_SUPPLY', 'MONTHS_OF_SUPPLY_MOM',
       'MONTHS_OF_SUPPLY_YOY', 'MEDIAN_DOM', 'MEDIAN_DOM_MOM',
       'MEDIAN_DOM_YOY', 'AVG_SALE_TO_LIST', 'AVG_SALE_TO_LIST_MOM',
       'AVG_SALE_TO_LIST_YOY', 'SOLD_ABOVE_LIST', 'SOLD_ABOVE_LIST_MOM',
 

In [27]:
columns_to_keep = ['PERIOD_BEGIN', 'PERIOD_END', 'COUNTY_NAME', 'STATE', 'STATE_CODE', 'MEDIAN_SALE_PRICE', 'MEDIAN_LIST_PRICE']

In [28]:
redfin_data = redfin_county_data_2016_2020[columns_to_keep].copy()

In [29]:
# redfin_data.to_csv('clean_data/clean_redfin.csv', index = False)


# Matching to FIPS Code and finalize data set

In [33]:
fips = pd.read_csv('assets\State__County_and_City_FIPS_Reference_Table_20250612.csv')

In [34]:
fips.dropna(inplace=True)
fips.drop_duplicates(subset=['State Name', 'County Name', 'State Code', 'County Code', 'StCnty FIPS Code' ], keep='first', inplace=True)
fips.drop(labels=['City Name', 'City Code', 'StCntyCity FIPS Code'], axis=1, inplace=True)

In [35]:
fips['StCnty FIPS Code'] = fips['StCnty FIPS Code'].astype('int')

In [36]:
soc_chas['GEOID'] = soc_chas['GEOID'].astype('int')

In [37]:
soc_chas.merge(right=fips, left_on='GEOID', right_on = 'StCnty FIPS Code', how='left')

Unnamed: 0,GEOID,STATE,COUNTY,NAME,total_households,cost_burdened_households_leq_30_percent,cost_burdened_households_30_to_50_percent,cost_burdened_households_50_to_80_percent,cost_burdened_households_all_income_levels_count,Percent_of_low_income_cb_over30_percent,...,owners_cb_ge35_cnt,crowded_hh_pct,renter_hh_cnt,owner_hh_cnt,State Name,County Name,State Code,State FIPS Code,County Code,StCnty FIPS Code
0,1043,1,43,Cullman,31735,1630,1605,1810,640,65.91,...,1033,1.94,1099,1046,ALABAMA,CULLMAN,AL,1.0,C043,1043.0
1,1045,1,45,Dale,19405,1605,1760,1530,700,63.00,...,252,0.88,1455,437,ALABAMA,DALE,AL,1.0,C045,1045.0
2,1047,1,47,Dallas,15410,2455,1485,930,790,59.38,...,249,1.23,1377,564,ALABAMA,DALLAS,AL,1.0,C047,1047.0
3,1049,1,49,DeKalb,26365,1810,1660,1570,580,62.91,...,693,2.95,1406,1555,ALABAMA,DE KALB,AL,1.0,C049,1049.0
4,1051,1,51,Elmore,29795,1510,1880,1350,855,75.58,...,1356,1.31,1286,599,ALABAMA,ELMORE,AL,1.0,C051,1051.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3216,51650,51,650,Hampton,54845,6095,3765,5545,2720,81.51,...,4347,1.46,2651,863,VIRGINIA,HAMPTON,VA,51.0,C650,51650.0
3217,51660,51,660,Harrisonburg,16750,2730,1740,1835,1135,77.50,...,648,3.56,812,115,VIRGINIA,HARRISONBURG,VA,51.0,C660,51660.0
3218,51670,51,670,Hopewell,9360,1995,1145,720,250,76.17,...,355,3.13,1105,147,VIRGINIA,HOPEWELL,VA,51.0,C670,51670.0
3219,51678,51,678,Lexington,2065,315,125,100,105,75.76,...,69,0.00,32,0,VIRGINIA,LEXINGTON,VA,51.0,C678,51678.0


In [38]:
redfin_data['COUNTY_NAME']

2                 Spartanburg
3          Fredericksburg, VA
5                    Lawrence
9                     Johnson
10                       Hall
                  ...        
1238351                 Sharp
1238354               McClain
1238356          Pend Oreille
1238362                 Boone
1238373               Norfolk
Name: COUNTY_NAME, Length: 482562, dtype: object

In [39]:
redfin_data['COUNTY_NAME'] = redfin_data['COUNTY_NAME'].str.upper()

In [40]:
fips.columns

Index(['State Name', 'County Name', 'State Code', 'State FIPS Code',
       'County Code', 'StCnty FIPS Code'],
      dtype='object')

In [41]:
redfin_data_with_fips = redfin_data.merge(right=fips, left_on=['COUNTY_NAME', 'STATE_CODE'], right_on=['County Name', 'State Code'])

In [42]:
redfin_data_with_fips.columns

Index(['PERIOD_BEGIN', 'PERIOD_END', 'COUNTY_NAME', 'STATE', 'STATE_CODE',
       'MEDIAN_SALE_PRICE', 'MEDIAN_LIST_PRICE', 'State Name', 'County Name',
       'State Code', 'State FIPS Code', 'County Code', 'StCnty FIPS Code'],
      dtype='object')

In [43]:
soc_chas_redfin = soc_chas.merge(right=redfin_data_with_fips, left_on='GEOID', right_on= 'StCnty FIPS Code')

In [47]:
soc_chas_redfin.to_csv('clean_data/soc_chas_redfin.csv', index = False)


In [49]:
import shutil

shutil.make_archive(
    base_name='clean_data\soc_chas_redfin', 
    format='zip',
    root_dir='clean_data',
    base_dir='soc_chas_redfin.csv'
)

'c:\\Users\\a5636\\OneDrive\\Documents\\GitHub\\SIADS593--Milestone-1\\clean_data\\soc_chas_redfin.zip'