# Data Cleaning code for Marriage and Divorce Rate Charts

In [51]:
import pandas as pd
import json
from datetime import datetime

In [68]:
# Load the four input tables: state marriage rates, state divorce rates,
# stay-at-home orders, and the state-name -> FIPS code lookup.
marriage, divorce, sah, fips = map(pd.read_csv, (
    '../data/state-marriage-rates.csv',
    '../data/state-divorce-rates.csv',
    '../data/stay-at-home.csv',
    '../data/us-state-ansi-fips.csv',
))

In [78]:
# Parse both order-date columns and keep only the calendar date
# (plain date objects rather than full timestamps).
for date_col in ('Date enacted', 'Date lifted'):
    sah[date_col] = pd.to_datetime(sah[date_col]).dt.date

def unix_stamp(date):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Despite the name, the result is a calendar date object, not a Unix
    timestamp.

    Raises:
        ValueError: if ``date`` does not match the ``%Y-%m-%d`` format.
    """
    parsed = datetime.strptime(date, "%Y-%m-%d")
    return parsed.date()

# Bucket every state by the week in which its stay-at-home order took effect.
#
# Labels are written with ``.loc`` on the frame itself. Assigning through
# ``sah['Group'][i] = ...`` is chained indexing, which pandas documents as
# unreliable: it may write to a temporary copy, and it has no effect when
# copy-on-write is enabled.
#
# The dates are compared as datetime64 values so that missing dates (NaT)
# simply fail every comparison; comparing NaT with ``datetime.date`` objects
# row by row is deprecated in pandas.
enacted = pd.to_datetime(sah['Date enacted'])

# Exclusive upper bound of each weekly bucket, earliest first.
group_cutoffs = [
    ("2020-03-22", 'Came into effect before March 22'),
    ("2020-03-29", 'Came into effect before March 29'),
    ("2020-04-05", 'Came into effect before April 5'),
    ("2020-04-12", 'Came into effect before April 12'),
]

# Default label: applies to rows with no enactment date and, exactly as the
# previous if/elif chain did, to any date on or after 2020-04-12.
sah['Group'] = "No Statewide order"

# Apply the latest cutoff first so that each earlier cutoff overwrites it;
# every row ends up with the label of the earliest cutoff it falls under.
for cutoff, label in reversed(group_cutoffs):
    sah.loc[enacted < pd.Timestamp(cutoff), 'Group'] = label

# Attach FIPS codes by matching the state name, drop the lookup columns that
# are no longer needed, and expose the numeric state code as "id".
# (The leading spaces in " stusps" and " st" are part of the column names.)
full_sah = (
    sah.merge(fips, left_on='State', right_on='stname')
    .drop(columns=[" stusps", "stname"])
    .rename(columns={" st": "id"})
)

# Write the map data without the row index.
full_sah.to_csv('../data/sah_map_data.csv', index=False)

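# Notebook output left over from the cell above: these appear to be the source
# lines echoed by warnings raised while the date comparisons ran.
#   if sah['Date enacted'][i] < unix_stamp("2020-03-22"):
#   elif unix_stamp("2020-03-22") <= sah['Date enacted'][i] < unix_stamp("2020-03-29"):
#   elif unix_stamp("2020-03-29") <= sah['Date enacted'][i] < unix_stamp("2020-04-05"):
#   elif unix_stamp("2020-04-05") <= sah['Date enacted'][i] < unix_stamp("2020-04-12"):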


In [41]:
## Data for Slope Graph
# Will need to take data from wide to long 

def slope_data_clean(df, type, path_name=None):
    '''
    Reshape a wide state-by-year rate table to long form and write it as JSON
    for use in the Observable slope chart.

    Parameters:
        df: wide DataFrame with a 'State' column and one column per year;
            the year column names must be convertible to numbers.
        type: "Marriage" or "Divorce". Selects the name of the value column
            ('mar_rate' / 'div_rate') and the default output file.
            (The name shadows the builtin; it is kept so existing callers
            keep working.)
        path_name: optional output path. When omitted, the file is written to
            '../data/marriage_long.json' or '../data/divorce_long.json'.

    Returns:
        None. The result is a JSON file holding a list of
        {'State', 'year', <rate>} records.

    Raises:
        ValueError: if ``type`` is neither "Marriage" nor "Divorce".
    '''
    outputs = {
        "Marriage": ('mar_rate', '../data/marriage_long.json'),
        "Divorce": ('div_rate', '../data/divorce_long.json'),
    }
    # Fail with a clear message instead of an unbound-variable error later on.
    if type not in outputs:
        raise ValueError(
            f'type must be "Marriage" or "Divorce", got {type!r}'
        )
    val_name, default_path = outputs[type]
    if path_name is None:
        path_name = default_path

    # Melt the wide data to long: one row per (State, year).
    df_long = pd.melt(df, id_vars='State', var_name='year', value_name=val_name)

    # Year labels and rates arrive as column names / raw values; make both numeric.
    df_long['year'] = pd.to_numeric(df_long['year'])
    df_long[val_name] = pd.to_numeric(df_long[val_name])

    # Keep 2019 onwards (2019 and 2020 in the current data).
    df_long = df_long[df_long.year >= 2019]

    # Missing rates become None so they are written as JSON null; json.dump
    # would otherwise emit the bare token NaN, which is not valid JSON.
    records = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df_long.to_dict('records')
    ]

    with open(path_name, 'w', encoding='utf-8') as fp:
        json.dump(records, fp)


In [44]:
## Write the long-format JSON files for both slope charts
for rates, label in ((marriage, "Marriage"), (divorce, "Divorce")):
    slope_data_clean(rates, label)

In [11]:
# Combine the 2019/2020 rates with the stay-at-home details in one frame.
# The columns are lined up by row index, not by state name, so the three
# source tables are expected to list the states in the same order.
rate_columns = {
    'State': marriage['State'],
    'mar_rate_19': marriage['2019'],
    'mar_rate_20': marriage['2020'],
    'div_rate_19': divorce['2019'],
    'div_rate_20': divorce['2020'],
}
order_columns = {
    'stay_at_home': sah['Stay-at-home'],
    'sah_enacted': sah['Date enacted'],
    'sah_lifted': sah['Date lifted'],
}
group_df = pd.DataFrame({**rate_columns, **order_columns})




Unnamed: 0,State,mar_rate_19,mar_rate_20,div_rate_19,div_rate_20,stay_at_home,sah_enacted,sah_lifted
0,Alabama,6.7,7.2,3.6,3.7,Yes,2020-04-04,2020-04-30
1,Alaska,6.5,5.7,3.6,3.2,Yes,2020-03-28,2020-04-21
2,Arizona,5.3,4.9,2.9,2.9,Yes,2020-03-31,2020-05-15
3,Arkansas,8.4,7.8,4.0,3.6,No,NaT,NaT
4,California,5.7,3.2,,,Yes,2020-03-19,2020-06-15
5,Colorado,7.3,6.7,3.3,2.9,Yes,2020-03-26,2020-04-26


In [50]:
## National marriage/divorce rates for the line chart

# Read the wide table and reshape it to long form in one step: 'Year' is
# kept as the identifier and every other column is stacked into the default
# 'variable' / 'value' pair.
us = pd.read_csv('../data/national-mar-div-rates.csv').melt(id_vars='Year')

# Save without the row index.
us.to_csv('../data/us_mar_div.csv', index=False)