In [37]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
import os

# Raw Data
import pandas as pd
# Load the raw death-rate table. The CSV's unnamed leading column is a row
# number, so give it an explicit name.
dr = pd.read_csv('../Data/deathrate.csv').rename(columns={"Unnamed: 0": "Number"})

# Mc Kean County (PA) and Kalawao County (HI) are dropped because their
# poverty data is missing.
EXCLUDED_COUNTIES = ['Mc Kean County, PA', 'Kalawao County, HI']

# Row numbers ('Number' column) of the duplicate rows to discard.
DUPLICATE_ROW_NUMBERS = [788, 3924, 7060, 10196]

# Filter once and take an explicit copy, so the column assignments below write
# to an independent frame rather than to a slice of the raw table (this avoids
# pandas' SettingWithCopyWarning).
keep_mask = ~dr['County'].isin(EXCLUDED_COUNTIES) & ~dr['Number'].isin(DUPLICATE_ROW_NUMBERS)
dr = dr[keep_mask].copy()

# Split the 'County' label into two new columns: the last two characters go to
# 'State' and the text before the first comma goes to 'City'.
# NOTE: despite its name, 'City' therefore holds the county part of the label;
# the column name is left unchanged so existing consumers keep working.
dr['State'] = dr['County'].str[-2:]
dr['City'] = dr['County'].str.split(',').str[0]

# Merge latitude and longitude with county.
# City coordinates obtained from https://simplemaps.com/data/us-cities
cities = pd.read_csv('../Data/cities.csv')
cities = cities.drop(columns=['city_ascii', 'state_name', 'population', 'population_proper', 'density', 'source', 'incorporated', 'timezone', 'zips', 'id'])

# Average the city-level values per county. numeric_only=True skips any
# non-numeric columns still present in `cities`; without it, recent pandas
# versions raise a TypeError instead of silently ignoring such columns.
cities_mean = cities.groupby('county_fips').mean(numeric_only=True)
dr = dr.merge(cities_mean, left_on='FIPS', right_on='county_fips', how='left')

# Manually insert missing latitude and longitude (values looked up on Google).
# Maps FIPS code -> (lat, lng).
MISSING_COORDINATES = {
    44001: (41.7258, -71.3112),
    46113: (43.2437, -102.6216),
    2270: (62.1458, -162.8919),
}
for fips_code, (fixed_lat, fixed_lng) in MISSING_COORDINATES.items():
    fips_mask = dr['FIPS'] == fips_code
    dr.loc[fips_mask, 'lat'] = fixed_lat
    dr.loc[fips_mask, 'lng'] = fixed_lng


In [5]:
# Generate distance matrix
#
# matrix[i][j] is the squared difference in (lat, lng) between county i and
# county j, rounded to 3 decimals. It is a squared Euclidean distance in
# coordinate units, not a geodesic distance.

# NOTE: this narrows the notebook-wide `dr` to Kentucky in 1999; every cell
# executed afterwards sees only that subset.
dr = dr[(dr['Year'] == 1999) & (dr['State'] == "KY")]

# Each county must appear exactly once so that row/column k of the matrix
# corresponds to the k-th row of `dr`.
assert dr['County'].is_unique, "expected exactly one row per county"

county_lat = dr['lat'].to_numpy(dtype=float)
county_lng = dr['lng'].to_numpy(dtype=float)

# Broadcasting a column vector against a row vector yields all pairwise
# differences at once; the matrix size follows the number of counties instead
# of being hard-coded.
lat_diff = county_lat[:, np.newaxis] - county_lat[np.newaxis, :]
lng_diff = county_lng[:, np.newaxis] - county_lng[np.newaxis, :]

matrix = np.around(lat_diff**2 + lng_diff**2, decimals=3)
matrix

    

array([[0.   , 1.056, 0.997, ..., 1.564, 3.813, 1.251],
       [1.056, 0.   , 3.469, ..., 4.621, 8.877, 4.114],
       [0.997, 3.469, 0.   , ..., 2.352, 2.231, 0.037],
       ...,
       [1.564, 4.621, 2.352, ..., 0.   , 1.456, 2.186],
       [3.813, 8.877, 2.231, ..., 1.456, 0.   , 1.738],
       [1.251, 4.114, 0.037, ..., 2.186, 1.738, 0.   ]])

In [6]:
# Build the neighbour structure and the pairwise-difference operator `delta`.

# Two counties count as neighbours when their entry in `matrix` (squared
# lat/lng difference) is below this cutoff.
NEIGHBOR_CUTOFF = 0.36

n_counties = matrix.shape[0]

# Boolean adjacency with the diagonal cleared, so a county is never its own
# neighbour.
adjacency = matrix < NEIGHBOR_CUTOFF
np.fill_diagonal(adjacency, False)
neighbor_counts = adjacency.sum(axis=1)

# matrix_neighbor: 1 where two distinct counties are neighbours, 0 otherwise,
# with each county's neighbour count stored on the diagonal.
matrix_neighbor = adjacency.astype(float)
np.fill_diagonal(matrix_neighbor, neighbor_counts)

# delta has one row per ordered neighbour pair (i, j): -1 in column i and +1 in
# column j. Pairs are ordered by i, then j. The pairs are read from the
# adjacency mask rather than from matrix_neighbor, because the count stored on
# the diagonal equals 1 for a county with a single neighbour and would be
# mistaken for a neighbour flag. The number of rows follows the data instead of
# being hard-coded.
source_idx, target_idx = np.nonzero(adjacency)
pair_rows = np.arange(source_idx.size)

delta = np.zeros([source_idx.size, n_counties])
delta[pair_rows, source_idx] = -1
delta[pair_rows, target_idx] = 1

pd.DataFrame(delta)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,110,111,112,113,114,115,116,117,118,119
0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
def ridge(y, X, llambda, Sigma):
    """Solve the penalised normal equations for beta.

    Returns the solution of
        (X.T @ X + llambda * Sigma.T @ Sigma) @ beta = X.T @ y
    as computed by ``np.linalg.solve``.

    Parameters
    ----------
    y : response vector.
    X : design matrix.
    llambda : scalar weight applied to the penalty term.
    Sigma : penalty matrix; it enters the system as ``Sigma.T @ Sigma``.
    """
    gram = X.T @ X
    penalty = (llambda * Sigma.T) @ Sigma
    rhs = X.T @ y
    return np.linalg.solve(gram + penalty, rhs)

In [44]:
def get_beta(year, state, llambda):
    """Compute the ridge solution for one year and state.

    Selects the rows of the notebook-level ``dr`` whose 'Year' and 'State'
    match, takes their 'Deathrate' column as the response, uses an identity
    design matrix of matching size, and penalises with the notebook-level
    ``delta`` via ``ridge``.

    Returns whatever ``ridge`` returns for those inputs.
    """
    selected = dr[(dr['Year'] == year) & (dr['State'] == state)]
    deathrate = selected['Deathrate']
    design = np.eye(len(deathrate))
    return ridge(deathrate, design, llambda, Sigma=delta)

In [51]:
def get_df(year, state, llambda):
    """Return the rows of ``dr`` for one year and state with a 'beta' column added.

    Parameters
    ----------
    year : value matched against the 'Year' column of ``dr``.
    state : value matched against the 'State' column of ``dr``.
    llambda : penalty weight forwarded to ``get_beta``.

    Returns
    -------
    A new DataFrame holding the matching rows of ``dr`` plus a 'beta' column
    filled with the result of ``get_beta(year, state, llambda)``.
    """
    beta = get_beta(year, state, llambda)
    # Take an explicit copy: assigning a column to a filtered slice of `dr`
    # triggers pandas' SettingWithCopyWarning and leaves it ambiguous whether
    # the write reaches the parent frame.
    df = dr[(dr['Year'] == year) & (dr['State'] == state)].copy()
    df['beta'] = beta
    return df

In [57]:
# Compute the beta column for Kentucky in 1999 with penalty weight 0.1 and
# write the resulting table to disk without the index column.
df = get_df(year=1999, state='KY', llambda=0.1)
df.to_csv('../Output/beta.csv', index=False)