In [802]:
import pandas as pd
import numpy as np
import os
import datetime

In [803]:
#Neighbor tables; the first CSV column of neighborcounties becomes its index
neighborcounties = pd.read_csv('neighborcounties.csv', index_col=0, encoding='latin1')
#neighborstats is only handed through to the fill helpers below
neighborstats = pd.read_csv('neighborstats.csv', encoding='latin1')

#Google mobility table, without the 'Unnamed: 0' column stored in the CSV
google_neighbor = pd.read_csv('google_neighbor.csv', encoding='latin1')
google_neighbor = google_neighbor.drop(columns='Unnamed: 0')

#Demographic table (first CSV column as index) plus the two population sources
Age_Race = pd.read_csv('Age_Race.csv', index_col=0, encoding='latin1')
Pop_60 = pd.read_csv('Pop_60.csv')
Density = pd.read_csv('Density.csv')

In [789]:
def fillframe_date(fips, date, value, colname, neighborcounties, neighborstats, min_neighbors=2):
    """Fill NaNs in a dated, per-county column, one date at a time.

    The (FIPS, Date, value) triples are pivoted to one row per FIPS and one
    column per date, every date column is passed through fillcol, and the
    result is melted back to long form.

    Side effect: the pivoted table is written to and re-read from 'frame.csv'
    in the current working directory.

    Returns a DataFrame with columns ['FIPS', 'Date', colname], sorted by
    FIPS and then Date, with a fresh 0..n-1 index.
    """
    #Stacking the three inputs as rows and transposing gives one row per observation
    long_form = pd.DataFrame(data=[fips, date, value]).T
    long_form.columns = ['FIPS', 'Date', colname]
    wide = long_form.pivot(index='FIPS', columns='Date', values=[colname]).reset_index()

    #Round trip through CSV: after re-reading, the first data row holds the date
    #labels, so it is promoted to the column names and then dropped
    wide.to_csv('frame.csv')
    wide = pd.read_csv('frame.csv', encoding='latin1')
    label_row = wide.iloc[0]
    wide = wide.rename(columns=label_row).drop(wide.index[0])
    #The FIPS column has no label in that row, so its name comes back as NaN
    wide.columns = wide.columns.fillna('FIPS')
    wide = wide.drop(columns='Date')

    #Every column after 'FIPS' is one date; fill each independently
    for day in wide.columns[1:]:
        wide[day] = fillcol(wide['FIPS'], wide[day], neighborcounties, neighborstats, min_neighbors)

    #Back to long form, ordered by county and date
    filled = wide.melt(id_vars=['FIPS'])
    filled = filled.sort_values(['FIPS', 'variable']).reset_index(drop=True)
    filled.columns = ['FIPS', 'Date', colname]
    return filled

In [790]:
def fillcol(fips, value, neighborcounties, neighborstats, min_neighbors=2):
    """Fill NaN entries of one per-county column from neighboring counties.

    Each NaN county receives the weighted mean of its neighbors' non-NaN
    values, with weight 1/w where w is read from the 'Pop_10' column of the
    matching (orgfips, adjfips) row of neighborcounties.
    NOTE(review): the surrounding comments call w a distance, but the column
    is named 'Pop_10' -- confirm it really holds the intended distance.

    Parameters
    ----------
    fips : Series of county codes, one per row.
    value : Series aligned with fips; may contain NaNs (cast to float).
    neighborcounties : DataFrame with columns 'orgfips', 'adjfips', 'Pop_10'.
    neighborstats : unused; kept so existing callers keep working.
    min_neighbors : minimum number of neighbors with data needed to fill.

    Returns
    -------
    list with one entry per input row, in input order: the original value
    where present, the weighted mean where the county could be filled, and
    NaN otherwise. Neighbor values are read from the input only, so a county
    filled in this call does not feed other counties until the next call.
    """
    #Loading the fips and value into a float column indexed by FIPS
    df = pd.DataFrame(data=[fips, value]).T
    df.columns = ['FIPS', 'Values']
    df.Values = df.Values.astype(float)
    df = df.set_index('FIPS')
    values = df['Values']

    newcol = []
    for ind in df.index:
        current = values.loc[ind]
        if not np.isnan(current):
            newcol.append(current)
            continue

        #One scan of the neighbor table per NaN county (boolean mask, so
        #non-numeric codes work too, unlike a string-built query)
        adjacency = neighborcounties[neighborcounties['orgfips'] == ind]
        neighbors = list(adjacency['adjfips'])
        #First listed weight wins if a pair appears more than once
        weight_source = {}
        for n, w in zip(neighbors, adjacency['Pop_10']):
            weight_source.setdefault(n, w)

        nonzero = 0
        weightedval = 0.0
        totalinvdist = 0.0
        for n in neighbors:
            if n not in df.index:
                continue
            neighbor_value = values.loc[n]
            if np.isnan(neighbor_value):
                continue
            dist = weight_source[n]
            if dist == 0:
                #1/0 is undefined; a zero-weight-source neighbor cannot contribute
                continue
            nonzero += 1
            totalinvdist += 1 / dist
            weightedval += (1 / dist) * neighbor_value

        #Require enough contributing neighbors, and never divide by a zero total
        if nonzero >= min_neighbors and totalinvdist != 0:
            newcol.append(weightedval / totalinvdist)
        else:
            newcol.append(np.nan)
    return newcol

In [791]:
def fillgoogle(colname, google_neighbor):
    """Repeatedly fill NaNs in one mobility column until no more can be filled.

    Each pass calls fillframe_date with the default neighbor threshold; when a
    pass fills nothing, one more pass is tried with a threshold of 1, and the
    loop stops if that fills nothing either.

    Reads the module-level neighborcounties and neighborstats tables.
    google_neighbor is modified in place and also returned.
    NOTE(review): the filled column is assigned back by index, which assumes
    google_neighbor is ordered by FIPS then Date with a 0..n-1 index and has
    one row per (FIPS, Date) pair -- confirm against the input file.
    """
    def count_missing():
        return len(google_neighbor[np.isnan(google_neighbor[colname])])

    def run_pass(*threshold):
        #One fill pass over every date, written back into the table
        filled = fillframe_date(google_neighbor['FIPS'], google_neighbor['Date'], google_neighbor[colname],
                                colname, neighborcounties, neighborstats, *threshold)
        google_neighbor[colname] = filled[colname]
        return count_missing()

    remaining = count_missing()
    while remaining > 0:
        before = remaining
        remaining = run_pass()
        if remaining != before:
            #Progress was made, keep going with the default threshold
            continue
        #No progress: retry accepting a single neighbor, give up if still stuck
        remaining = run_pass(1)
        if remaining == before:
            break

    return google_neighbor

In [792]:
def fillfixed(colname, data, code, neighborcounties, neighborstats):
    """Repeatedly fill NaNs in one column of a one-row-per-county table.

    Each pass calls fillcol with the default neighbor threshold. When a pass
    fills nothing, one more pass is tried accepting a single neighbor; if
    that fills nothing either, the loop stops.

    Parameters
    ----------
    colname : name of the column of data to fill.
    data : DataFrame holding the column; modified in place.
    code : name of the column of data holding the county codes.
    neighborcounties, neighborstats : passed through to fillcol.

    Returns
    -------
    The same data object, with data[colname] filled where possible.
    """
    def count_nans():
        return len(data[np.isnan(data[colname])])

    numnans = count_nans()
    while numnans > 0:
        tempnum = numnans
        #Regular pass with the default neighbor threshold
        data[colname] = fillcol(data[code], data[colname], neighborcounties, neighborstats)
        numnans = count_nans()
        if tempnum == numnans:
            #No progress: retry with only 1 neighbor required. The threshold
            #must be passed explicitly, otherwise this repeats the pass above.
            data[colname] = fillcol(data[code], data[colname], neighborcounties, neighborstats, 1)
            numnans = count_nans()
            if tempnum == numnans:
                #Still nothing filled, so the remaining NaNs cannot be resolved
                break
    return data

In [793]:
def popfill(total, estimate):
    """Return total, or estimate when total is NaN."""
    #Used row by row to merge the Pop_60 population with the density-based estimate
    return estimate if np.isnan(total) else total

In [794]:
#Every county that appears as an origin in the neighbor table, used as the master index
county_list = pd.DataFrame(neighborcounties['orgfips'].unique(), columns=['fips']).set_index('fips')
#Population per county from Pop_60; counties without a match stay NaN for now
Total_Pop = county_list.join(Pop_60.set_index('FIPS'), how='left')
#Fallback estimate for those counties: 2010 density times land area
Density = Density.assign(
    Pop=Density['2010 Density per square mile of land area - Population'] * Density['Area in square miles - Land area']
)[['FIPS', 'Pop']]
Total_Pop = Total_Pop.join(Density.set_index('FIPS'), how='left')
#Keep the Pop_60 figure where present, otherwise use the density estimate
Total_Pop['Population'] = Total_Pop.apply(lambda row: popfill(row['total_pop'], row['Pop']), axis=1)
Total_Pop = Total_Pop['Population']
Total_Pop.to_csv('Total_Pop')

  # This is added back by InteractiveShellApp.init_path()


In [795]:
#Extending the demographic table so every county in county_list has a row
Age_Race = county_list.join(Age_Race, how='left')
#Total population is taken from Total_Pop by position (both are built from county_list)
Age_Race['Total population'] = Total_Pop.tolist()
#Casting through str to float, then expressing each column as a share of the population
Age_Race = Age_Race.astype(str).astype(float)
Age_Race = Age_Race.div(Age_Race['Total population'], axis=0).reset_index()

#Filling NaNs column by column; the first two columns ('fips', 'Total population') are skipped
cols = Age_Race.columns[2:].tolist()
for col in cols:
    Age_Race = fillfixed(col, Age_Race, 'fips', neighborcounties, neighborstats)
#Counties that still have a NaN after filling are dropped
Age_Race = Age_Race.dropna()

In [799]:
#Saving the filled demographic shares; the row index is written as the first CSV column
Age_Race.to_csv('Age_Race_Filled.csv')

In [796]:
#Filling every mobility metric. 'Date' and 'FIPS' are keys, not values to impute,
#so they are excluded by name rather than by column position
cols = [name for name in google_neighbor.columns if name not in ('Date', 'FIPS')]
for col in cols:
    google_neighbor = fillgoogle(col, google_neighbor)
#Whatever could not be filled from neighbors is set to 0
google_neighbor = google_neighbor.fillna(0)

In [801]:
#Saving the filled mobility table; the row index is written as the first CSV column
google_neighbor.to_csv('google_mobility_filled.csv')

In [797]:
#Previewing the first rows of the filled demographic table
Age_Race.head(20)

Unnamed: 0,fips,Total population,Male,Female,Under 5 years,5 to 9 years,20 to 24 years,25 to 34 years,35 to 44 years,45 to 54 years,...,Not Hispanic or Latino!!White alone,Not Hispanic or Latino!!Black or African American alone,Not Hispanic or Latino!!American Indian and Alaska Native alone,Not Hispanic or Latino!!Asian alone,Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone,Not Hispanic or Latino!!Some other race alone,Not Hispanic or Latino!!Two or more races,Not Hispanic or Latino!!Interracial Two races including Some other race,"Not Hispanic or Latino!!Interracial Two races excluding Some other race, and Three or more races",Total housing units
0,1001,1.0,0.482249,0.520777,0.065653,0.058767,0.063859,0.147285,0.124479,0.133254,...,0.545653,0.384793,0.002882,0.018131,0.0,0.000285,0.018702,0.00108,0.017621,0.442955
1,1003,1.0,0.518475,0.553624,0.053491,0.046445,0.051269,0.115549,0.127803,0.148097,...,0.889683,0.099961,0.005945,0.011497,0.000221,0.000728,0.014241,0.000625,0.013616,0.573525
2,1005,1.0,0.489743,0.521472,0.062272,0.061487,0.080049,0.142088,0.123728,0.124749,...,0.564803,0.352259,0.00163,0.022076,0.000291,0.000627,0.021736,0.000907,0.020829,0.468966
3,1007,1.0,0.489091,0.53103,0.059001,0.060998,0.074595,0.142294,0.132838,0.126685,...,0.652565,0.284195,0.002206,0.018338,0.000335,0.001292,0.01447,0.000601,0.013869,0.447782
4,1009,1.0,0.492673,0.515437,0.063469,0.064595,0.057408,0.126583,0.12405,0.131668,...,0.586867,0.118082,0.001046,0.00571,0.000158,0.001233,0.010117,0.000308,0.009809,0.454874
5,1011,1.0,0.486311,0.521271,0.063497,0.059885,0.086881,0.145671,0.123319,0.124409,...,0.497803,0.417443,0.00204,0.029399,0.000184,0.000465,0.018281,0.000959,0.017322,0.448235
6,1013,1.0,0.511907,0.536494,0.062646,0.059921,0.065513,0.140692,0.129755,0.133225,...,0.686268,0.241386,0.003296,0.024142,0.000574,0.00189,0.028253,0.000871,0.027382,0.555662
7,1015,1.0,0.476547,0.512633,0.061821,0.06196,0.064496,0.140859,0.109135,0.11786,...,0.712457,0.214123,0.000926,0.003687,0.0,0.0,0.019303,0.002536,0.016767,0.466454
8,1017,1.0,0.494869,0.532367,0.05976,0.049592,0.115851,0.128758,0.126405,0.122008,...,0.632224,0.310448,0.001405,0.033574,0.0,0.001622,0.009438,0.0,0.009438,0.430382
9,1019,1.0,0.480663,0.520212,0.0605,0.058398,0.064866,0.129049,0.11585,0.12776,...,0.756343,0.123244,0.004062,0.008009,0.0,0.000846,0.017594,0.000498,0.017096,0.44803


In [798]:
#Previewing the first rows of the filled mobility table
google_neighbor.head(20)

Unnamed: 0,Date,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential,FIPS
0,2020-03-29,-42.0,-8.0,-14.0,-22.159728,-35.0,16.0,1001
1,2020-04-05,-53.0,-8.0,-14.0,-38.384899,-38.0,17.0,1001
2,2020-04-11,-37.0,16.0,-14.0,-19.449087,-30.0,20.0,1001
3,2020-03-29,-47.0,-19.0,-27.0,-24.0,-32.0,9.0,1003
4,2020-04-05,-55.0,-27.0,-44.0,-39.0,-35.0,11.0,1003
5,2020-04-11,-44.0,-10.0,-42.0,-39.0,-28.0,11.0,1003
6,2020-03-29,-27.0,-20.0,3.881817,-27.125217,-23.0,7.827074,1005
7,2020-04-05,-40.0,-20.0,-11.695368,-44.606419,-30.0,9.096527,1005
8,2020-04-11,-18.0,-20.0,-0.062126,-36.953907,-31.0,11.105885,1005
9,2020-03-29,-25.0,-10.0,16.832932,-31.922541,-32.0,12.340198,1007
