In [1]:
import pandas as pd
import numpy as np
import os
import datetime

In [2]:
def edit_column_date(frame,index):
    """Return ``frame`` with its date column labels rewritten by ``date_format``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose date columns are labelled with strings that
        ``date_format`` accepts.
    index : int
        1-based position of the first date column. Every column at that
        position or later has its label passed through ``date_format``;
        earlier columns are left alone.

    Returns
    -------
    pandas.DataFrame
        A renamed copy of ``frame``, or ``frame`` itself when no column sits
        at or after ``index``.

    Raises
    ------
    Whatever ``date_format`` raises for a label it cannot convert.
    """
    # Build the complete old -> new mapping first so the frame is copied by a
    # single rename call instead of once per date column.
    new_labels = {
        col: date_format(col)
        for position, col in enumerate(frame, start=1)
        if position >= index
    }
    if not new_labels:
        # Nothing to rename: hand back the caller's frame untouched.
        return frame
    return frame.rename(columns=new_labels)

In [3]:
def sort_dates(frame,index):
    """Reorder the date columns of ``frame`` chronologically.

    Intended for frames that carry several columns per date (for example one
    per factor), where the columns of one date are not adjacent.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose columns from position ``index`` onward are to be sorted.
    index : int
        Number of leading columns that keep their current position.

    Returns
    -------
    pandas.DataFrame
        ``frame`` with the first ``index`` columns unchanged, followed by the
        remaining columns in sorted order.

    Notes
    -----
    Labels that start with an ``MM/DD/YY`` date are ordered by that date and
    then by the full label, so columns stay chronological across a year
    boundary (plain string ordering would put ``01/02/21`` before
    ``12/31/20``). Labels without such a prefix are placed after the dated
    ones, in plain label order.
    """
    def _chronological_key(label):
        # Sort key: dated labels first (by date, then label), others after.
        try:
            if label[8:9].isdigit():
                # A digit right after the 8-character prefix means this is
                # not a two-digit-year date (e.g. 'MM/DD/YYYY'); do not guess.
                raise ValueError("label does not start with MM/DD/YY")
            when = datetime.datetime.strptime(label[:8], "%m/%d/%y")
        except (TypeError, ValueError):
            # Non-string or non-date label: fall back to plain ordering.
            return (1, datetime.datetime.min, label)
        return (0, when, label)

    leading = list(frame.columns[:index])  # columns that stay in place
    ordered = sorted(frame.columns[index:], key=_chronological_key)
    return frame[leading + ordered]

In [4]:
def date_format(date):
    """Convert a 'YYYY-MM-DD' date string into its 'MM/DD/YY' form.

    Raises ValueError (from ``strptime``) when ``date`` does not match the
    'YYYY-MM-DD' layout.
    """
    parsed = datetime.datetime.strptime(date, '%Y-%m-%d')
    return parsed.strftime("%m/%d/%y")

In [5]:
#Loading in mobility data
# Three CSV files are read from the relative data/us/mobility directory, so
# the notebook has to be run from its own folder for the paths to resolve.
# Judging by the file names these hold the m50 values, the m50 index and the
# sample counts respectively -- TODO confirm against the data source.
DL_us_m50 = pd.read_csv('../../../../data/us/mobility/DL-us-m50.csv', encoding='latin1')
DL_us_m50_index = pd.read_csv('../../../../data/us/mobility/DL-us-m50_index.csv', encoding='latin1')
# NOTE(review): unlike the two reads above, this one uses pandas' default
# encoding rather than latin1 -- confirm that is intentional.
DL_us_samples = pd.read_csv('../../../../data/us/mobility/DL-us-samples.csv')

In [6]:
#Cleaning the datasets
# Administrative columns that are discarded once the headers are reformatted.
_UNUSED_COLUMNS = ['country_code','admin_level','admin1','admin2']

# For each loaded frame: rewrite the date headers from the 6th column onward
# (edit_column_date counts columns from 1), then drop the unused columns.
# NOTE: this cell rebinds its own inputs, so it can only be run once per load.
DL_us_m50, DL_us_m50_index, DL_us_samples = [
    edit_column_date(_loaded, 6).drop(columns=_UNUSED_COLUMNS)
    for _loaded in (DL_us_m50, DL_us_m50_index, DL_us_samples)
]

In [7]:
#Separating data into county info

# Keep only the rows whose fips value is at least 1000; smaller codes are
# presumably state-level entries -- TODO confirm against the FIPS scheme.
DL_us_m50_County = DL_us_m50.loc[DL_us_m50['fips'] >= 1000]
DL_us_m50_index_County = DL_us_m50_index.loc[DL_us_m50_index['fips'] >= 1000]
DL_us_samples_County = DL_us_samples.loc[DL_us_samples['fips'] >= 1000]

In [8]:
#merging the 3 datasets together
# First join on fips: overlapping m50 columns get '_M_m50', while the index
# columns keep their bare names for now (empty right-hand suffix).
_m50_and_index = DL_us_m50_County.merge(
    DL_us_m50_index_County, on='fips', suffixes=('_M_m50', ''), sort=True
)
# Second join on fips: columns that overlap with the samples frame get
# '_M_idx' on the left side and '_M_samples' on the right side.
_all_three = _m50_and_index.merge(
    DL_us_samples_County, on='fips', suffixes=('_M_idx', '_M_samples'), sort=True
)
# NOTE(review): the recorded output reports 2676 merged rows against 2670 rows
# in each input, which suggests repeated fips values fan out in these joins --
# verify whether the inputs contain duplicate fips keys.

# Keep rows with fips >= -1 (rows with a missing fips fail the comparison and
# are dropped), then upper-case 'fips' wherever it occurs in a column label.
Mobility_County = _all_three[_all_three.fips >= -1].rename(
    columns=lambda label: label.replace('fips', 'FIPS')
)
#saving datasets with 3 values not consecutive and then consecutive
Mobility_County_Nonconsecutive = Mobility_County  # same object, merge column order
Mobility_County_Consecutive = sort_dates(Mobility_County,1)  # FIPS first, then sorted columns
for _table, _csv_name in (
    (Mobility_County_Consecutive, 'Mobility_County_Consecutive.csv'),
    (Mobility_County_Nonconsecutive, 'Mobility_County_Nonconsecutive.csv'),
):
    _table.to_csv(_csv_name)

In [9]:
# Report the row count of each county-level input and of the merged frame.
for _label, _table in (
    ('DL_us_m50_County: ', DL_us_m50_County),
    ('DL_us_m50_index_County: ', DL_us_m50_index_County),
    ('DL_us_samples_County: ', DL_us_samples_County),
    ('Mobility_County_Consecutive: ', Mobility_County_Consecutive),
):
    print(_label + str(len(_table)))

DL_us_m50_County: 2670
DL_us_m50_index_County: 2670
DL_us_samples_County: 2670
Mobility_County_Consecutive: 2676


In [10]:
# Preview the first rows of the merged frame whose columns were reordered by sort_dates.
Mobility_County_Consecutive.head()

Unnamed: 0,FIPS,03/01/20_M_idx,03/01/20_M_m50,03/01/20_M_samples,03/02/20_M_idx,03/02/20_M_m50,03/02/20_M_samples,03/03/20_M_idx,03/03/20_M_m50,03/03/20_M_samples,...,04/02/20_M_samples,04/03/20_M_idx,04/03/20_M_m50,04/03/20_M_samples,04/04/20_M_idx,04/04/20_M_m50,04/04/20_M_samples,04/05/20_M_idx,04/05/20_M_m50,04/05/20_M_samples
0,1001.0,49.0,7.194,1703.0,100.0,14.587,1829.0,95.0,13.865,1840.0,...,1760.0,51.0,7.47,1730.0,27.0,4.006,1708.0,7.0,1.049,1723.0
1,1003.0,81.0,9.78,7067.0,100.0,12.042,7136.0,95.0,11.481,7220.0,...,6683.0,56.0,6.758,6666.0,36.0,4.392,6534.0,10.0,1.324,6494.0
2,1005.0,90.0,8.348,546.0,107.0,10.004,569.0,100.0,9.267,545.0,...,551.0,66.0,6.155,574.0,55.0,5.184,537.0,23.0,2.183,567.0
3,1007.0,53.0,13.008,512.0,95.0,23.076,574.0,100.0,24.164,588.0,...,521.0,58.0,14.236,524.0,30.0,7.384,506.0,12.0,2.911,511.0
4,1009.0,68.0,15.963,1495.0,96.0,22.456,1608.0,100.0,23.222,1615.0,...,1545.0,62.0,14.549,1557.0,34.0,7.914,1487.0,8.0,1.989,1490.0
