In [21]:
# imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [22]:
# Load Peter's preprocessed US Mobility dataset directly from GitHub.
data = (
    "https://raw.githubusercontent.com/ehuang13/w209_final/master/data/"
    "US_Mobility_Report_preprocess.csv"
)
# low_memory=False: parse the file in one pass rather than in chunks
us_df = pd.read_csv(data, low_memory=False)

In [23]:
# Peek at 10 randomly chosen rows to sanity-check the load
us_df.sample(n=10)

Unnamed: 0.1,Unnamed: 0,country_region_code,country_region,state,county,iso_3166_2_code,census_fips_code,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,avg_change
101948,101948,US,United States,Kentucky,Harlan County,,21095,4/25/2020,-30.0,-18.428571,-45.130435,-27.428571,-4.0,11.571429,-18.902692
281612,281612,US,United States,Washington,King County,,53033,4/3/2020,-56.0,-21.0,-18.0,-72.0,-69.0,31.0,-34.166667
121135,121135,US,United States,Michigan,Kalamazoo County,,26077,3/15/2020,-13.0,2.0,160.0,-12.0,-5.0,2.0,22.333333
164940,164940,US,United States,Nevada,Storey County,,32029,6/1/2020,-8.0,1.5,-23.0,-21.333333,-40.0,12.5,-13.055556
284345,284345,US,United States,West Virginia,Boone County,,54005,2/25/2020,16.0,-3.0,-10.454545,7.833333,2.0,0.0,2.063131
278006,278006,US,United States,Virginia,Dickenson County,,51051,5/22/2020,-12.5,9.666667,25.4,-18.0,-30.0,18.8,-1.105556
126824,126824,US,United States,Michigan,Grand Traverse County,,26055,5/27/2020,-10.0,1.0,103.0,-47.0,-34.0,11.0,4.0
289068,289068,US,United States,Wisconsin,La Crosse County,,55063,2/19/2020,-1.0,-1.0,-5.0,-2.0,2.0,0.0,-1.166667
90217,90217,US,United States,Kansas,McPherson County,,20113,3/16/2020,0.0,19.0,0.2,-3.0,-16.0,6.0,1.033333
18,18,US,United States,Alabama,Sumter County,,1119,2/15/2020,-6.0,-11.0,18.6,6.333333,3.5,-0.5,1.822222


### Calculate `Average Change` Across Different Locations

In [24]:
# Row-wise mean of the six mobility categories, kept as a plain Python list
average_cols = us_df[
    [
        'retail_and_recreation',
        'grocery_and_pharmacy',
        'parks',
        'transit_stations',
        'workplaces',
        'residential',
    ]
].mean(axis=1).tolist()

In [25]:
# Store the row-wise category mean as the `avg_change` column
# (overwrites the column if the frame already has one).
us_df["avg_change"] = average_cols

In [26]:
# Spot-check 5 random rows now that `avg_change` has been assigned
us_df.sample(n=5)

Unnamed: 0.1,Unnamed: 0,country_region_code,country_region,state,county,iso_3166_2_code,census_fips_code,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,avg_change
158597,158597,US,United States,Nebraska,Stanton County,,31167,2/19/2020,-12.5,-4.5,3.346154,-4.333333,8.0,0.8,-1.531197
213123,213123,US,United States,Pennsylvania,Tioga County,,42117,2/20/2020,7.0,-1.0,1.333333,4.0,-1.0,1.0,1.888889
108916,108916,US,United States,Louisiana,Lafourche Parish,,22057,4/2/2020,-26.0,-6.0,-13.2,-40.0,-37.0,16.0,-17.7
182897,182897,US,United States,North Carolina,Ashe County,,37009,3/28/2020,-18.0,16.0,44.0,-44.0,-27.0,7.333333,-3.611111
29148,29148,US,United States,Connecticut,Hartford County,,9003,2/23/2020,8.0,-1.0,81.0,11.0,3.0,-1.0,16.833333


In [27]:
# Check dtype and non-null count for each mobility column plus the derived average
us_df[
    [
        'retail_and_recreation',
        'grocery_and_pharmacy',
        'parks',
        'transit_stations',
        'workplaces',
        'residential',
        "avg_change",
    ]
].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 298720 entries, 0 to 298719
Data columns (total 7 columns):
retail_and_recreation    298720 non-null float64
grocery_and_pharmacy     298720 non-null float64
parks                    298720 non-null float64
transit_stations         298720 non-null float64
workplaces               298720 non-null float64
residential              298720 non-null float64
avg_change               298720 non-null float64
dtypes: float64(7)
memory usage: 16.0 MB


In [28]:
# Remove the redundant 'Unnamed: 0' ID column
us_df = us_df.drop('Unnamed: 0', axis=1)

In [29]:
# Spot-check 5 random rows after dropping the extra ID column
us_df.sample(n=5)

Unnamed: 0,country_region_code,country_region,state,county,iso_3166_2_code,census_fips_code,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,avg_change
55696,US,United States,Idaho,Kootenai County,,16055,3/28/2020,-47.0,-20.0,32.0,-19.0,-34.0,12.0,-12.666667
186907,US,United States,North Carolina,Chatham County,,37037,5/9/2020,-12.0,13.0,61.2,-0.8,-23.0,6.5,7.483333
213998,US,United States,Pennsylvania,Huntingdon County,,42061,3/5/2020,14.0,5.0,71.333333,9.0,4.0,-1.0,17.055556
117588,US,United States,Massachusetts,Plymouth County,,25023,3/6/2020,2.0,4.0,6.0,-4.0,1.0,1.0,1.666667
225820,US,United States,South Carolina,Florence County,,45041,6/7/2020,-4.0,11.0,3.0,32.0,-17.0,4.0,4.833333


### Export US Mobility Data to CSV File For Tableau Work

In [30]:
# Write the frame for Tableau. index=False keeps the RangeIndex out of the file;
# otherwise it is saved as a header-less first column, which comes back as an
# extra "Unnamed: 0" column the next time this CSV is loaded with pd.read_csv.
us_df.to_csv("US_Mobility_Report_preprocess.csv", index=False)

### Calculate `Abs(Avg Change)` to Understand Magnitude of Movement

In [31]:
# Magnitude of the average change, ignoring direction.
# Series.abs() is vectorized, replacing a per-row Python loop;
# .tolist() keeps `abs_avg_change` a plain list of floats.
abs_avg_change = us_df["avg_change"].abs().tolist()

In [32]:
# Add the absolute values of `avg_change` as the `abs_avg_change` column
us_df["abs_avg_change"] = abs_avg_change

In [33]:
# Spot-check 5 random rows, now including `abs_avg_change`
us_df.sample(n=5)

Unnamed: 0,country_region_code,country_region,state,county,iso_3166_2_code,census_fips_code,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential,avg_change,abs_avg_change
216707,US,United States,Pennsylvania,Bradford County,,42015,4/16/2020,-27.0,-2.0,-5.285714,-39.666667,-36.0,16.0,-15.65873,15.65873
26852,US,United States,Colorado,Teller County,,8119,4/19/2020,-34.5,-13.5,-0.5,-35.0,-42.0,14.666667,-18.472222,18.472222
180604,US,United States,North Carolina,Halifax County,,37083,3/5/2020,16.0,3.0,8.0,11.25,1.0,-2.0,6.208333,6.208333
190174,US,United States,North Dakota,McLean County,,38055,3/12/2020,20.0,26.5,0.125,10.25,8.0,-1.0,10.645833,10.645833
296718,US,United States,Wyoming,Carbon County,,56007,2/23/2020,8.0,5.0,21.125,1.0,-2.0,-1.0,5.354167,5.354167
