<a href="https://colab.research.google.com/github/aarsanjani/meansquares/blob/master/Google_MobilityDataAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Google mobility Data

* Data downloaded from : https://www.google.com/covid19/mobility/

In [21]:
!pip install wget



In [22]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os
import wget
from pandas import Series, datetime
from pandas.plotting import scatter_matrix, autocorrelation_plot
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV, TimeSeriesSplit
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import roc_curve, auc
import random
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from xgboost import XGBClassifier
from sklearn.mixture import GaussianMixture

In [23]:
download_url = 'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv?cachebust=94537edba4db1128'

filename = wget.download(download_url)

filename

'Global_Mobility_Report (1).csv'

In [24]:
mobility_data = pd.read_csv(filename,low_memory=False)
mobility_data.head(2)

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,AE,United Arab Emirates,,,,,,2020-02-15,0.0,4.0,5.0,0.0,2.0,1.0
1,AE,United Arab Emirates,,,,,,2020-02-16,1.0,4.0,4.0,1.0,2.0,1.0


In [25]:
US_mobility = mobility_data[mobility_data['country_region'] == 'United States']
US_mobility.head()

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
1089802,US,United States,,,,,,2020-02-15,6.0,2.0,15.0,3.0,2.0,-1.0
1089803,US,United States,,,,,,2020-02-16,7.0,1.0,16.0,2.0,0.0,-1.0
1089804,US,United States,,,,,,2020-02-17,6.0,0.0,28.0,-9.0,-24.0,5.0
1089805,US,United States,,,,,,2020-02-18,0.0,-1.0,6.0,1.0,0.0,1.0
1089806,US,United States,,,,,,2020-02-19,2.0,0.0,8.0,1.0,1.0,0.0


## 'Sub region' column has the State data 

In [26]:
US_mobility['sub_region_1'].unique()

array([nan, 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [27]:
CA_mobility_data = US_mobility[US_mobility['sub_region_1'] == 'California' ]
#CA_mobility_data.dropna()
CA_mobility_data.head(2)

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
1116351,US,United States,California,,,US-CA,,2020-02-15,1.0,1.0,19.0,1.0,-1.0,0.0
1116352,US,United States,California,,,US-CA,,2020-02-16,5.0,0.0,31.0,1.0,-1.0,-1.0


## 'sub_region_2' column has the county data

In [28]:
print(len(CA_mobility_data['sub_region_2'].unique()))
print(CA_mobility_data.shape)
CA_mobility_data['sub_region_2'].unique()

57
(9480, 14)


array([nan, 'Alameda County', 'Amador County', 'Butte County',
       'Calaveras County', 'Colusa County', 'Contra Costa County',
       'Del Norte County', 'El Dorado County', 'Fresno County',
       'Glenn County', 'Humboldt County', 'Imperial County',
       'Inyo County', 'Kern County', 'Kings County', 'Lake County',
       'Lassen County', 'Los Angeles County', 'Madera County',
       'Marin County', 'Mariposa County', 'Mendocino County',
       'Merced County', 'Modoc County', 'Mono County', 'Monterey County',
       'Napa County', 'Nevada County', 'Orange County', 'Placer County',
       'Plumas County', 'Riverside County', 'Sacramento County',
       'San Benito County', 'San Bernardino County', 'San Diego County',
       'San Francisco County', 'San Joaquin County',
       'San Luis Obispo County', 'San Mateo County',
       'Santa Barbara County', 'Santa Clara County', 'Santa Cruz County',
       'Shasta County', 'Siskiyou County', 'Solano County',
       'Sonoma County', 'St

In [29]:
CA_mobility_data = CA_mobility_data.dropna(subset=['sub_region_2'])
print(CA_mobility_data.shape)
CA_mobility_data.head(2)
#CA_mobility_data[CA_mobility_data['sub_region_2'] == nan]

(9312, 14)


Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
1116519,US,United States,California,Alameda County,,,6001.0,2020-02-15,1.0,0.0,22.0,1.0,0.0,-1.0
1116520,US,United States,California,Alameda County,,,6001.0,2020-02-16,7.0,0.0,24.0,5.0,1.0,-2.0
