# Covid-19 Mobility Analysis

In [78]:
import numpy as np
import pandas as pd
import random

In [79]:
df=pd.read_csv("mobility.csv")
df.head()

Unnamed: 0,Country,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential
0,Afghanistan,-38%,-21%,-13%,-34%,-33%,+10%
1,Angola,-61%,-40%,-39%,-57%,-11%,+22%
2,Antigua and Barbuda,-46%,-21%,-30%,-67%,-48%,*\n+17%
3,Argentina,-86%,-61%,-89%,-80%,-57%,+27%
4,Aruba,-88%,-66%,-80%,-88%,-72%,+20%


## Preprocessing

### Removing noice from data

In [80]:
df2=df.copy()

In [81]:
columns=df.columns[1:]
for column in columns:
    df2[column] = pd.to_numeric(df2[column].astype(str).str.replace('Baseline','0'), errors='coerce')

In [82]:
for column in columns:
    data=df[column]
    df2[column]=list(map(lambda x:x[:-1],df[column].values))
df2.head()

Unnamed: 0,Country,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential
0,Afghanistan,-38,-21,-13,-34,-33,+10
1,Angola,-61,-40,-39,-57,-11,+22
2,Antigua and Barbuda,-46,-21,-30,-67,-48,*\n+17
3,Argentina,-86,-61,-89,-80,-57,+27
4,Aruba,-88,-66,-80,-88,-72,+20


In [83]:
for column in columns:
    #df2['new'] = pd.to_numeric(df2['Residential'].astype(str).str.replace(',',''), errors='coerce').fillna(0).astype(int)
    df2[column] = pd.to_numeric(df2[column].astype(str).str.replace(',',''), errors='coerce').fillna(np.nan)

In [84]:
df=df2.copy()
df.set_index('Country',inplace=True)
df.head()

Unnamed: 0_level_0,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,-38,-21.0,-13.0,-34.0,-33.0,10.0
Angola,-61,-40.0,-39.0,-57.0,-11.0,22.0
Antigua and Barbuda,-46,-21.0,-30.0,-67.0,-48.0,
Argentina,-86,-61.0,-89.0,-80.0,-57.0,27.0
Aruba,-88,-66.0,-80.0,-88.0,-72.0,20.0


### Dealing with missing values

In [85]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 193 entries, Afghanistan to Zimbabwe
Data columns (total 6 columns):
Retail & recreation    193 non-null int64
Grocery & pharmacy     192 non-null float64
Parks                  191 non-null float64
Transit stations       191 non-null float64
Workplaces             190 non-null float64
Residential            179 non-null float64
dtypes: float64(5), int64(1)
memory usage: 10.6+ KB


In [86]:
df.describe()

Unnamed: 0,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential
count,193.0,192.0,191.0,191.0,190.0,179.0
mean,-55.207254,-33.015625,-25.86911,-55.350785,-34.347368,15.486034
std,21.436402,21.920961,40.99857,20.561269,18.120565,8.040178
min,-96.0,-94.0,-92.0,-92.0,-73.0,-3.0
25%,-73.0,-47.0,-55.0,-71.0,-46.0,11.0
50%,-53.0,-28.5,-31.0,-58.0,-36.0,14.0
75%,-41.0,-19.0,-10.0,-39.5,-25.0,19.0
max,-2.0,24.0,126.0,-6.0,14.0,80.0


### Filling null values

In [87]:
#as standard deviation for each column is large,
#so we cannot fill the missing values as mean values
for column in columns:
    num=df[column].isnull().sum()
    mean=df[column].mean()
    std=df[column].std()
    lower_limit=round(mean-std,0)
    upper_limit=round(mean+std,0)
    random_list=[]
    for i in range(0,num):
        random_list.append(random.randint(lower_limit,upper_limit))
    random_list=np.array(random_list)
    data=df[column].values
    k=0
    for i,j in enumerate(data):
        if np.isnan(data[i]):
            data[i]=random_list[k]
            k+=1
    df[column]=data


In [88]:
#no null values
df.isnull().sum()

Retail & recreation    0
Grocery & pharmacy     0
Parks                  0
Transit stations       0
Workplaces             0
Residential            0
dtype: int64

### Normalizing data using z-score

In [89]:
for column in columns:
    mean=df[column].mean()
    std=df[column].std()
    df[column]=(df[column]-mean)/std
    #df[column]=df[column].astype('int')

In [90]:
df.head()

Unnamed: 0_level_0,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,0.802712,0.545665,0.315198,1.040063,0.076514,-0.691411
Angola,-0.270229,-0.322428,-0.322056,-0.083803,1.293295,0.833644
Antigua and Barbuda,0.429515,0.545665,-0.101468,-0.57244,-0.753109,-0.055971
Argentina,-1.43647,-1.281898,-1.547546,-1.207668,-1.250883,1.469083
Aruba,-1.529769,-1.510344,-1.326958,-1.598578,-2.080506,0.579468
