# Clean Chicago 2019 Social Distancing Dataset

**Instructions:**

1. Provide the file path to the data, the city name, and the year in the second code cell.

In [1]:
import os
import geopandas
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange

import altair as alt
from vega_datasets import data

# # Allow altair to visualize large datasets
# alt.data_transformers.disable_max_rows()

In [2]:
# Dataset selection: city display name, its folder code, and the year to load.
city, city_directory, year = 'Chicago', 'ch', '2019'

# Directories whose CSV files will be read (one directory per year).
# NOTE: `path` is a machine-specific absolute path; point it at your own data root.
path = Path('/Users/justinsnider/nyu-big-data/project/clean-data/')
path_list = [path.joinpath(city_directory, 'social', year)]

In [4]:
def get_df(path_list):
    """Load every CSV found directly inside the given directories into one DataFrame.

    Parameters
    ----------
    path_list : iterable of pathlib.Path
        Directories to scan (non-recursively) for files whose name ends in ``.csv``.

    Returns
    -------
    pandas.DataFrame
        The rows of all CSV files, re-indexed 0..N-1 in read order and then
        sorted by ``date_range_start``. The ``cbg`` column is read as ``object``
        instead of letting pandas infer a numeric dtype.

    Raises
    ------
    FileNotFoundError
        If none of the directories contains a CSV file.
    """
    directories = list(path_list)
    frames = []
    for directory in tqdm(directories, desc='Years'):
        # os.listdir returns names in arbitrary order; sort them so the
        # resulting row index is the same on every run.
        f_list = sorted(os.listdir(directory))
        csv_files = [directory/f for f in f_list if f.endswith('.csv')]
        for csv in tqdm(csv_files, desc='Days'):
            frames.append(pd.read_csv(csv, dtype={'cbg': object}))

    if not frames:
        raise FileNotFoundError('No CSV files found in: {}'.format([str(d) for d in directories]))

    # Concatenate once at the end: DataFrame.append re-copied the accumulated
    # frame for every file and was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)
    return df.sort_values(by=['date_range_start'])

In [5]:
%%time

# Read and combine every CSV under the configured directories (slow; see the timing output).
df = get_df(path_list)

Years:   0%|          | 0/1 [00:00<?, ?it/s]

Days:   0%|          | 0/365 [00:00<?, ?it/s]

CPU times: user 9min 14s, sys: 1min 47s, total: 11min 1s
Wall time: 11min 10s


In [6]:
# Inspect the combined frame (the rendered view is truncated to the first and last rows).
df

Unnamed: 0.1,Unnamed: 0,cbg,date_range_start,date_range_end,device_count,distance_traveled_from_home,bucketed_distance_traveled,median_dwell_at_bucketed_distance_traveled,completely_home_device_count,median_home_dwell_time,...,destination_cbgs,delivery_behavior_devices,median_non_home_dwell_time,candidate_device_count,bucketed_away_from_home_time,median_percentage_time_home,bucketed_percentage_time_home,mean_home_dwell_time,mean_non_home_dwell_time,mean_distance_traveled_from_home
1027355,66069,170319900000,2019-01-01T00:00:00-05:00,2019-01-02T00:00:00-05:00,29,2733,"{""16001-50000"":4,""0"":5,"">50000"":5,""2001-8000"":...","{""16001-50000"":169,"">50000"":525,""2001-8000"":14...",4,0,...,"{""170310307011"":1,""170438411023"":1,""1703143060...",1,133,93,"{""21-45"":4,""541-600"":1,""721-840"":1,""301-360"":1...",0,"{""0-25"":17,""76-100"":6}",126,271,2733
1025369,563,170312304002,2019-01-01T00:00:00-06:00,2019-01-02T00:00:00-06:00,81,1983,"{""16001-50000"":1,""0"":39,"">50000"":3,""2001-8000""...","{""16001-50000"":220,"">50000"":469,""<1000"":395,""2...",35,505,...,"{""170310102011"":1,""170312213003"":1,""1703123070...",1,7,232,"{""21-45"":4,""541-600"":1,""1201-1320"":1,""301-360""...",99,"{""0-25"":21,""76-100"":53,""51-75"":2,""26-50"":2}",612,165,3321
1029803,147814,170312503004,2019-01-01T00:00:00-06:00,2019-01-02T00:00:00-06:00,37,4726,"{""0"":20,""2001-8000"":8,""1-1000"":1,""1001-2000"":2...","{""1001-2000"":32,""2001-8000"":39,""8001-16000"":12...",19,817,...,"{""170318066002"":1,""170318435001"":1,""1703107060...",2,0,128,"{""21-45"":2,""46-60"":1,""721-840"":3,""301-360"":2,""...",100,"{""0-25"":4,""76-100"":24,""51-75"":2,""26-50"":3}",792,126,4958
1029802,147633,171978835211,2019-01-01T00:00:00-06:00,2019-01-02T00:00:00-06:00,60,4018,"{""16001-50000"":8,""0"":24,"">50000"":4,""2001-8000""...","{""16001-50000"":40,"">50000"":14,""<1000"":76,""2001...",25,999,...,"{""471570114001"":1,""170318300051"":1,""1703182411...",1,20,157,"{""21-45"":7,""541-600"":1,""46-60"":1,""721-840"":4,""...",98,"{""0-25"":6,""76-100"":38,""51-75"":1,""26-50"":6}",810,199,45463
1029801,147630,170898534004,2019-01-01T00:00:00-06:00,2019-01-02T00:00:00-06:00,83,641,"{""16001-50000"":3,""0"":45,"">50000"":1,""2001-8000""...","{""16001-50000"":372,"">50000"":6,""<1000"":189,""200...",44,779,...,"{""170898536003"":2,""170438465192"":1,""1708985300...",1,0,285,"{""21-45"":2,""481-540"":1,""46-60"":3,""721-840"":2,""...",100,"{""0-25"":13,""76-100"":60,""51-75"":7,""26-50"":1}",698,129,1051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148703,74009,170438401034,2019-12-31T00:00:00-06:00,2020-01-01T00:00:00-06:00,111,4657,"{""16001-50000"":9,""0"":29,"">50000"":10,""2001-8000...","{""16001-50000"":209,"">50000"":20,""<1000"":145,""20...",29,352,...,"{""170438401033"":5,""550730010002"":1,""1704384070...",4,102,290,"{""21-45"":9,""481-540"":4,""541-600"":4,""46-60"":4,""...",68,"{""0-25"":27,""76-100"":52,""51-75"":22,""26-50"":8}",499,225,16215
148702,74008,170318196002,2019-12-31T00:00:00-06:00,2020-01-01T00:00:00-06:00,51,1006,"{""16001-50000"":1,""0"":8,"">50000"":10,""2001-8000""...","{""16001-50000"":378,"">50000"":304,""<1000"":65,""20...",11,558,...,"{""170318195001"":3,""170318198021"":3,""1709996190...",2,209,87,"{""21-45"":5,""541-600"":2,""721-840"":1,""1201-1320""...",71,"{""0-25"":14,""76-100"":25,""51-75"":2,""26-50"":6}",645,383,91001
148701,74007,170318141002,2019-12-31T00:00:00-06:00,2020-01-01T00:00:00-06:00,42,787,"{""16001-50000"":2,""0"":19,""2001-8000"":4,""1-1000""...","{""16001-50000"":91,""<1000"":80,""2001-8000"":71,""1...",19,611,...,"{""170438447021"":1,""170318139001"":2,""1703181170...",3,67,104,"{""21-45"":1,""541-600"":4,""46-60"":1,""721-840"":2,""...",85,"{""0-25"":5,""76-100"":24,""51-75"":6,""26-50"":5}",677,200,2798
148711,74438,170318258023,2019-12-31T00:00:00-06:00,2020-01-01T00:00:00-06:00,107,2050,"{""16001-50000"":23,""0"":34,"">50000"":2,""2001-8000...","{""16001-50000"":79,"">50000"":208,""<1000"":189,""20...",33,826,...,"{""170316202001"":2,""170319801001"":4,""1703182620...",4,107,211,"{""21-45"":2,""481-540"":1,""541-600"":2,""46-60"":4,""...",84,"{""0-25"":18,""76-100"":66,""51-75"":16,""26-50"":3}",720,251,3901


In [7]:
# List the column names one per line. A plain loop is used because the
# comprehension only existed for its print side effect and built a throwaway list.
for column_name in df.columns:
    print(column_name)

Unnamed: 0
cbg
date_range_start
date_range_end
device_count
distance_traveled_from_home
bucketed_distance_traveled
median_dwell_at_bucketed_distance_traveled
completely_home_device_count
median_home_dwell_time
bucketed_home_dwell_time
at_home_by_each_hour
part_time_work_behavior_devices
full_time_work_behavior_devices
destination_cbgs
delivery_behavior_devices
median_non_home_dwell_time
candidate_device_count
bucketed_away_from_home_time
median_percentage_time_home
bucketed_percentage_time_home
mean_home_dwell_time
mean_non_home_dwell_time
mean_distance_traveled_from_home


In [8]:
# Selecting the columns that we need for the analysis.
# Wider candidate list, kept for reference only (not used below):
# col = ['date_range_start',
#        'cbg',
#        'device_count',  # Total active devices
#        'completely_home_device_count',
#        'part_time_work_behavior_devices',
#        'full_time_work_behavior_devices',
#        'delivery_behavior_devices',
#        'median_percentage_time_home',
#        'median_home_dwell_time',
#        'mean_home_dwell_time',
#        'median_non_home_dwell_time',
#        'mean_non_home_dwell_time',
#        'distance_traveled_from_home', # Median Distance traveled
#        'mean_distance_traveled_from_home']
# Columns actually used: the date, the CBG id, and the four device counts.
col = ['date_range_start',
       'cbg',
       'device_count',  # Total active devices
       'completely_home_device_count',
       'part_time_work_behavior_devices',
       'full_time_work_behavior_devices']
# Keep only those columns; `df` itself is left untouched.
devices_mobility = df[col]

In [9]:
# Give the selected columns shorter names for the rest of the notebook.
short_names = {
    'date_range_start': 'date',
    'device_count': 'total',
    'completely_home_device_count': 'completely_home',
    'part_time_work_behavior_devices': 'part_time_work',
    'full_time_work_behavior_devices': 'full_time_work',
}
devices_mobility = devices_mobility.rename(columns=short_names)
devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work
1027355,2019-01-01T00:00:00-05:00,170319900000,29,4,1,1
1025369,2019-01-01T00:00:00-06:00,170312304002,81,35,3,1
1029803,2019-01-01T00:00:00-06:00,170312503004,37,19,1,1
1029802,2019-01-01T00:00:00-06:00,171978835211,60,25,3,1
1029801,2019-01-01T00:00:00-06:00,170898534004,83,44,5,1
...,...,...,...,...,...,...
148703,2019-12-31T00:00:00-06:00,170438401034,111,29,10,6
148702,2019-12-31T00:00:00-06:00,170318196002,51,11,7,2
148701,2019-12-31T00:00:00-06:00,170318141002,42,19,7,2
148711,2019-12-31T00:00:00-06:00,170318258023,107,33,14,3


In [10]:
# devices_mobility

In [11]:
# Device-count columns (every count column except `total`).
device_columns = [
    'completely_home',
    'part_time_work',
    'full_time_work',
]

In [12]:
# Calculating percentage for all the device columns: each count as a share of
# `total`, in percent. Whole-column arithmetic is used instead of a row-wise
# apply(axis=1), which called a Python lambda once per row for every column.
total_devices = devices_mobility['total']
for column in device_columns:
    share = devices_mobility[column] / total_devices * 100.0
    # Rows with a zero total get 0.0 rather than the inf/NaN that division by zero yields.
    # The 'precentage_' spelling is kept: later cells read these exact column names.
    devices_mobility['precentage_'+column] = share.where(total_devices != 0, 0.0)
devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work
1027355,2019-01-01T00:00:00-05:00,170319900000,29,4,1,1,13.793103,3.448276,3.448276
1025369,2019-01-01T00:00:00-06:00,170312304002,81,35,3,1,43.209877,3.703704,1.234568
1029803,2019-01-01T00:00:00-06:00,170312503004,37,19,1,1,51.351351,2.702703,2.702703
1029802,2019-01-01T00:00:00-06:00,171978835211,60,25,3,1,41.666667,5.000000,1.666667
1029801,2019-01-01T00:00:00-06:00,170898534004,83,44,5,1,53.012048,6.024096,1.204819
...,...,...,...,...,...,...,...,...,...
148703,2019-12-31T00:00:00-06:00,170438401034,111,29,10,6,26.126126,9.009009,5.405405
148702,2019-12-31T00:00:00-06:00,170318196002,51,11,7,2,21.568627,13.725490,3.921569
148701,2019-12-31T00:00:00-06:00,170318141002,42,19,7,2,45.238095,16.666667,4.761905
148711,2019-12-31T00:00:00-06:00,170318258023,107,33,14,3,30.841121,13.084112,2.803738


In [13]:
# Standardise each percentage column to z-scores: subtract the column mean and
# divide by the column standard deviation, both computed over all rows.
for column in device_columns:
    percentages = devices_mobility['precentage_' + column]
    centred = percentages - percentages.mean()
    devices_mobility['norm_' + column] = centred / percentages.std()

devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
1027355,2019-01-01T00:00:00-05:00,170319900000,29,4,1,1,13.793103,3.448276,3.448276,-1.735035,-1.064748,-0.535610
1025369,2019-01-01T00:00:00-06:00,170312304002,81,35,3,1,43.209877,3.703704,1.234568,1.236994,-1.015578,-1.000527
1029803,2019-01-01T00:00:00-06:00,170312503004,37,19,1,1,51.351351,2.702703,2.702703,2.059541,-1.208274,-0.692194
1029802,2019-01-01T00:00:00-06:00,171978835211,60,25,3,1,41.666667,5.000000,1.666667,1.081080,-0.766036,-0.909779
1029801,2019-01-01T00:00:00-06:00,170898534004,83,44,5,1,53.012048,6.024096,1.204819,2.227324,-0.568894,-1.006775
...,...,...,...,...,...,...,...,...,...,...,...,...
148703,2019-12-31T00:00:00-06:00,170438401034,111,29,10,6,26.126126,9.009009,5.405405,-0.489008,0.005712,-0.124579
148702,2019-12-31T00:00:00-06:00,170318196002,51,11,7,2,21.568627,13.725490,3.921569,-0.949460,0.913651,-0.436211
148701,2019-12-31T00:00:00-06:00,170318141002,42,19,7,2,45.238095,16.666667,4.761905,1.441908,1.479837,-0.259725
148711,2019-12-31T00:00:00-06:00,170318258023,107,33,14,3,30.841121,13.084112,2.803738,-0.012643,0.790183,-0.670974


In [14]:
# Parse the timestamp strings (which carry differing UTC offsets) into
# timezone-aware UTC datetimes.
utc_dates = pd.to_datetime(devices_mobility['date'], utc=True)
devices_mobility['date'] = utc_dates
devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
1027355,2019-01-01 05:00:00+00:00,170319900000,29,4,1,1,13.793103,3.448276,3.448276,-1.735035,-1.064748,-0.535610
1025369,2019-01-01 06:00:00+00:00,170312304002,81,35,3,1,43.209877,3.703704,1.234568,1.236994,-1.015578,-1.000527
1029803,2019-01-01 06:00:00+00:00,170312503004,37,19,1,1,51.351351,2.702703,2.702703,2.059541,-1.208274,-0.692194
1029802,2019-01-01 06:00:00+00:00,171978835211,60,25,3,1,41.666667,5.000000,1.666667,1.081080,-0.766036,-0.909779
1029801,2019-01-01 06:00:00+00:00,170898534004,83,44,5,1,53.012048,6.024096,1.204819,2.227324,-0.568894,-1.006775
...,...,...,...,...,...,...,...,...,...,...,...,...
148703,2019-12-31 06:00:00+00:00,170438401034,111,29,10,6,26.126126,9.009009,5.405405,-0.489008,0.005712,-0.124579
148702,2019-12-31 06:00:00+00:00,170318196002,51,11,7,2,21.568627,13.725490,3.921569,-0.949460,0.913651,-0.436211
148701,2019-12-31 06:00:00+00:00,170318141002,42,19,7,2,45.238095,16.666667,4.761905,1.441908,1.479837,-0.259725
148711,2019-12-31 06:00:00+00:00,170318258023,107,33,14,3,30.841121,13.084112,2.803738,-0.012643,0.790183,-0.670974


In [15]:
# Reset the time of day to midnight so all rows of one day share a single timestamp.
midnight_dates = devices_mobility['date'].dt.normalize()
devices_mobility['date'] = midnight_dates
devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
1027355,2019-01-01 00:00:00+00:00,170319900000,29,4,1,1,13.793103,3.448276,3.448276,-1.735035,-1.064748,-0.535610
1025369,2019-01-01 00:00:00+00:00,170312304002,81,35,3,1,43.209877,3.703704,1.234568,1.236994,-1.015578,-1.000527
1029803,2019-01-01 00:00:00+00:00,170312503004,37,19,1,1,51.351351,2.702703,2.702703,2.059541,-1.208274,-0.692194
1029802,2019-01-01 00:00:00+00:00,171978835211,60,25,3,1,41.666667,5.000000,1.666667,1.081080,-0.766036,-0.909779
1029801,2019-01-01 00:00:00+00:00,170898534004,83,44,5,1,53.012048,6.024096,1.204819,2.227324,-0.568894,-1.006775
...,...,...,...,...,...,...,...,...,...,...,...,...
148703,2019-12-31 00:00:00+00:00,170438401034,111,29,10,6,26.126126,9.009009,5.405405,-0.489008,0.005712,-0.124579
148702,2019-12-31 00:00:00+00:00,170318196002,51,11,7,2,21.568627,13.725490,3.921569,-0.949460,0.913651,-0.436211
148701,2019-12-31 00:00:00+00:00,170318141002,42,19,7,2,45.238095,16.666667,4.761905,1.441908,1.479837,-0.259725
148711,2019-12-31 00:00:00+00:00,170318258023,107,33,14,3,30.841121,13.084112,2.803738,-0.012643,0.790183,-0.670974


In [16]:
# Check the number of rows for each CBG.
# The per-CBG count is computed once and reused for both the minimum and the maximum.
rows_per_cbg = devices_mobility.groupby('cbg')['date'].count()
print('Min: {}\nMax: {}'.format(rows_per_cbg.min(), rows_per_cbg.max()))

Min: 2
Max: 365


In [17]:
# Print the smallest and largest value of every column as a quick range check.
for col in devices_mobility.columns:
    column_values = devices_mobility[col]
    print('\n')
    print(col)
    lowest = column_values.min()
    highest = column_values.max()
    print('Min: {}\nMax: {}'.format(lowest, highest))



date
Min: 2019-01-01 00:00:00+00:00
Max: 2019-12-31 00:00:00+00:00


cbg
Min: 170310101001
Max: 550599900000


total
Min: 5
Max: 9089


completely_home
Min: 1
Max: 3637


part_time_work
Min: 1
Max: 504


full_time_work
Min: 1
Max: 244


precentage_completely_home
Min: 0.6944444444444444
Max: 94.87179487179486


precentage_part_time_work
Min: 0.1718213058419244
Max: 66.66666666666666


precentage_full_time_work
Min: 0.06067961165048543
Max: 57.14285714285714


norm_completely_home
Min: -3.058415306494449
Max: 6.456488060136479


norm_part_time_work
Min: -1.6954772012259136
Max: 11.105009681416492


norm_full_time_work
Min: -1.2470644256110104
Max: 10.74118652392788


In [18]:
# Count the rows that contain at least one null/NaN entry and report the number.
rows_with_nulls = devices_mobility.isna().any(axis=1)
print('We have {} null values.'.format(rows_with_nulls.sum()))

We have 0 null values.


In [19]:
# Grouping based on day: mean of every numeric column over all CBGs for each date.
# numeric_only=True leaves out the string `cbg` column explicitly; without it,
# pandas 2.x raises a TypeError when asked to average strings.
grouped_dm = devices_mobility.groupby('date').mean(numeric_only=True)
grouped_dm

Unnamed: 0_level_0,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-01 00:00:00+00:00,91.247785,37.888722,4.118186,1.685088,41.177627,4.519741,2.167083,1.031672,-0.858488,-0.804683
2019-01-02 00:00:00+00:00,91.810303,35.090868,7.486332,5.285521,38.260088,8.242369,5.969580,0.736908,-0.141869,-0.006093
2019-01-03 00:00:00+00:00,91.214929,32.641033,7.944728,5.393662,35.793576,8.784989,6.067273,0.487712,-0.037413,0.014425
2019-01-04 00:00:00+00:00,90.050008,30.264004,7.441658,4.526956,32.980235,8.164460,5.021958,0.203475,-0.156867,-0.205110
2019-01-05 00:00:00+00:00,90.936467,34.614899,5.549114,2.463202,37.696412,6.162376,3.049713,0.679959,-0.542275,-0.619315
...,...,...,...,...,...,...,...,...,...,...
2019-12-27 00:00:00+00:00,83.286508,22.474459,7.293269,4.199820,27.428792,8.548535,5.118998,-0.357397,-0.082931,-0.184730
2019-12-28 00:00:00+00:00,84.239784,24.402945,6.034405,2.314002,29.132069,6.806781,2.993150,-0.185312,-0.418225,-0.631195
2019-12-29 00:00:00+00:00,84.815956,27.283203,5.775240,1.987831,32.417181,6.654538,2.741454,0.146589,-0.447532,-0.684055
2019-12-30 00:00:00+00:00,85.414601,24.619949,7.282560,4.747484,29.098098,8.320166,5.546353,-0.188744,-0.126893,-0.094978


In [20]:
# Preview with `date` as a regular column; the result is not assigned, so grouped_dm is unchanged.
grouped_dm.reset_index()

Unnamed: 0,date,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
0,2019-01-01 00:00:00+00:00,91.247785,37.888722,4.118186,1.685088,41.177627,4.519741,2.167083,1.031672,-0.858488,-0.804683
1,2019-01-02 00:00:00+00:00,91.810303,35.090868,7.486332,5.285521,38.260088,8.242369,5.969580,0.736908,-0.141869,-0.006093
2,2019-01-03 00:00:00+00:00,91.214929,32.641033,7.944728,5.393662,35.793576,8.784989,6.067273,0.487712,-0.037413,0.014425
3,2019-01-04 00:00:00+00:00,90.050008,30.264004,7.441658,4.526956,32.980235,8.164460,5.021958,0.203475,-0.156867,-0.205110
4,2019-01-05 00:00:00+00:00,90.936467,34.614899,5.549114,2.463202,37.696412,6.162376,3.049713,0.679959,-0.542275,-0.619315
...,...,...,...,...,...,...,...,...,...,...,...
360,2019-12-27 00:00:00+00:00,83.286508,22.474459,7.293269,4.199820,27.428792,8.548535,5.118998,-0.357397,-0.082931,-0.184730
361,2019-12-28 00:00:00+00:00,84.239784,24.402945,6.034405,2.314002,29.132069,6.806781,2.993150,-0.185312,-0.418225,-0.631195
362,2019-12-29 00:00:00+00:00,84.815956,27.283203,5.775240,1.987831,32.417181,6.654538,2.741454,0.146589,-0.447532,-0.684055
363,2019-12-30 00:00:00+00:00,85.414601,24.619949,7.282560,4.747484,29.098098,8.320166,5.546353,-0.188744,-0.126893,-0.094978


In [21]:
# Daily averages: one line chart per percentage column, stacked in rows.
title = '{} {}'.format(city,year)
alt.Chart(grouped_dm.reset_index()).mark_line().encode(
    x=alt.X('date', title='Date'),
    y=alt.Y(alt.repeat('row'), type='quantitative')
).properties(
    width=300,
    height=250
).repeat(
    row=['precentage_completely_home','precentage_part_time_work','precentage_full_time_work']
).properties(
    # `title` used to be built but never attached to the chart; set it once
    # on the repeated chart as a whole rather than on every row.
    title=title
).interactive()

![ch-daily-2019.png](https://github.com/chouhandiksha/bigdataproject/raw/main/media/social-dist/ch-daily-2019.png)

In [22]:
# Add the calendar month number of each row for the monthly aggregation.
month_numbers = devices_mobility['date'].dt.month
devices_mobility['month'] = month_numbers
devices_mobility

Unnamed: 0,date,cbg,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work,month
1027355,2019-01-01 00:00:00+00:00,170319900000,29,4,1,1,13.793103,3.448276,3.448276,-1.735035,-1.064748,-0.535610,1
1025369,2019-01-01 00:00:00+00:00,170312304002,81,35,3,1,43.209877,3.703704,1.234568,1.236994,-1.015578,-1.000527,1
1029803,2019-01-01 00:00:00+00:00,170312503004,37,19,1,1,51.351351,2.702703,2.702703,2.059541,-1.208274,-0.692194,1
1029802,2019-01-01 00:00:00+00:00,171978835211,60,25,3,1,41.666667,5.000000,1.666667,1.081080,-0.766036,-0.909779,1
1029801,2019-01-01 00:00:00+00:00,170898534004,83,44,5,1,53.012048,6.024096,1.204819,2.227324,-0.568894,-1.006775,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
148703,2019-12-31 00:00:00+00:00,170438401034,111,29,10,6,26.126126,9.009009,5.405405,-0.489008,0.005712,-0.124579,12
148702,2019-12-31 00:00:00+00:00,170318196002,51,11,7,2,21.568627,13.725490,3.921569,-0.949460,0.913651,-0.436211,12
148701,2019-12-31 00:00:00+00:00,170318141002,42,19,7,2,45.238095,16.666667,4.761905,1.441908,1.479837,-0.259725,12
148711,2019-12-31 00:00:00+00:00,170318258023,107,33,14,3,30.841121,13.084112,2.803738,-0.012643,0.790183,-0.670974,12


In [23]:
# Grouping data on basis of month value: mean of every numeric column per month.
# numeric_only=True leaves out the string `cbg` column (and the datetime `date`
# column) explicitly; without it, pandas 2.x raises a TypeError on the strings.
# NOTE: this rebinds `grouped_dm`, which previously held the daily aggregate;
# re-run the daily grouping cell before re-drawing the daily chart.
grouped_dm = devices_mobility.groupby('month').mean(numeric_only=True)
grouped_dm

Unnamed: 0_level_0,total,completely_home,part_time_work,full_time_work,precentage_completely_home,precentage_part_time_work,precentage_full_time_work,norm_completely_home,norm_part_time_work,norm_full_time_work
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,96.613418,37.483809,8.036621,5.617914,38.630917,8.247624,5.92574,0.774373,-0.140857,-0.0153
2,96.836765,33.842654,9.167516,6.580113,34.820179,9.415565,6.928666,0.389368,0.083975,0.195332
3,94.842199,30.78837,8.438744,5.256923,32.450728,8.891708,5.736731,0.149978,-0.016869,-0.054995
4,97.628858,30.305518,9.043481,5.510831,31.400317,9.052089,5.658689,0.043853,0.014005,-0.071385
5,104.44815,31.887516,9.23352,5.746921,30.682703,8.711961,5.624916,-0.028649,-0.051471,-0.078478
6,100.335075,31.335776,7.947571,4.28099,31.251714,7.892547,4.519283,0.028839,-0.209211,-0.31068
7,90.072537,27.382308,7.15979,3.817568,30.373915,7.922113,4.459675,-0.059846,-0.203519,-0.323199
8,98.1701,29.925192,7.929431,4.676282,28.26103,8.421511,5.155502,-0.273315,-0.107383,-0.177063
9,111.208328,35.175548,9.38282,6.250005,28.79413,9.013878,6.278831,-0.219455,0.006649,0.058856
10,85.491671,24.069772,8.295616,5.960022,28.148827,9.410173,6.895883,-0.284651,0.082937,0.188447


In [24]:
# Reshape the monthly means into long-form records (one per column and month)
# so Altair can draw one coloured line per column.
pct_columns = ['precentage_completely_home','precentage_part_time_work','precentage_full_time_work']
records = [
    # Cast to built-in int/float so the values serialise cleanly to JSON.
    {'month': int(m), 'column': pct_column, 'value': float(grouped_dm.loc[m, pct_column])}
    for pct_column in pct_columns
    # Use the months actually present rather than a fixed 1..12, so a
    # partial year of data does not raise a KeyError.
    for m in grouped_dm.index
]
d = alt.Data(values=records)

In [25]:
# Monthly averages of the percentage columns, one coloured line per column.
title = '{} {}'.format(city,year)
month_axis = alt.X('month:O', title='Month')
value_axis = alt.Y('value:Q', type='quantitative', title='Percentage')
monthly_chart = alt.Chart(d, title=title).mark_line().encode(
    x=month_axis,
    y=value_axis,
    color='column:N'
).properties(width=300, height=250)
monthly_chart.interactive()

![ch-monthly-2019](https://github.com/chouhandiksha/bigdataproject/raw/main/media/social-dist/ch-monthly-2019.png)