# Generation of the Candidate Flows of Human Mobility

In [72]:
import pandas as pd
import numpy as np

import os
import datetime
from datetime import date, timedelta
from tqdm import tqdm_notebook

import matplotlib.pyplot as plt
%matplotlib inline

data_path = os.path.join('/','home','fterroso','data','Spain-INE-mobility-study-2020','muestra1')

### Target Mobility Areas (MAs) 

In [73]:
target_regions_ids= ['3003001','3003002','3003003']

## General functions

In [74]:
# %load 00_general_functions.py
#!/usr/bin/env python

# In[1]:


def generate_days_fn(init_date, final_date):
    
    delta = final_date - init_date       # as timedelta

    target_days = []
    for i in range(delta.days + 1):
        day = init_date + timedelta(days=i)
        target_days.append(day)
    
    return target_days


# In[2]:


def read_INE_trips_date_fn(date_, trips_type='all', flow_type= 'all'):
    date_str= date_.strftime('%Y%m%d')
    df_date= pd.read_csv(os.path.join(data_path, f'{date_str}_maestra_1_mitma_distrito.txt'), 
                             sep='|',dtype={'origen':str, 'destino':str,'fecha':str, 'periodo':str})
    
    if flow_type== 'incoming':
        df_date = df_date[df_date['destino'].isin(target_regions_ids)]
    elif flow_type == 'outgoing':
        df_date = df_date[df_date['origen'].isin(target_regions_ids)]
    elif flow_type== 'all':
        df_date = df_date[(df_date['destino'].isin(target_regions_ids)) |
                          (df_date['origen'].isin(target_regions_ids))]
    
    if trips_type=='inter':
        df_date= df_date[df_date['origen']!=df_date['destino']] #only keep trips between areas
    elif trips_type=='intra':
        df_date= df_date[df_date['origen']==df_date['destino']] #only keep trips within the areas
    
    
    
    #convert period column to a two-digit string
    df_date['periodo'] = df_date['periodo'].apply(lambda x: x.zfill(2))
    df_date= df_date.fillna(0) # set nan as 0
    return df_date



Target time period

In [75]:
to_date = datetime.datetime.strptime('2020-07-31', '%Y-%m-%d')
from_date = datetime.datetime.strptime('2020-07-01', '%Y-%m-%d')

See shared excel file with criteria definitions

In [76]:
criteria = {
    'criterion_1' : {'destino':['3003001'], 'distancia': ['010-050','050-100', '100+'], 'actividad_destino':['trabajo']},
    'criterion_2': {'destino':['3003002'], 'distancia': ['010-050','050-100', '100+'], 'actividad_destino':['trabajo']},
    'criterion_3': {'origen':['3003001'],'destino':['3003001'], 'distancia': ['005-010','010-050','050-100', '100+'], 'actividad_destino':['trabajo']},
    'criterion_4': {'origen':['3003002'],'destino':['3003002'], 'distancia': ['005-010','010-050','050-100', '100+'], 'actividad_destino':['trabajo']},
    'criterion_5': {'destino':['3003001'], 'distancia': ['005-010','010-050','050-100', '100+'], 'actividad_origen':['casa'], 'actividad_destino':['trabajo']},
    'criterion_6': {'destino':['3003002'], 'distancia': ['005-010','010-050','050-100', '100+'], 'actividad_origen':['casa'], 'actividad_destino':['trabajo']},
    'criterion_7': {'origen':['3003001'], 'distancia': ['010-050','050-100', '100+'], 'actividad_destino':['casa']},
    'criterion_8': {'origen':['3003002'], 'distancia': ['010-050','050-100', '100+'], 'actividad_destino':['casa']}}


In [77]:
def generate_timestamp(row):
    f = row['fecha']
    h = row['periodo']
    d= datetime.datetime.strptime(r'{}-{}'.format(f,h), '%Y%m%d-%H')
    return d

In [78]:
def generate_subflow_fn(from_date, to_date, criterion):
    dfs= []
    target_days = generate_days_fn(from_date, to_date)
    for date in tqdm_notebook(target_days, leave= False):
        df = read_INE_trips_date_fn(date)
        for k in criterion:
            df = df[df[k].isin(criterion[k])]
        dfs.append(df)
    
    criterion_df = pd.concat(dfs,axis=0)
    sum_trips_c_df= criterion_df.groupby(['fecha','periodo']).agg({'viajes':['sum']})
    sum_trips_c_df= sum_trips_c_df.reset_index()
    sum_trips_c_df.columns= sum_trips_c_df.columns.droplevel(1)
    sum_trips_c_df['timestamp'] = sum_trips_c_df.apply(generate_timestamp, axis=1)
    
    return sum_trips_c_df

In [81]:
flows = []
for c in tqdm_notebook(criteria):
    flow_df = generate_subflow_fn(from_date, to_date, criteria[c])
    flow_df.to_csv(os.path.join('data', 'INE_subflows','flow_{}.csv'.format(c)))
    flows.append(flow_df)

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

ValueError: Wrong number of items passed 3, placement implies 1

In [59]:
c1_df = generate_subflow_fn(from_date,to_date, criterion_1)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [60]:
c1_df.head()

Unnamed: 0,fecha,periodo,viajes,timestamp
0,20200701,1,7.185,2020-07-01 01:00:00
1,20200701,3,6.51,2020-07-01 03:00:00
2,20200701,4,5.036,2020-07-01 04:00:00
3,20200701,5,11.157,2020-07-01 05:00:00
4,20200701,6,18.017,2020-07-01 06:00:00


In [61]:
c1_df.tail()

Unnamed: 0,fecha,periodo,viajes,timestamp
52,20200703,16,41.154,2020-07-03 16:00:00
53,20200703,18,10.266,2020-07-03 18:00:00
54,20200703,19,5.288,2020-07-03 19:00:00
55,20200703,20,5.86,2020-07-03 20:00:00
56,20200703,21,5.029,2020-07-03 21:00:00


In [9]:
sum_trips_c_df= c1_df.groupby(['fecha','periodo']).agg({'viajes':['sum']})

In [13]:
sum_trips_c_df= sum_trips_c_df.reset_index()

In [41]:
sum_trips_c_df.columns= sum_trips_c_df.columns.droplevel(1)

In [42]:
sum_trips_c_df.head()

Unnamed: 0,fecha,periodo,viajes
0,20200701,1,7.185
1,20200701,3,6.51
2,20200701,4,5.036
3,20200701,5,11.157
4,20200701,6,18.017


In [44]:
sum_trips_c_df.iloc[0]

fecha      20200701
periodo          01
viajes        7.185
Name: 0, dtype: object

In [51]:
sum_trips_c_df.head()

Unnamed: 0,fecha,periodo,viajes,timestamp
0,20200701,1,7.185,2020-07-01 01:00:00
1,20200701,3,6.51,2020-07-01 03:00:00
2,20200701,4,5.036,2020-07-01 04:00:00
3,20200701,5,11.157,2020-07-01 05:00:00
4,20200701,6,18.017,2020-07-01 06:00:00
