# Generation of the Candidate Flows of Human Mobility

In [11]:
import pandas as pd
import numpy as np

import os
import datetime
from datetime import date, timedelta
from tqdm import tqdm_notebook

import matplotlib.pyplot as plt
%matplotlib inline

# Folder holding the daily trips files. Defaults to the original location and
# can be overridden through the INE_MOBILITY_DATA_PATH environment variable, so
# the notebook also runs on machines with a different directory layout.
data_path = os.environ.get(
    'INE_MOBILITY_DATA_PATH',
    os.path.join('/','home','fterroso','data','Spain-INE-mobility-study-2020','muestra1'),
)

### Target Mobility Areas (MAs) 

In [12]:
# Identifiers of the target mobility areas, kept as strings so they can be
# matched against the 'origen' / 'destino' columns, which are read as str.
target_regions_ids= ['3003001','3003002','3003003']

## General functions

In [13]:
# %load 00_general_functions.py
#!/usr/bin/env python

# In[1]:


def generate_days_fn(init_date, final_date):
    """Return every day from ``init_date`` to ``final_date``, both inclusive.

    Parameters
    ----------
    init_date, final_date : date or datetime
        First and last day of the period. Only the whole-day part of their
        difference is used to decide how many days are produced.

    Returns
    -------
    list
        ``init_date`` shifted by 0, 1, ..., n days, where n is the number of
        whole days between the two arguments. The list is empty when
        ``final_date`` is earlier than ``init_date``.
    """
    n_days = (final_date - init_date).days
    return [init_date + timedelta(days=offset) for offset in range(n_days + 1)]


# In[2]:


def read_INE_trips_date_fn(date_, trips_type='all', flow_type= 'all'):
    """Load one day's trips file and keep the trips involving the target areas.

    Reads the pipe-separated file ``<YYYYMMDD>_maestra_1_mitma_distrito.txt``
    from the module-level ``data_path`` and filters it against the module-level
    ``target_regions_ids``.

    Parameters
    ----------
    date_ : date or datetime
        Day to load; formatted as ``%Y%m%d`` to build the file name.
    trips_type : str, default 'all'
        'inter' keeps rows whose origin and destination differ, 'intra' keeps
        rows whose origin and destination are equal. Any other value
        (including the default) applies no origin/destination comparison.
    flow_type : str, default 'all'
        'incoming' keeps rows whose destination is a target area, 'outgoing'
        keeps rows whose origin is a target area, 'all' keeps rows matching
        either condition. Any other value applies no region filter.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, with 'periodo' left-padded with zeros to at least
        two characters and every remaining NaN replaced by 0.
    """
    date_str = date_.strftime('%Y%m%d')
    file_path = os.path.join(data_path, f'{date_str}_maestra_1_mitma_distrito.txt')
    df_date = pd.read_csv(file_path, sep='|',
                          dtype={'origen':str, 'destino':str,'fecha':str, 'periodo':str})

    if flow_type == 'incoming':
        df_date = df_date[df_date['destino'].isin(target_regions_ids)]
    elif flow_type == 'outgoing':
        df_date = df_date[df_date['origen'].isin(target_regions_ids)]
    elif flow_type == 'all':
        df_date = df_date[(df_date['destino'].isin(target_regions_ids)) |
                          (df_date['origen'].isin(target_regions_ids))]

    if trips_type == 'inter':
        df_date = df_date[df_date['origen'] != df_date['destino']]  # only keep trips between areas
    elif trips_type == 'intra':
        df_date = df_date[df_date['origen'] == df_date['destino']]  # only keep trips within the areas

    # The frame may be the result of boolean filtering; take an explicit copy
    # before writing a column so the assignment is not a chained assignment
    # (which triggers SettingWithCopyWarning).
    df_date = df_date.copy()

    # Convert the period column to a two-digit string. Missing values stay
    # missing at this point and are zero-filled together with the rest below.
    df_date['periodo'] = df_date['periodo'].str.zfill(2)

    return df_date.fillna(0)  # set nan as 0



## Target time period

First and last day of the period to analyse; both days are included.

In [14]:
# Analysis period; both endpoints are included when the days are enumerated.
# Note: strptime's %d also accepts a day written without a leading zero.
from_date, to_date = (
    datetime.datetime.strptime(day_str, '%Y-%m-%d')
    for day_str in ('2020-07-01', '2020-07-3')
)

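Flow-selection criteria. Each criterion maps a column name to the list of values accepted for that column; a trip is kept only if it matches every column listed. See the shared Excel file for the full criteria definitions.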

In [19]:
# Criterion 1: for each column (key), the values a trip row must take to be kept.
criterion_1 = dict(
    destino=['3003001'],                        # destination area
    distancia=['010-050', '050-100', '100+'],   # accepted distance bins
    actividad_destino=['trabajo'],              # activity at destination: work
)
#criterion_2={}...

In [21]:
def generate_subflow_fn(from_date, to_date, criterion):
    """Collect, for every day of a period, the trips that satisfy a criterion.

    Parameters
    ----------
    from_date, to_date : date or datetime
        First and last day of the period (both inclusive).
    criterion : dict
        Maps a column name to the collection of values accepted for that
        column. A row is kept only when every listed column holds one of its
        accepted values.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-day filtered frames. The index
        labels of the individual frames are kept as they are.

    Raises
    ------
    ValueError
        If the period contains no day (``from_date`` is after ``to_date``).
    """
    target_days = generate_days_fn(from_date, to_date)
    if not target_days:
        # pd.concat would otherwise fail with an opaque
        # "No objects to concatenate" message.
        raise ValueError(
            f'Empty date range: from_date ({from_date}) is after to_date ({to_date})'
        )

    dfs = []
    # The loop variable is named `day` so it does not shadow `datetime.date`.
    for day in tqdm_notebook(target_days):
        df = read_INE_trips_date_fn(day)
        for column, accepted_values in criterion.items():
            df = df[df[column].isin(accepted_values)]
        dfs.append(df)

    criterion_df = pd.concat(dfs, axis=0)
    return criterion_df
    

In [22]:
# Build the criterion-1 sub-flow over the target period (one trips file is read per day).
c1_df = generate_subflow_fn(from_date,to_date, criterion_1)

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




In [23]:
# Preview the first rows of the criterion-1 sub-flow.
c1_df.head()

Unnamed: 0,fecha,origen,destino,actividad_origen,actividad_destino,residencia,edad,periodo,distancia,viajes,viajes_km
176907,20200701,3059,3003001,otros,trabajo,30,0.0,12,010-050,5.333,226.839
252620,20200701,309904,3003001,casa,trabajo,3,0.0,7,010-050,6.088,123.206
257160,20200701,309905,3003001,otros,trabajo,30,0.0,8,010-050,4.521,158.573
324473,20200701,3902,3003001,otros,trabajo,30,0.0,9,010-050,6.07,218.299
324474,20200701,3902,3003001,otros,trabajo,30,0.0,16,010-050,9.195,324.275


In [29]:
# Total trips per day ('fecha') and period ('periodo'). Passing the aggregation
# as a list (['sum']) yields two-level column labels: ('viajes', 'sum').
sum_trips_c_df= c1_df.groupby(['fecha','periodo']).agg({'viajes':['sum']})

In [34]:
# Show the totals with 'fecha' and 'periodo' as regular columns. The result is
# only displayed; sum_trips_c_df itself is not reassigned.
sum_trips_c_df.reset_index()

Unnamed: 0_level_0,fecha,periodo,viajes
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum
0,20200701,1,7.185
1,20200701,3,6.51
2,20200701,4,5.036
3,20200701,5,11.157
4,20200701,6,18.017
5,20200701,7,49.478
6,20200701,8,73.16
7,20200701,9,14.498
8,20200701,10,4.007
9,20200701,11,23.158
