In [1]:
import pandas as pd
import numpy as np
import geopandas
import seaborn as sns
%matplotlib inline

In [2]:
def fix_col_names(df):
    """Normalise the column labels of ``df`` to snake_case-like identifiers.

    Each label is stripped of surrounding whitespace, lower-cased, has its
    spaces turned into underscores and its parentheses removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the renamed columns.
    """
    # regex=False: '(' and ')' are meant literally. Depending on the pandas
    # version the default for `regex` differs, and a lone parenthesis is not a
    # valid regular expression, so the literal mode is requested explicitly.
    df.columns = (
        df.columns.str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )
    return df

## Reading and Preparing Zones data

In [3]:
# Load the zones shapefile into a GeoDataFrame (zone code, macrozone code,
# municipality, area/population attributes and the zone polygon).
zones_df = geopandas.read_file('/local/tarciso/data/pesquisa-od-ctba/shapefiles/181zonas_metadata.shp')

In [4]:
zones_df

Unnamed: 0,cod_zona,cod_macroz,municipio,zone_area,zone_pop_d,geometry
0,101,,ALMIRANTE TAMANDARÉ,0.002693,880,POLYGON ((-49.29741553499997 -25.2822542629999...
1,102,,ALMIRANTE TAMANDARÉ,0.011053,99,POLYGON ((-49.28261024699998 -25.2283294099999...
2,103,,ALMIRANTE TAMANDARÉ,0.002410,1376,POLYGON ((-49.29659437999993 -25.3252657799999...
3,104,,ALMIRANTE TAMANDARÉ,0.001301,4343,POLYGON ((-49.27108138899996 -25.3133959639999...
4,111,,ARAUCÁRIA,0.002828,1344,POLYGON ((-49.33834275899994 -25.5150800969999...
5,112,,ARAUCÁRIA,0.035111,31,POLYGON ((-49.43590847299998 -25.4806931729999...
6,113,,ARAUCÁRIA,0.004230,2881,POLYGON ((-49.34288563799998 -25.5577095339999...
7,121,,BOCAIÚVA DO SUL,0.073963,23,POLYGON ((-48.57523350799994 -24.9153968819999...
8,131,,CAMPINA GRANDE DO SUL,0.003735,1137,POLYGON ((-49.11864120699994 -25.3044373429999...
9,132,,CAMPINA GRANDE DO SUL,0.044558,34,POLYGON ((-48.62802764899993 -25.0017790979999...


In [5]:
# Only the identifying attributes are needed for the joins with the trips data;
# geometry and the area/population columns are left out.
zone_id_columns = ['cod_zona', 'cod_macroz', 'municipio']
zones_clean = zones_df.filter(items=zone_id_columns)

In [6]:
len(zones_clean)

181

In [7]:
zones_clean.sort_values('cod_zona')

Unnamed: 0,cod_zona,cod_macroz,municipio
0,101,,ALMIRANTE TAMANDARÉ
1,102,,ALMIRANTE TAMANDARÉ
2,103,,ALMIRANTE TAMANDARÉ
3,104,,ALMIRANTE TAMANDARÉ
4,111,,ARAUCÁRIA
5,112,,ARAUCÁRIA
6,113,,ARAUCÁRIA
7,121,,BOCAIÚVA DO SUL
8,131,,CAMPINA GRANDE DO SUL
9,132,,CAMPINA GRANDE DO SUL


In [8]:
zones_clean.columns

Index(['cod_zona', 'cod_macroz', 'municipio'], dtype='object')

## Reading and Preparing Trips data

In [9]:
desloc_df = pd.read_excel('/local/tarciso/data/pesquisa-od-ctba/dados-extraidos/Deslocamentos.xlsx')

In [10]:
desloc_df

Unnamed: 0,MUNICIPIO DA ENTREVISTA,ZONA DA ENTREVISTA,PESSOA,SEXO,IDADE,MUNICIPIO DE ORIGEM,ZONA DE ORIGEM,COD ORIGEM,MOT ORIGEM,HORA ORIGEM,MUNICIPIO DESTINO,ZONA DESTINO,COD DESTINO,MOT DESTINO,HORA DESTINO,COD TRANSPORTE,TRANSPORTE
0,CURITIBA,1012,6488,Masculino,23.0,CURITIBA,1028,12,Trabalho,17:50,CURITIBA,1011,12,Trabalho,18:00,13,Bicicleta
1,CURITIBA,833,836,Masculino,51.0,CURITIBA,301,12,Trabalho,7:50,CURITIBA,861,12,Trabalho,8:20,6,Dirigindo automovel
2,CURITIBA,941,3815,Masculino,21.0,CURITIBA,681,12,Trabalho,6:11,CURITIBA,1025,12,Trabalho,6:30,6,Dirigindo automovel
3,CURITIBA,833,836,Masculino,51.0,CURITIBA,861,12,Trabalho,17:30,CURITIBA,301,12,Trabalho,17:48,6,Dirigindo automovel
4,SAO JOSE DOS PINHAIS,252,2926,Masculino,49.0,SAO JOSE DOS PINHAIS,252,12,Trabalho,5:00,SAO JOSE DOS PINHAIS,257,12,Trabalho,5:50,4,Onibus fretado
5,SAO JOSE DOS PINHAIS,252,2926,Masculino,49.0,SAO JOSE DOS PINHAIS,252,12,Trabalho,5:00,SAO JOSE DOS PINHAIS,257,12,Trabalho,5:50,4,Onibus fretado
6,CURITIBA,913,9012,Masculino,43.0,CURITIBA,1028,12,Trabalho,19:00,CURITIBA,913,12,Trabalho,19:50,6,Dirigindo automovel
7,CURITIBA,721,10981,Masculino,53.0,CURITIBA,431,12,Trabalho,14:20,SAO JOSE DOS PINHAIS,253,12,Trabalho,14:50,6,Dirigindo automovel
8,ARAUCARIA,113,8765,Masculino,53.0,CURITIBA,301,12,Trabalho,6:50,ARAUCARIA,112,12,Trabalho,7:20,6,Dirigindo automovel
9,CURITIBA,771,5501,Masculino,27.0,CURITIBA,771,12,Trabalho,7:25,CURITIBA,901,12,Trabalho,7:30,6,Dirigindo automovel


In [11]:
desloc_df = fix_col_names(desloc_df)

In [12]:
desloc_df.head()

Unnamed: 0,municipio_da_entrevista,zona_da_entrevista,pessoa,sexo,idade,municipio_de_origem,zona_de_origem,cod_origem,mot_origem,hora_origem,municipio_destino,zona_destino,cod_destino,mot_destino,hora_destino,cod_transporte,transporte
0,CURITIBA,1012,6488,Masculino,23.0,CURITIBA,1028,12,Trabalho,17:50,CURITIBA,1011,12,Trabalho,18:00,13,Bicicleta
1,CURITIBA,833,836,Masculino,51.0,CURITIBA,301,12,Trabalho,7:50,CURITIBA,861,12,Trabalho,8:20,6,Dirigindo automovel
2,CURITIBA,941,3815,Masculino,21.0,CURITIBA,681,12,Trabalho,6:11,CURITIBA,1025,12,Trabalho,6:30,6,Dirigindo automovel
3,CURITIBA,833,836,Masculino,51.0,CURITIBA,861,12,Trabalho,17:30,CURITIBA,301,12,Trabalho,17:48,6,Dirigindo automovel
4,SAO JOSE DOS PINHAIS,252,2926,Masculino,49.0,SAO JOSE DOS PINHAIS,252,12,Trabalho,5:00,SAO JOSE DOS PINHAIS,257,12,Trabalho,5:50,4,Onibus fretado


In [13]:
len(desloc_df.pessoa.drop_duplicates())

27488

In [14]:
len(desloc_df)

71988

## Joining Trips and Zones data based on origin and destination zone code

In [15]:
np.sort(desloc_df['zona_de_origem'].unique())

array([ 101,  102,  103,  104,  111,  112,  113,  121,  131,  132,  141,
        142,  143,  151,  152,  161,  162,  163,  164,  165,  166,  167,
        168,  171,  181,  182,  183,  184,  191,  201,  211,  212,  213,
        214,  221,  222,  231,  241,  251,  252,  253,  254,  255,  256,
        257,  258,  301,  302,  303,  311,  321,  331,  341,  351,  361,
        362,  371,  372,  373,  374,  381,  391,  392,  401,  411,  421,
        431,  441,  451,  461,  471,  481,  482,  491,  492,  493,  494,
        495,  501,  511,  521,  531,  541,  551,  552,  561,  571,  581,
        591,  592,  601,  602,  611,  612,  621,  622,  623,  631,  632,
        641,  642,  651,  661,  671,  681,  682,  683,  691,  692,  701,
        711,  712,  721,  731,  741,  751,  761,  771,  772,  781,  791,
        801,  802,  811,  821,  822,  831,  832,  833,  834,  841,  842,
        843,  851,  852,  853,  861,  871,  872,  881,  891,  901,  902,
        903,  911,  912,  913,  921,  922,  923,  9

In [16]:
np.sort(zones_clean['cod_zona'].unique())

array([ 101,  102,  103,  104,  111,  112,  113,  121,  131,  132,  141,
        142,  143,  151,  152,  161,  162,  163,  164,  165,  166,  167,
        168,  171,  181,  182,  183,  184,  191,  201,  211,  212,  213,
        214,  221,  222,  231,  241,  251,  252,  253,  254,  255,  256,
        257,  258,  301,  302,  303,  311,  321,  331,  341,  351,  361,
        362,  371,  372,  373,  374,  381,  391,  392,  401,  411,  421,
        431,  441,  451,  461,  471,  481,  482,  491,  492,  493,  494,
        495,  501,  511,  521,  531,  541,  551,  552,  561,  571,  581,
        591,  592,  601,  602,  611,  621,  622,  623,  631,  632,  641,
        642,  651,  661,  671,  681,  682,  683,  691,  692,  701,  711,
        712,  721,  731,  741,  751,  761,  771,  781,  791,  801,  802,
        811,  821,  822,  831,  832,  833,  834,  841,  842,  843,  851,
        852,  853,  861,  871,  872,  881,  891,  901,  902,  903,  911,
        912,  913,  921,  922,  923,  924,  925,  9

In [17]:
len(zones_clean['cod_zona'].unique())

181

In [None]:
# Trips whose origin or destination code is missing from the zones table cannot
# receive zone metadata and are removed by the inner joins below (the listings
# above show e.g. 612 and 772 among the origin codes but not among the zone
# codes). Report how many trips are lost instead of dropping them silently.
known_zone_codes = zones_clean['cod_zona']
has_known_zones = desloc_df['zona_de_origem'].isin(known_zone_codes) & desloc_df['zona_destino'].isin(known_zone_codes)
print('Trips without a matching origin/destination zone: {} of {}'.format((~has_known_zones).sum(), len(desloc_df)))

# Attach the zone attributes twice: once for the origin zone (suffix _o) and
# once for the destination zone (suffix _d).
# validate='many_to_one' makes pandas raise if a zone code is duplicated in
# zones_clean, which would otherwise silently duplicate trips.
trips_zones = desloc_df.merge(zones_clean.add_suffix('_o'), how='inner', left_on='zona_de_origem',
                              right_on='cod_zona_o', validate='many_to_one') \
        .merge(zones_clean.add_suffix('_d'), how='inner', left_on='zona_destino',
               right_on='cod_zona_d', validate='many_to_one')

In [None]:
len(trips_zones)

In [None]:
trips_zones

In [None]:
trips_zones.dtypes

In [None]:
# Columns kept for the OD analysis: respondent, transport mode, and the
# zone / macrozone / municipality / purpose / time of both trip ends.
trip_columns = ['pessoa','sexo','idade','cod_transporte','transporte','zona_de_origem',
                'cod_macroz_o','municipio_o','mot_origem','hora_origem','zona_destino',
                'cod_macroz_d','municipio_d','mot_destino','hora_destino']

# Explicit old -> new column names. An exact mapping is used instead of chained
# substring replacements so that only the intended columns are renamed.
short_column_names = {
    'cod_transporte': 'cod_transp',
    'transporte': 'transp',
    'zona_de_origem': 'zona_o',
    'mot_origem': 'mot_o',
    'hora_origem': 'hora_o',
    'zona_destino': 'zona_d',
    'mot_destino': 'mot_d',
    'hora_destino': 'hora_d',
    'cod_macroz_o': 'cod_macrozone_o',
    'cod_macroz_d': 'cod_macrozone_d',
}

# The row index is left untouched (no index=str), so row labels keep their
# original integer type instead of being converted to strings.
trips_zones_clean = trips_zones.filter(items=trip_columns) \
                                .sort_values('pessoa') \
                                .rename(columns=short_column_names)
                                

In [None]:
trips_zones_clean

### Adding Macrozones Names

In [None]:
ctba_macrozones_data = pd.read_csv('/local/tarciso/data/pesquisa-od-ctba/shapefiles/ctba_macrozones_data.csv')

In [None]:
ctba_macrozones_data

In [None]:
# Code -> name look-up for macrozones; add_suffix aligns its column names with
# the origin (_o) and destination (_d) columns of the trips frame.
macrozone_names = ctba_macrozones_data.filter(items=['cod_macrozone','name_macrozone'])

# No `on=` is given, so pandas joins on the column names the two frames share
# (presumably cod_macrozone_o, then cod_macrozone_d). The joins are inner joins.
# NOTE(review): zones with an empty cod_macroz (see the zones table above) would
# be dropped here unless the macrozone file has a matching row. Confirm this is
# intended, since the zone-level OD matrix is built from this same frame.
trips_with_origin_name = trips_zones_clean.merge(macrozone_names.add_suffix('_o'))
trips_zones_clean = trips_with_origin_name.merge(macrozone_names.add_suffix('_d'))

In [None]:
trips_zones_clean.head()

## Selecting only trips performed by municipal or metropolitan bus

In [None]:
trips_zones_clean[['cod_transp','transp']].drop_duplicates().sort_values('cod_transp')

In [None]:
trips_zones_clean.cod_transp.value_counts()

In [None]:
# Keep the trips whose transport code is 1 or 3 (presumably the municipal and
# metropolitan bus codes; check against the code/mode listing above).
is_bus_trip = trips_zones_clean['cod_transp'].isin([1, 3])
bus_trips_zones = trips_zones_clean[is_bus_trip]

In [None]:
bus_trips_zones

In [None]:
len(bus_trips_zones)

In [None]:
len(bus_trips_zones)/float(len(trips_zones_clean))

In [None]:
bus_trips_zones.dtypes

In [None]:
bus_trips_zones.to_csv('/local/tarciso/data/od-mat-validation/trips_od_dataframes/survey_bus_trips_zones.csv', index=False)

## Generating Survey OD Matrix for Macrozones

In [None]:
# Number of bus trips per (origin macrozone, destination macrozone) pair.
# Written in the same groupby / count / reset_index(name=...) form as the
# zone-level table; the previous rename(index=str, ...) also converted the row
# labels to strings, which nothing downstream relies on.
macrozone_pair_keys = ['cod_macrozone_o','name_macrozone_o','cod_macrozone_d','name_macrozone_d']
survey_macrozones_trips = bus_trips_zones.groupby(macrozone_pair_keys).pessoa.count().reset_index(name='num_trips')

In [None]:
survey_macrozones_trips

In [None]:
# Origin x destination matrix of trip counts, with rows and columns ordered as
# in ctba_macrozones_data. Pairs with no surveyed trip come out of pivot/reindex
# as NaN; they are set to 0 (as in the zone-level matrix) so that the total used
# for normalisation is a number rather than NaN.
survey_macrozones_freqs = survey_macrozones_trips.pivot(index='name_macrozone_o',columns='name_macrozone_d',values='num_trips') \
                                                .reindex(ctba_macrozones_data.name_macrozone, axis=0) \
                                                .reindex(ctba_macrozones_data.name_macrozone, axis=1) \
                                                .fillna(0)

In [None]:
survey_macrozones_freqs

In [None]:
survey_macrozones_freqs.to_csv('/local/tarciso/data/od-mat-validation/od-matrices/survey_macrozones_freqs.csv')

In [None]:
sns.heatmap(survey_macrozones_freqs)

In [None]:
# Share of all trips carried by each OD pair. ndarray.sum() returns NaN as soon
# as a single cell is NaN, which would turn the whole normalised matrix into
# NaN; np.nansum ignores missing cells when computing the total.
total_macrozone_trips = np.nansum(survey_macrozones_freqs.values)
survey_macrozones_norm_freqs = survey_macrozones_freqs / total_macrozone_trips

In [None]:
survey_macrozones_norm_freqs

In [None]:
survey_macrozones_norm_freqs.to_csv('/local/tarciso/data/od-mat-validation/od-matrices/survey_macrozones_norm_freqs.csv')

In [None]:
sns.heatmap(survey_macrozones_norm_freqs)

## Generating Survey OD Matrix for Zones

In [None]:
# Count the bus trips recorded for every (origin zone, destination zone) pair.
trips_by_zone_pair = bus_trips_zones.groupby(['zona_o','zona_d'])['pessoa']
survey_zones_trips = trips_by_zone_pair.count().reset_index(name='num_pass')

In [None]:
survey_zones_trips

In [None]:
# Reshape the pair counts into an origin (rows) x destination (columns) matrix;
# pairs that never occur in the survey are filled with 0.
zones_od_counts = survey_zones_trips.pivot(index='zona_o', columns='zona_d', values='num_pass')
survey_zones_freqs = zones_od_counts.fillna(0)

In [None]:
survey_zones_freqs

In [None]:
survey_zones_freqs.values.max()

In [None]:
survey_zones_freqs.to_csv('/local/tarciso/data/od-mat-validation/survey_zones_freqs.csv')

In [None]:
sns.heatmap(survey_zones_freqs)

In [None]:
# Divide every cell by the grand total so the matrix holds each OD pair's share
# of all trips.
total_zone_trips = survey_zones_freqs.values.sum()
survey_zones_norm_freqs = survey_zones_freqs.div(total_zone_trips)

In [None]:
survey_zones_norm_freqs

In [None]:
survey_zones_norm_freqs.to_csv('/local/tarciso/data/od-mat-validation/survey_zones_norm_freqs.csv')

In [None]:
sns.heatmap(survey_zones_norm_freqs)