In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium

from geopy.geocoders import Nominatim

%matplotlib inline

# Coordinate pair, presumably [latitude, longitude] of Braga for centring maps
# (not used in the cells visible here -- confirm against later cells).
origin = [41.5511, -8.428]

filepath = 'data/Traffic_Incidents_Braga_Until_20190228.csv'

# Columns to load from the CSV; any other column in the file is skipped.
cols = ['description', 'cause_of_incident', 'from_road', 'to_road',
       'affected_roads', 'incident_category_desc', 'magnitude_of_delay_desc',
       'length_in_meters', 'delay_in_seconds', 'incident_date']

# Explicit dtypes keep the frame small (categories for repeated strings,
# 16-bit unsigned ints for the two numeric columns).
# 'incident_date' is intentionally absent: it is parsed as a datetime through
# `parse_dates` below, and also declaring it as 'category' would contradict
# that request.
dtypes = {'description': 'category',
 'cause_of_incident': 'category',
 'from_road': 'category',
 'to_road': 'category',
 'affected_roads': 'category',
 'incident_category_desc': 'category',
 'magnitude_of_delay_desc': 'category',
 'length_in_meters': 'uint16',
 'delay_in_seconds': 'uint16'}

df = pd.read_csv(filepath, usecols=cols, dtype=dtypes, parse_dates=['incident_date'])

# Summarise column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45714 entries, 0 to 45713
Data columns (total 10 columns):
description                45714 non-null category
cause_of_incident          30 non-null category
from_road                  45714 non-null category
to_road                    45714 non-null category
affected_roads             22641 non-null category
incident_category_desc     45714 non-null category
magnitude_of_delay_desc    45714 non-null category
length_in_meters           45714 non-null uint16
delay_in_seconds           45714 non-null uint16
incident_date              45714 non-null datetime64[ns]
dtypes: category(7), datetime64[ns](1), uint16(2)
memory usage: 988.1 KB


In [2]:
def typecast_objects(gl_obj, max_unique_ratio=0.5):
    """Normalise string columns and store low-cardinality ones as categories.

    Every column is stripped of surrounding whitespace and lower-cased. A
    column whose ratio of distinct values to number of rows is strictly below
    ``max_unique_ratio`` is then converted to ``category`` dtype; the other
    columns keep whatever dtype the string normalisation produced.

    Parameters
    ----------
    gl_obj : pandas.DataFrame
        Frame whose columns all support the ``.str`` accessor (``downcast``
        passes the result of ``select_dtypes(include=['object'])``).
    max_unique_ratio : float, default 0.5
        Upper bound (exclusive) on ``distinct values / rows`` for a column to
        be converted to ``category``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns, in the same order, as ``gl_obj``.
        ``gl_obj`` itself is not modified.
    """
    num_total_values = len(gl_obj)
    converted = {}

    for col in gl_obj.columns:
        # Single pass per column: strip, then lower-case.
        values = gl_obj[col].str.strip().str.lower()

        # dropna=False counts NaN as one distinct value, matching
        # len(values.unique()).
        num_unique_values = values.nunique(dropna=False)

        # A zero-row frame has no ratio to compute; leave its columns as they
        # are instead of dividing by zero.
        if num_total_values and num_unique_values / num_total_values < max_unique_ratio:
            values = values.astype('category')

        converted[col] = values

    # Build the result in one go rather than enlarging an empty frame column
    # by column; `columns=` pins the column order (and covers the no-column case).
    return pd.DataFrame(converted, columns=gl_obj.columns)


def downcast(df):
    """Shrink the memory footprint of ``df`` and return it.

    Columns selected by ``select_dtypes(include=['int'])`` are run through
    ``pd.to_numeric(..., downcast='unsigned')``; columns selected by
    ``select_dtypes(include=['object'])`` are handed to ``typecast_objects``.
    The converted columns are written back onto ``df`` itself, so the frame
    passed in is modified and the same object is returned.
    """
    int_part = df.select_dtypes(include=['int'])
    obj_part = df.select_dtypes(include=['object'])

    shrunk_int = int_part.apply(lambda column: pd.to_numeric(column, downcast='unsigned'))
    shrunk_obj = typecast_objects(obj_part)

    # Overwrite the original columns with their converted counterparts.
    df[shrunk_int.columns] = shrunk_int
    df[shrunk_obj.columns] = shrunk_obj

    return df


def get_dtypes(df):
    """Return a ``{column label: dtype name}`` dictionary for ``df``.

    The dtype name is the ``.name`` attribute of each column's dtype
    (e.g. ``'int64'``, ``'category'``).
    """
    return {column: dtype.name for column, dtype in df.dtypes.items()}

In [3]:
# Put incidents in chronological order and renumber the rows from 0 so the
# positional index follows the sorted order.
df = df.sort_values(by=['incident_date']).reset_index(drop=True)

In [8]:
# Peek at ten random rows; the fixed seed makes the preview identical on every
# Restart & Run All instead of changing with each execution.
df.sample(10, random_state=42)

Unnamed: 0,description,cause_of_incident,from_road,to_road,affected_roads,incident_category_desc,magnitude_of_delay_desc,length_in_meters,delay_in_seconds,incident_date
6023,stationary traffic,,São Vítor,Avenida Padre Júlio Fragata,,Jam,Major,320,143,2018-09-07 18:03:53.771
64,slow traffic,,Rua do Caires,N101,,Jam,Minor,910,56,2018-07-24 18:02:04.354
43706,closed,,Avenida Doutor Francisco Salgado Zenha,Avenida Robert Smith,,Road Closed,Undefined,88,0,2019-02-21 06:44:58.994
39866,slow traffic,,Avenida General Norton De Matos (Braga-Circular),N101 (Braga Norte) (Braga-Circular),,Jam,Minor,577,32,2019-02-06 18:24:59.614
32350,closed,,Avenida Doutor Francisco Salgado Zenha,Avenida Robert Smith,,Road Closed,Undefined,88,0,2018-12-04 20:07:08.971
37346,closed,,Avenida Doutor Francisco Salgado Zenha,Avenida Robert Smith,,Road Closed,Undefined,88,0,2019-01-27 12:44:59.665
20643,stationary traffic,,Avenida António Gomes Pereira,Autoestrada Esposende-Guimarães,N14,Jam,Moderate,1010,238,2018-10-17 14:47:20.921
2389,stationary traffic,,Rua Feliciano Ramos (N101) / Rua Cónego Lucian...,Rua Irmãos Roby,,Jam,Major,530,185,2018-08-24 22:23:52.838
39693,bridge closed,,Avenida General Carrilho Da Silva Pinto (N103),Braga-Circular (São Vítor) (N103),N103,Road Closed,Undefined,381,0,2019-02-06 00:04:59.085
19962,stationary traffic,,Rotunda da Associação Jurídica de Braga,Largo de Santa Tecla,,Jam,Major,400,309,2018-10-15 08:07:20.884
