# Hackathon notebook

## Importing Libraries and the dataset

In [0]:
import numpy as np 
import pandas as pd
import plotly.express as px

In [0]:
px.set_mapbox_access_token('pk.eyJ1IjoiY29kZWJsdTMiLCJhIjoiY2s5b2FtYWVwMDd6bTNncG10MDkxc3RoeCJ9._PsvOr4YWdJWawdDLu1A5w')

In [0]:
def dfSchema(df):
    ''' 
    Returns column type of data either categorical, nominal, discrete, or continous
    '''
    dt = {col:[ df[col].dtype
               , df[col].nunique()
               , df[col].isna().sum() 
               , 'categorical' if (df[col].dtype == 'object') and (df[col].nunique() <= 25) else
                   'nominal' if (df[col].dtype == 'object') and (df[col].nunique() >= 25) else
                   'discrete' if (df[col].dtype == 'int64') else
                   'continous' if (df[col].dtype == 'float64') else
                   'datetime' if (df[col].dtype == 'float64') else
                   'unknown'
              ] for col in  df.columns }
    ltheader = ['dtype','nunique','na','colType']
    return pd.DataFrame.from_dict(dt, orient='index',columns = ltheader )

In [0]:
def colTypes(dfSchema):
    '''
    return lists
    '''
    cat, nom, disc, cont = [], [], [], []
    for ctype in dfSchema.colType.unique(): 
        if ctype == 'categorical':
            cat = dfSchema[dfSchema['colType'] == 'categorical'].index.to_list()            
        elif ctype == 'nominal': 
            nom = dfSchema[dfSchema['colType'] == 'nominal'].index.to_list()
        elif ctype == 'discrete': 
            disc = dfSchema[dfSchema['colType'] == 'discrete'].index.to_list()
        elif ctype == 'continous':
            cont = dfSchema[dfSchema['colType'] == 'continous'].index.to_list()

    return cat, nom, disc, cont

In [0]:
df = pd.read_csv("data/data_mmda_traffic_spatial.csv")
df.tail()

Unnamed: 0,Date,Time,City,Location,Latitude,Longitude,Direction,Type,Lanes_Blocked,Involved,Tweet,Source
13962,2019-12-20,8:03 AM,Quezon City,QUEZON AVE AGHAM TUNNEL,14.646323,121.040861,WB,VEHICULAR ACCIDENT,1.0,"SUV, MC, VAN AND TAXI",MMDA ALERT: Vehicular accident at Quezon Ave A...,https://twitter.com/mmda/status/12078210882493...
13963,2019-12-20,8:50 AM,Pasay City,ROXAS BUENDIA,14.552294,120.989491,NB,VEHICULAR ACCIDENT,1.0,ARMORED VEHICLE AND MC,MMDA ALERT: Vehicular accident at Roxas Buendi...,https://twitter.com/mmda/status/12078292444300...
13964,2019-12-20,9:30 AM,Manila,P BURGOS MA OROSA,14.585217,120.979377,EB,VEHICULAR ACCIDENT,1.0,PUJ AND MC,MMDA ALERT: Vehicular accident at P. Burgos Ma...,https://twitter.com/mmda/status/12078389806050...
13965,2019-12-20,10:11 AM,Makati City,C5 KALAYAAN ELEVATED UTS,14.556079,121.062936,SB,VEHICULAR ACCIDENT,1.0,FORD RANGER AND MC,MMDA ALERT: Vehicular accident at C5 elevated ...,https://twitter.com/mmda/status/12078525791509...
13966,2019-12-20,10:54 AM,Quezon City,EDSA ROCHESTER,14.59758,121.059718,SB,VEHICULAR ACCIDENT,1.0,2 CARS,MMDA ALERT: Vehicular accident at EDSA Rochest...,https://twitter.com/mmda/status/12078605163301...


In [0]:
df.shape

(13967, 12)

In [0]:
#checking the number of null values per column
df.isnull().sum()

Date               0
Time             115
City              91
Location           4
Latitude           0
Longitude          0
Direction        701
Type              36
Lanes_Blocked    592
Involved         370
Tweet              0
Source             0
dtype: int64

## Preliminary dataset exploration and question asking

### How many incidents were reported by the MMDA from August 20, 2018 to December 20, 2019?

In [0]:
# How many incidents were reported by the MMDA from August 20, 2018 to December 20, 2019?
len(df)

13967

A total of 13967 incidents were reported from MMDA's twitter account from August 20, 2018 to December 20, 2019

### How many incidents happened per city?

In [0]:
#How many incidents happened per city?
df["City"].value_counts()

Quezon City      7104
Mandaluyong      2462
Makati City      1944
Pasig City       1387
Pasay City        372
Manila            242
Marikina          101
ParaÃ±aque         96
San Juan           94
Kalookan City      49
Taguig             16
Valenzuela          5
Navotas             3
Malabon             1
Name: City, dtype: int64

In [0]:
df["Location"].value_counts()

EDSA SHAW TUNNEL                         261
EDSA GUADALUPE                           246
EDSA BUENDIA                             202
EDSA MEGAMALL                            198
EDSA MAIN AVE                            171
                                        ... 
MARCOS HIGHWAY TROPICAL HUT FELIX AVE      1
INSIDE OF SM MEGAMALL LOADING BAY          1
MIA IMELDA FRONTING PETRON                 1
EDSA SHAW AFTER INTERSECTION               1
KAMUNING K-E                               1
Name: Location, Length: 2557, dtype: int64

###### Follow up questions:

A majority of the incidents happened in QC, Manda, Makati and Pasig. Can we do a per city analysis of the incident reports to see if we can find insights that will help the MMDA with incident management per city?

##### Other questions to answer:
1. Why did most of the incidents occur in QC?
2. Why was the concentration of the incidents around these 4 cities?

### How many incidents happened when classified under direction?

In [0]:
#How many incidents happened when classified under direction?
df["Direction"].value_counts()

NB        5308
SB        4862
EB        1649
WB        1441
PAX          2
CLARA        1
EB.          1
DAR          1
CLOSED       1
Name: Direction, dtype: int64

###### Follow up questions:

A majority of the incidents that happened were Northbound and Southbound. Can we do an in depth analysis on the incidents based on the incident directions?

##### Other questions to answer:
1. Why were most of the incidents northbound? (Is this related to QC being the city with the highest incident rates)
2. Why were there noticeably more northbound and southbound incidents than east and westbound incidents?
3. Meaning of other direction classfications

In [0]:
#Find the rows with PAX, DAR, CLARA, CLOSED and EB to know the meaning of the terms
df[df["Direction"] == "PAX"]

Unnamed: 0,Date,Time,City,Location,Latitude,Longitude,Direction,Type,Lanes_Blocked,Involved,Tweet,Source
343,9/1/2018,7:10 AM,Manila,ROXAS BLVD. KALAW,14.579835,120.977331,PAX,RALLYIST,1.0,,MMDA ALERT: Rallyist at Kalaw Roxas blvd. more...,https://twitter.com/mmda/status/10360310472369...
446,9/5/2018,11:52 AM,Quezon City,EDSA AGUINALDO GATE 2 MORE OR LESS 40,14.606095,121.057183,PAX,RALLYIST,1.0,,MMDA ALERT: Rallyist at EDSA Aguinaldo Gate 2 ...,https://twitter.com/mmda/status/10371893606119...


### How many incidents happened when classified in terms of type and vehicles involved?

In [0]:
#No. of incidents in terms of type
df["Type"].value_counts()

VEHICULAR ACCIDENT                                                                                                                           9498
STALLED BUS DUE TO MECHANICAL PROBLEM                                                                                                         740
MULTIPLE COLLISION                                                                                                                            541
STALLED CAR DUE TO MECHANICAL PROBLEM                                                                                                         461
STALLED TRUCK DUE TO MECHANICAL PROBLEM                                                                                                       276
                                                                                                                                             ... 
STALLED JEEP DUE TO MECHANICAL DEFECT                                                                                       

In [0]:
# No. of incidents in terms of vehicles involved
df["Involved"].value_counts()

BUS                 843
CAR                 554
2 CARS              434
TRUCK               339
BUS AND CAR         265
                   ... 
CAR, AUV AND MC       1
AUV AND TANKER        1
A HONDA CIVIC         1
PICK UP AND AUV       1
3-CARS                1
Name: Involved, Length: 1783, dtype: int64

###### Follow up questions:
A majority of the incidents that happened are classified as 'vehicular accidents'. In this dataset, vehicular accidents are generally one on one collisions between a vehicle and another vehicle or a civilian. Maybe we can explore how many types of public vehicles were involved in an incident to see if the MMDA can have tighter regulations around said vehicles? (PUJ, Buses, UV Express)

##### Other questions to answer:
1. How many of the incidents involved buses
2. How many of the incidents involved PUJ (public utility jeeps)
3. How many of the incidents involved Jeeps
4. How many of the incidents involved MCS (motorcycles)
5. How many of the incidents ivolved armored trucks
6. How many of the incidents involved civilian 'cars'

### What were the no. of incidents in terms of lanes blocked?

In [0]:
#No. of incidents in terms of lanes blocked
df["Lanes_Blocked"].value_counts()

1.0    11996
2.0     1371
3.0        6
6.0        1
4.0        1
Name: Lanes_Blocked, dtype: int64

###### Follow up questions:
It's good that for the majority of the incidents, only 1 lane was blocked. But for those incidents with 2 or more lanes blocked, did they have anything in common? And if so, what can be done for those numbers to be further reduced?

## Data Cleaning & Feature Engineering

In [0]:
df['Date'] = pd.to_datetime(df['Date'])#convert to datetime
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month
df['day'] = df['Date'].dt.day
df['day_name'] = df['Date'].dt.strftime('%A')
df['cnt'] = 1
df[['year','month','day','day_name','cnt']].head()

Unnamed: 0,year,month,day,day_name,cnt
0,2018,8,20,Monday,1
1,2018,8,20,Monday,1
2,2018,8,20,Monday,1
3,2018,8,20,Monday,1
4,2018,8,20,Monday,1


In [0]:
df['date'] =  df['Date']

In [0]:
df = df.set_index('date')

In [0]:
def new_time(str):
    if len(str) < 2:
        return 'No Time'
    else:
        return str[0] +" "+ str[-2:]
#Create time
df['Time'] = df['Time'].astype('str')
df['hour'] = df['Time'].apply(new_time)
df['hour'].head()

df['City'] = df.City.str.replace(r'(^.*Para.*$)', 'Paranaque')#change Paranaque
df['Location'] = df['Location'].str.strip()#linisin ung location na column
df['Direction'] = df.Direction.str.replace(r'(^.*EB.*$)', 'EB')#change EB

In [0]:
df['Type'] = df['Type'].str.upper()
df['Type'] = df['Type'].fillna('Missing')

In [0]:
import re
def regroup(text):
    others = 'OTHERS'
    try:
        if re.search(r'(^.*VEHICULAR ACCIDENT.*$)',text):
            return re.sub(r'(^.*VEHICULAR ACCIDENT.*$)','VEHICULAR ACCIDENT', text)
        elif re.search(r'(^.*SELF.*$)',text):
            return re.sub(r'(^.*SELF.*$)','SELF ACCIDENT', text)
        elif re.search(r'(^.*STALLED.*$)',text):
            return re.sub(r'(^.*STALLED.*$)','STALLED VEHICLE', text)
        elif re.search(r'(^.*ONGOING.*$)',text):
            return re.sub(r'(^.*ONGOING.*$)','ONGOING PROJECT', text)
        elif re.search(r'(^.*DPWH.*$)',text):
            return re.sub(r'(^.*DPWH.*$)','ONGOING PROJECT', text)
        elif re.search(r'(^.*ROAD.*$)',text):
            return re.sub(r'(^.*ROAD.*$)','ONGOING PROJECT', text)
        elif re.search(r'(^.*MULTIP.*$)',text):
            return re.sub(r'(^.*MULTIP.*$)','MULTIPLE COLLISION', text)
        elif re.search(r'(^.*RALLY.*$)',text):
            return re.sub(r'(^.*RALLY.*$)','RALLY', text)
        else: return others
    except:
        print('Error')
df['New_Type'] = df['Type'].apply(regroup)
df['New_Type'].value_counts()

VEHICULAR ACCIDENT    9537
STALLED VEHICLE       3148
MULTIPLE COLLISION     562
ONGOING PROJECT        270
SELF ACCIDENT          205
OTHERS                 143
RALLY                  102
Name: New_Type, dtype: int64

In [0]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 13967 entries, 2018-08-20 to 2019-12-20
Data columns (total 19 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           13967 non-null  datetime64[ns]
 1   Time           13967 non-null  object        
 2   City           13876 non-null  object        
 3   Location       13963 non-null  object        
 4   Latitude       13967 non-null  float64       
 5   Longitude      13967 non-null  float64       
 6   Direction      13266 non-null  object        
 7   Type           13967 non-null  object        
 8   Lanes_Blocked  13375 non-null  float64       
 9   Involved       13597 non-null  object        
 10  Tweet          13967 non-null  object        
 11  Source         13967 non-null  object        
 12  year           13967 non-null  int64         
 13  month          13967 non-null  int64         
 14  day            13967 non-null  int64         
 15  da

In [0]:
df.head()

Unnamed: 0_level_0,Date,Time,City,Location,Latitude,Longitude,Direction,Type,Lanes_Blocked,Involved,Tweet,Source,year,month,day,day_name,cnt,hour,New_Type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018-08-20,2018-08-20,7:55 AM,Pasig City,ORTIGAS EMERALD,14.586343,121.061481,EB,VEHICULAR ACCIDENT,1.0,TAXI AND MC,MMDA ALERT: Vehicular accident at Ortigas Emer...,https://twitter.com/mmda/status/10313302019705...,2018,8,20,Monday,1,7 AM,VEHICULAR ACCIDENT
2018-08-20,2018-08-20,8:42 AM,Mandaluyong,EDSA GUADIX,14.589432,121.057243,NB,STALLED L300 DUE TO MECHANICAL PROBLEM,1.0,L300,MMDA ALERT: Stalled L300 due to mechanical pro...,https://twitter.com/mmda/status/10313462477459...,2018,8,20,Monday,1,8 AM,STALLED VEHICLE
2018-08-20,2018-08-20,9:13 AM,Makati City,EDSA ROCKWELL,14.559818,121.040737,SB,VEHICULAR ACCIDENT,1.0,SUV AND L300,MMDA ALERT: Vehicular accident at EDSA Rockwel...,https://twitter.com/mmda/status/10313589669896...,2018,8,20,Monday,1,9 AM,VEHICULAR ACCIDENT
2018-08-20,2018-08-20,8:42 AM,Mandaluyong,EDSA GUADIX,14.589432,121.057243,NB,STALLED L300 DUE TO MECHANICAL PROBLEM,1.0,L300,MMDA ALERT: Stalled L300 due to mechanical pro...,https://twitter.com/mmda/status/10313590696535...,2018,8,20,Monday,1,8 AM,STALLED VEHICLE
2018-08-20,2018-08-20,10:27 AM,San Juan,ORTIGAS CLUB FILIPINO,14.601846,121.046754,EB,VEHICULAR ACCIDENT,1.0,2 CARS,MMDA ALERT: Vehicular accident at Ortigas Club...,https://twitter.com/mmda/status/10313711248424...,2018,8,20,Monday,1,1 AM,VEHICULAR ACCIDENT


In [0]:
df['City'] = df['City'].fillna('Missing')

In [0]:
df['Lanes_Blocked'] = df['Lanes_Blocked'].fillna(0)

In [0]:
df['Direction'] = df['Direction'].fillna('Unk')

In [0]:
df['Direction'] = df['Direction'].apply(lambda x: x  if x in ['NB', 'SB', 'EB', 'WB'] else 'Others')

In [0]:
df.Time.value_counts()

nan        115
7:40 AM     48
7:30 AM     44
6:30 AM     35
8:35 AM     34
          ... 
9:57 PM      1
4:49PM       1
2:58 AM      1
2:11 AM      1
1:47 AM      1
Name: Time, Length: 1419, dtype: int64

In [0]:
df['ptime'] = df.Time.str.replace('nan', '00:00 AM')

In [0]:
df.ptime.str.len().value_counts()

7     10499
8      3415
6        28
5        18
9         5
10        2
Name: ptime, dtype: int64

In [0]:
df['ptime']= df.ptime.str.replace('M','')
df['ptime']= df.ptime.str.replace('A','')
df['ptime']= df.ptime.str.replace('P','')


In [0]:
df['ptime'] = df.ptime.str.strip()

In [0]:
df['New_hour']  = df['ptime'].apply(lambda x: x.split(':')[0]).astype(int)

In [0]:
def timeCat(hr):
  tcat = ''
  if hr == 0:
    tcat = 'NA'
  elif hr >= 1 and hr< 6: 
    tcat = 'Dawn'
  elif hr >=6  and hr < 11:
    tcat = 'Morning'
  elif hr >= 11 and hr < 13:
    tcat = 'Lunch'
  elif hr >= 13 and hr < 18:
    tcat = 'Afternoon'
  elif hr >=18:
    tcat = 'Evening'
  return tcat

In [0]:
df['HourCat']  = df['New_hour'].apply(timeCat)

In [0]:
a.value_counts()

1    3827
7    1709
8    1548
6    1530
9    1267
5    1107
4    1095
3     986
2     783
n     115
Name: hour, dtype: int64

In [0]:
schMMDA = dfSchema(df)

In [0]:
schMMDA

Unnamed: 0,dtype,nunique,na,colType
Date,datetime64[ns],484,0,unknown
Time,object,1419,0,nominal
City,object,15,0,categorical
Location,object,2540,4,nominal
Latitude,float64,1066,0,continous
Longitude,float64,1064,0,continous
Direction,object,5,0,categorical
Type,object,420,0,nominal
Lanes_Blocked,float64,6,0,continous
Involved,object,1783,370,nominal


In [0]:
cat, nom, disc, cont = colTypes(schMMDA)


## Interactive Map ( VIsualizing the MMDA Traffic Incident Reports )

In [0]:
import plotly.graph_objects as go# Create figure
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=list(df.Date), y=list(df.cnt)))# Set title
fig.update_layout(
    title_text="Time series with range slider and selectors"
)# Add range slider
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label="1m",
                     step="month",
                     stepmode="backward"),
                dict(count=6,
                     label="6m",
                     step="month",
                     stepmode="backward"),
                dict(count=1,
                     label="YTD",
                     step="year",
                     stepmode="todate"),
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)
fig.show()




In [0]:
dailyCnt = df.groupby([pd.Grouper(freq='D')])['cnt'].agg(sum).reset_index()

In [0]:
figdc = px.line(dailyCnt, x="date", y="cnt", title='Daily Occurences of Incidents', labels = {'cnt': 'Frequency'} )
figdc.show()

In [0]:
monthlyCnt = df.groupby([pd.Grouper(freq='M')])['cnt'].agg(sum).reset_index()

In [0]:
monthlyCnttb = df.groupby([pd.Grouper(freq='M'),'New_Type'])[['cnt']].agg('sum').reset_index()

In [0]:
monthlyCnttb

Unnamed: 0,date,New_Type,cnt
0,2018-08-31,MULTIPLE COLLISION,15
1,2018-08-31,ONGOING PROJECT,11
2,2018-08-31,OTHERS,4
3,2018-08-31,RALLY,2
4,2018-08-31,SELF ACCIDENT,2
...,...,...,...
114,2019-12-31,OTHERS,5
115,2019-12-31,RALLY,1
116,2019-12-31,SELF ACCIDENT,10
117,2019-12-31,STALLED VEHICLE,92


In [0]:
px.scatter(monthlyCnttb, x = 'date', y = 'cnt', color = 'New_Type')

In [0]:
{'B': ['min', 'max'], 'C': 'sum'}

In [0]:
figdc = px.line(monthlyCnt, x="date", y="cnt", title='Monthly Occurences of Incidents', labels = {'cnt': 'Frequency'} )
figdc.show()

**Accidents per Direction**

In [0]:
df.Direction.value_counts()*100/len(df.Direction)

NB        38.003866
SB        34.810625
EB        11.813561
WB        10.317176
Others     5.054772
Name: Direction, dtype: float64

In [0]:
px.pie(df, values='cnt', names='Direction')

In [0]:
figMP_test = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", color = 'Direction'
                          , size ='Lanes_Blocked'
                          , zoom=10
                         )
figMP_test.show()

**Though per occurences, Northbound have highest occurences, per mapping, south bound accident dominates EDSA,C5,  . Commonwealth is dominated by WB, NB is sparse**

**Accidents per Time**

In [0]:
dfgtime = df[df['Time'].str.len() == 7]

In [0]:
dfgtime['miltime'] = dfgtime.Time.apply(strAMPM)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [0]:
def strAMPM(strt):
  if strt.find('AM') > 1 :
    x = int(strt.split(':')[0])
  else:
    x = int(strt.split(':')[0]) + 12 
  return x 

In [0]:
dfgtime['Hour_catNew'] = dfgtime.miltime.apply(timeCat)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [0]:

dfgtime.strTime.value_counts()

2020-05-02 07:40:00    48
2020-05-02 07:30:00    44
2020-05-02 06:30:00    35
2020-05-02 08:35:00    34
2020-05-02 07:35:00    33
                       ..
2020-05-02 03:39:00     1
2020-05-02 03:49:00     1
2020-05-02 04:07:00     1
2020-05-02 03:42:00     1
2020-05-02 01:36:00     1
Name: strTime, Length: 1003, dtype: int64

In [0]:
figMP_time = px.scatter_mapbox(dfgtime, lat="Latitude", lon="Longitude", color = 'Hour_catNew'
                          , size ='Lanes_Blocked'
                          , zoom=10
                         )
figMP_time.show()

In [0]:
px.pie(dfgtime, values='cnt', names='Hour_catNew')

In [0]:
px.histogram(dfgtime, x= 'miltime')

In [0]:
figMP_time = px.scatter_mapbox(dfgtime, lat="Latitude", lon="Longitude", color = 'miltime'
                          , size ='Lanes_Blocked'
                          , zoom=10
                         )
figMP_time.show()

**Unanimously most of the accidents happening at edsa occur at early dawn. Probably, these are drunk party goers. For edit, improper data cleaning**

In [0]:
figMP_Veh = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", color = 'New_Type'
                          , size ='Lanes_Blocked'
                          , zoom=10
                         )
figMP_Veh.show()

**Self Accidents are mostly on C5 and EDSA**

Index(['Date', 'Time', 'City', 'Location', 'Latitude', 'Longitude',
       'Direction', 'Type', 'Lanes_Blocked', 'Involved', 'Tweet', 'Source',
       'year', 'month', 'day', 'day_name', 'cnt', 'hour', 'New_Type', 'ptime',
       'New_hour', 'HourCat'],
      dtype='object')

In [0]:
df['payday'] = df.day.apply(lambda x: 'payday' if x  in [15,30,31] else 'not payday' )

**pay day**

In [0]:
figMP_Veh = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", color = 'payday'
                          , size ='Lanes_Blocked'
                          , zoom=10
                         )
figMP_Veh.show()

In [0]:
px.pie(df, values='cnt', names='payday')

In [0]:
px.parallel_categories(df[['payday', 'day_name', 'Lanes_Blocked']], color = 'Lanes_Blocked')

In [0]:
px.parallel_categories(df[['New_Type','day_name', 'Lanes_Blocked']], color = 'Lanes_Blocked')

In [0]:
df.columns

Index(['Date', 'Time', 'City', 'Location', 'Latitude', 'Longitude',
       'Direction', 'Type', 'Lanes_Blocked', 'Involved', 'Tweet', 'Source',
       'year', 'month', 'day', 'day_name', 'cnt', 'hour', 'New_Type', 'ptime',
       'New_hour', 'HourCat'],
      dtype='object')

In [0]:
aSch =  dfSchema(df)

In [0]:
 cat, nom, disc, cont = colTypes(aSch) 

In [0]:
cat

['City', 'Direction', 'day_name', 'New_Type', 'HourCat']

In [0]:
px.parallel_categories(df[cat + ['Lanes_Blocked']], color = 'Lanes_Blocked')

## Visualizing the Insights

In [0]:
cat


**Folium Visulization of Incidents Based on 5 Car Categories**

In [0]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster

In [0]:
store_icon = "plus"

In [0]:
carA = "TAXI AND MC"
carB = "L300"
carC = "2 CARS"
carD = "BUS AND CAR"
carE = "3 CARS"

dfA = df[df["Involved"]==carA]
dfB = df[df["Involved"]==carB]
dfC = df[df["Involved"]==carC]
dfD = df[df["Involved"]==carD]
dfE = df[df["Involved"]==carE]

In [0]:
brand_colorA = "red"
brand_colorB = "orange"
brand_colorC = "blue"
brand_colorD = "green"
brand_colorE = "purple"

In [0]:
marker_coords = [14.552333, 121.018916] 

In [0]:
vehicle_map = folium.Map(location=marker_coords,height=700, width=1000, tiles="OpenStreetMap", 
                    zoom_start=13)

In [0]:
marker_clusterA = MarkerCluster().add_to(vehicle_map)

In [0]:
vehicle = carA
brand_color = brand_colorA
df_brand = dfA

for i in np.arange(len(df_brand)):
    lat = df_brand["Latitude"].values[i]
    lon = df_brand["Longitude"].values[i]
    folium.Marker(location=[lat,lon], 
                  icon=folium.Icon(color=brand_color,icon=store_icon),
                  popup="vehicle: "+vehicle).add_to(marker_clusterA)

In [0]:
vehicle = carB
brand_color = brand_colorB
df_brand = dfB

for i in np.arange(len(df_brand)):
    lat = df_brand["Latitude"].values[i]
    lon = df_brand["Longitude"].values[i]
    folium.Marker(location=[lat,lon], 
                  icon=folium.Icon(color=brand_color,icon=store_icon),
                  popup="vehicle: "+vehicle).add_to(marker_clusterA)

In [0]:
vehicle = carC
brand_color = brand_colorC
df_brand = dfC

for i in np.arange(len(df_brand)):
    lat = df_brand["Latitude"].values[i]
    lon = df_brand["Longitude"].values[i]
    folium.Marker(location=[lat,lon], 
                  icon=folium.Icon(color=brand_color,icon=store_icon),
                  popup="vehicle: "+vehicle).add_to(marker_clusterA)

In [0]:
vehicle = carD
brand_color = brand_colorD
df_brand = dfD

for i in np.arange(len(df_brand)):
    lat = df_brand["Latitude"].values[i]
    lon = df_brand["Longitude"].values[i]
    folium.Marker(location=[lat,lon], 
                  icon=folium.Icon(color=brand_color,icon=store_icon),
                  popup="vehicle: "+vehicle).add_to(marker_clusterA)

In [0]:
vehicle = carE
brand_color = brand_colorE
df_brand = dfE

for i in np.arange(len(df_brand)):
    lat = df_brand["Latitude"].values[i]
    lon = df_brand["Longitude"].values[i]
    folium.Marker(location=[lat,lon], 
                  icon=folium.Icon(color=brand_color,icon=store_icon),
                  popup="vehicle: "+vehicle).add_to(marker_clusterA)

In [0]:
vehicle_map