# Analysis of Traffic Collision Patterns: Time, Weather and Road Factors

## Step 1: Data Loading from CSV to python dataframe

In this project, the collision dataset is downloaded from Transport Canada's National Collision Database (NCDB) in CSV format. The dataset used to analyze time-based trends (CollisionByTime) contains 22,462 records, while the road type (CollisionByRoadType) dataset includes 18,720 entries. The weather trends (CollisionByWeather) dataset has 15,444 records, and the combined road and weather factors (CollisionByWeatherAndRoadAlignment) dataset comprises 11,232 entries. All datasets were downloaded in CSV format, covering years from 2011 to 2021. CollisionByTime acts as a superset of the other datasets. The TorontoCollisions dataset has 772,517 records, and each row represents the details of a single collision event.

In [3]:
#import required libraries
import pandas as pd
import csv

import mysql.connector
from mysql.connector import errorcode


In [37]:
# Step 1.1: Read each collision CSV into its own pandas DataFrame.
# Every read skips the first three lines of the file and labels the columns
# with the explicit name list given here.

# --- CollisionByTime ---
colNamesColsnByTime = [
    'Month', 'Year', 'DayOfWeek', 'CollisionHour', 'CollisionType',
    'NoofCollisions', 'NoofVehicles', 'NoofInjured', 'NoofFatalities',
]
url_CollisionByTime = "https://raw.githubusercontent.com/anithamonica/DATA604_TrafficIncidents/main/datasets/CollisionByTime.csv"
dfByTime = pd.read_csv(
    url_CollisionByTime,
    names=colNamesColsnByTime,
    skiprows=3,
)
display(dfByTime.head())

# --- CollisionByRoadType ---
colNamesColsnByRoadType = [
    'Month', 'Year', 'RoadClass', 'SpeedLimit', 'Pedestrains',
    'NoofCollisions', 'NoofInjured', 'NoofFatalities',
]
url_CollisionByRoadType = "https://raw.githubusercontent.com/anithamonica/DATA604_TrafficIncidents/main/datasets/CollisionByRoadType.csv"
dfByRoadType = pd.read_csv(
    url_CollisionByRoadType,
    names=colNamesColsnByRoadType,
    skiprows=3,
)
display(dfByRoadType.head())

# --- CollisionByWeather ---
colNamesColsnByWeather = [
    'Month', 'Year', 'RoadSurface', 'Weather',
    'NoofCollisions', 'NoofInjured', 'NoofFatalities',
]
url_CollisionByWeather = "https://raw.githubusercontent.com/anithamonica/DATA604_TrafficIncidents/main/datasets/CollisionByWeather.csv"
dfByWeather = pd.read_csv(
    url_CollisionByWeather,
    names=colNamesColsnByWeather,
    skiprows=3,
)
display(dfByWeather.head())

# --- CollisionByWeatherAndRoad ---
colNamesColsnByWeatherAndRoad = [
    'Month', 'Year', 'RoadAlignment', 'Weather',
    'NoofCollisions', 'NoofVehicles', 'NoofPersons', 'NoofInjured', 'NoofFatalities',
]
url_CollisionByWeatherAndRoad = "https://raw.githubusercontent.com/anithamonica/DATA604_TrafficIncidents/main/datasets/CollisionByWeatherAndRoadAlignment.csv"
dfByWeatherAndRoad = pd.read_csv(
    url_CollisionByWeatherAndRoad,
    names=colNamesColsnByWeatherAndRoad,
    skiprows=3,
)
display(dfByWeatherAndRoad.head())

# Row counts of the loaded frames; the insert step compares these against
# the row counts reported by the database tables.
dfByTimeCount = dfByTime.shape[0]
dfByRoadTypeCount = dfByRoadType.shape[0]
dfByWeatherCount = dfByWeather.shape[0]
dfByWeatherAndRoadCount = dfByWeatherAndRoad.shape[0]


Unnamed: 0,Month,Year,DayOfWeek,CollisionHour,CollisionType,NoofCollisions,NoofVehicles,NoofInjured,NoofFatalities
0,January,2010,Total,Midnight to 2:59,Fatal collisions,14,17,6,14
1,January,2010,Total,Midnight to 2:59,Injury collisions,390,557,580,0
2,January,2010,Total,3:00 to 5:59,Fatal collisions,9,14,12,9
3,January,2010,Total,3:00 to 5:59,Injury collisions,349,511,461,0
4,January,2010,Total,6:00 to 8:59,Fatal collisions,26,40,12,30


Unnamed: 0,Month,Year,RoadClass,SpeedLimit,Pedestrains,NoofCollisions,NoofInjured,NoofFatalities
0,January,2010,Urban,Less than 40 km per hour,Collisions with 0 pedestrians,43,61,1
1,January,2010,Urban,Less than 40 km per hour,Collisions with 1 pedestrian,30,31,0
2,January,2010,Urban,Less than 40 km per hour,Collisions with 2 pedestrians,4,7,0
3,January,2010,Urban,Less than 40 km per hour,Collisions with 3 or more pedestrians,1,4,0
4,January,2010,Urban,40 km per hour,Collisions with 0 pedestrians,159,219,1


Unnamed: 0,Month,Year,RoadSurface,Weather,NoofCollisions,NoofInjured,NoofFatalities
0,January,2010,"Dry, normal",Clear and sunny,2954,4102,50
1,January,2010,"Dry, normal","Overcast, cloudy but no precipitation",352,485,9
2,January,2010,"Dry, normal",Raining,9,10,0
3,January,2010,"Dry, normal","Snowing, not including drifting snow",6,6,1
4,January,2010,"Dry, normal","Freezing rain, sleet, hail",1,1,0


Unnamed: 0,Month,Year,RoadAlignment,Weather,NoofCollisions,NoofVehicles,NoofPersons,NoofInjured,NoofFatalities
0,January,2010,Straight and level,Clear and sunny,4166,7746,11765,5726,60
1,January,2010,Straight and level,"Overcast, cloudy but no precipitation",874,1536,2206,1153,13
2,January,2010,Straight and level,Raining,425,723,1113,565,3
3,January,2010,Straight and level,"Snowing, not including drifting snow",928,1537,2297,1275,8
4,January,2010,Straight and level,"Freezing rain, sleet, hail",74,109,165,102,2


Unnamed: 0,Date,Year,Month,DayofWeek,CollisionHour,NoofFatalities,IsInjuryCollsn,IsPropertyDamage,IsPedestrain
0,2014-01-01 5:00,2014,January,Wednesday,17,0,NO,,NO
1,2014-01-01 5:00,2014,January,Wednesday,14,0,NO,,NO
2,2014-01-01 5:00,2014,January,Wednesday,2,0,YES,,NO
3,2014-01-01 5:00,2014,January,Wednesday,3,0,NO,,NO
4,2014-01-01 5:00,2014,January,Wednesday,5,0,YES,,NO


In [46]:
# Account name and default schema used for the MySQL session.
USER = "student"
DB   = "student"

# NOTE(review): the password is hard-coded in this cell; consider reading it
# from the environment or a secrets store rather than keeping it in the notebook.
connectionSettings = {
    "user": USER,
    "password": 'Bi3KSjqgrNOOL',
    "host": '127.0.0.1',
    "port": 3306,
    "database": DB,
    "allow_local_infile": True,
}

# Attempt the connection; the bare name on the last line echoes the
# resulting object as the cell output.
myconnection = mysql.connector.connect(**connectionSettings)
myconnection

DatabaseError: 2003 (HY000): Can't connect to MySQL server on '127.0.0.1:3306' (61)

In [None]:
# CREATE TABLE statements, keyed by table name.
#
# Column widths are sized from the category labels visible in the loaded
# data. The Weather and Pedestrains columns are varchar(50) because labels
# such as "Overcast, cloudy but no precipitation" and
# "Collisions with 3 or more pedestrians" are 37 characters long and would
# not fit in the previous varchar(25) / varchar(35) definitions.
# NOTE: a table that already exists keeps its old definition; CREATE TABLE
# only reports that it exists and does not alter it.
createTableDict = {}
createTableDict["CollisionByTime"] = '''CREATE TABLE student.CollisionByTime(
    CollisionMonth varchar(15) NOT NULL,
    CollisionYear int, 
    DayOfWeek varchar(10),
    CollisionHour varchar(20),
    CollisionType varchar(20),
    NoofCollisions int,
    NoofVehicles int,
    NoofInjured int,
    NoofFatalities int);'''

createTableDict["CollisionByRoadType"] = '''CREATE TABLE student.CollisionByRoadType(
    CollisionMonth varchar(15) NOT NULL,
    CollisionYear int, 
    RoadClass varchar(10),
    SpeedLimit varchar(35),
    Pedestrains varchar(50),
    NoofCollisions int,
    NoofInjured int,
    NoofFatalities int);'''

createTableDict["CollisionByWeather"] = '''CREATE TABLE student.CollisionByWeather(
    CollisionMonth varchar(15) NOT NULL,
    CollisionYear int, 
    RoadSurface varchar(25),
    Weather varchar(50),
    NoofCollisions int,
    NoofInjured int,
    NoofFatalities int);'''

createTableDict["CollisionByWeatherAndRoad"] = '''CREATE TABLE student.CollisionByWeatherAndRoad(
    CollisionMonth varchar(15) NOT NULL,
    CollisionYear int, 
    RoadAlignment varchar(25),
    Weather varchar(50),
    NoofCollisions int,
    NoofVehicles int,
    NoofPersons int,
    NoofInjured int,
    NoofFatalities int);'''


# Run each statement independently so that one failure (for example a table
# that already exists) does not stop the remaining tables from being created.
createCursor = myconnection.cursor()
try:
    for table, createStatement in createTableDict.items():
        try:
            createCursor.execute(createStatement)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print("Error! Table(" + table +") is already created.")
            else:
                print(err.msg)
        else:
            print("Table created successfully!")
finally:
    # Release the cursor even if something unexpected is raised in the loop.
    createCursor.close()

In [None]:
#Insert records into MySQL database

#function to insert into table
def insertIntoTable(tableName, df):
    """Insert every row of ``df`` into the MySQL table ``tableName``.

    Values are bound positionally, so the DataFrame columns must be in the
    same order as the table's columns. Missing values (NaN/NA) are sent as
    SQL NULL. All rows are written in one batch and committed together; if
    the batch fails the transaction is rolled back and the error re-raised.

    Args:
        tableName: Name of the target table (wrapped in backticks).
        df: pandas DataFrame whose rows are inserted.

    Raises:
        mysql.connector.Error: If the database rejects the insert.
    """
    # One "%s" placeholder per column; the statement is the same for every row.
    placeholders = ",".join(["%s"] * len(df.columns))
    insertCommand = "INSERT INTO `" + tableName + "` VALUES (" + placeholders + ")"

    # Cast to object so ints stay ints, then swap missing values for None,
    # which the connector sends as NULL.
    cleaned = df.astype(object).where(df.notna(), None)
    rows = list(cleaned.itertuples(index=False, name=None))

    insertCursor = myconnection.cursor()
    try:
        if rows:
            insertCursor.executemany(insertCommand, rows)
        myconnection.commit()
    except mysql.connector.Error:
        # Do not leave a half-written batch pending on the shared connection.
        myconnection.rollback()
        raise
    finally:
        insertCursor.close()


# Check if records are already inserted
def insertCheck(tableName, recordCount, resultCount, df):
    """Print both row counts and insert ``df`` when they are not equal.

    Args:
        tableName: Name of the table being checked.
        recordCount: Number of rows expected (rows in the DataFrame).
        resultCount: Number of rows currently reported by the table.
        df: DataFrame passed to ``insertIntoTable`` when the counts differ.
    """
    print(tableName, "\n count of records: ", recordCount, "\n actual from table: ",resultCount)

    countsMatch = recordCount == resultCount
    if not countsMatch:
        # Counts differ: load the DataFrame, then report it.
        insertIntoTable(tableName, df)
        print(f"({tableName}): Records to be inserted!")
        return

    # Counts agree: nothing is inserted.
    print(f"Error - insertCheck() - {tableName}: Records already inserted!")
        

def checkIfInserted(tableName):
    readCursor = myconnection.cursor(buffered=True, dictionary=True)
    queryString = ("SELECT COUNT(*) FROM "+ tableName + ";")
    try:
        readCursor.execute(queryString)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
            print("Error - checkIfInserted() - " + tableName +": Issue in select count statement.")
        else:
            print("Error - checkIfInserted():", err.msg)
    else:
        for (library_value) in readCursor:
            count = library_value['COUNT(*)']
            #print(type(count))
            match tableName:
                case 'CollisionByTime':
                    insertCheck(tableName, dfByTimeCount, count, dfByTime)
                case 'CollisionByRoadType':
                    insertCheck(tableName, dfByRoadTypeCount, count, dfByRoadType)
                    display(dfByRoadType.head())
                case 'CollisionByWeather':
                    insertCheck(tableName, dfByWeatherCount, count, dfByWeather)
                case 'CollisionByWeatherAndRoad':
                    insertCheck(tableName, dfByWeatherAndRoadCount, count, dfByWeatherAndRoad)
    read_cursor.close()
                    
        
# Tables to verify, numbered in the order they are processed.
tables = {
    1: "CollisionByTime",
    2: "CollisionByRoadType",
    3: "CollisionByWeather",
    4: "CollisionByWeatherAndRoad",
}

# Check each table in turn and load it when its row count does not match.
for index, currentTable in tables.items():
    checkIfInserted(currentTable)

In [None]:
# Step 1.2: