In [1]:
import pandas as pd

# iconCategory
# integer
#   The main icon category associated with this incident, i.e. the icon category of the first event in the events list describing the incident. Value meanings:
#       0: Unknown
#       1: Accident
#       2: Fog
#       3: Dangerous Conditions
#       4: Rain
#       5: Ice
#       6: Jam
#       7: Lane Closed
#       8: Road Closed
#       9: Road Works
#       10: Wind
#       11: Flooding
#       14: Broken Down Vehicle

# magnitudeOfDelay
# integer
#   The magnitude of delay associated with an incident. Value meanings:
#       0: Unknown
#       1: Minor
#       2: Moderate
#       3: Major
#       4: Undefined (used for road closures and other indefinite delays)

# startTime
# string
#   Start time of the incident, if available. The date is described in the ISO8601 format.

# endTime
# string
#   End time of the incident, if available. The date is described in the ISO8601 format.

# from
# string
#   The name of the location where the traffic due to the incident starts.

# to
# string
#   The name of the location where the traffic due to the incident ends.

# length
# float
#   The length of the incident in meters.

# delay
# integer
#   The delay in seconds caused by the incident (except road closures).
#   It is calculated against free-flow travel time (the travel time when the traffic is minimal, e.g., night traffic).

# roadNumbers
# array of strings
#   The road number(s) affected by the incident.

# timeValidity
# string
#   Enumeration string describing if the incident occurrence is now or in the future.

# tmc
# object
#   TMC (Traffic Message Channel) data of the traffic incident, needed to determine its location.

# probabilityOfOccurrence
# string
#   Enumeration string specifying the likelihood of the incident occurring.
#   Allowed values:
#       certain
#       probable
#       risk_of
#       improbable

# numberOfReports
# integer
#   The number of reports given by actual end-users.

# lastReportTime
# string
#   The date, in ISO8601 format, when the incident was last reported.
#   Gives the user confidence that the incident is fresh.

# aci
# object
#   The Community Attributes (ACI).

In [2]:
# Load the raw incident export and convert the collection timestamp to a
# timezone-aware (UTC) datetime column in a single chained expression.
incidents_df = pd.read_csv(
    "../../csv/incidents_data_v2.csv.zip",
    index_col=False,
).assign(
    collectedAt=lambda frame: pd.to_datetime(frame["collectedAt"], utc=True),
)
# Number of incident rows loaded (displayed as the cell output).
incidents_df.shape[0]

20574

In [3]:
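# Incident types (iconCategory) remaining in the dataset:
#   1 (Accident)
#   3 (Dangerous Conditions)
#   7 (Lane Closed)
#   8 (Road Closed)
#   9 (Road Works)
# Excluded: 6 (Jam) - removed because it is redundant to say a jam caused a jam.
# NOTE(review): no iconCategory filter is applied in this notebook; presumably the
# removal happened when the input CSV was produced - confirm.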

# Discard the columns that are not needed in the cleaned dataset.
incidents_df = incidents_df.drop(
    labels=[
        "type",
        "magnitudeOfDelay",
        "delay",          # all are 0
        "timeValidity",   # all are present
        "numberOfReports",
        "lastReportTime",
        "events",
        "geometryType",   # all are line string
        # TMC (Traffic Message Channel) data - irrelevant here
        "tmcCountryCode",
        "tmcTableNumber",
        "tmcTableVersion",
        "tmcDirection",
        "tmcPoints",
        "tmc",
    ],
    axis="columns",
)

In [4]:
# Persist the cleaned frame as a zipped CSV, without writing the index.
# The archive member name is set explicitly so the file inside the zip is a
# plain ".csv"; with only compression="zip" some pandas versions name the
# member after the archive itself (i.e. "...csv.zip" inside the zip).
incidents_df.to_csv(
    "../../csv/incidents_data_cleaned_v2.csv.zip",
    index=False,
    compression={"method": "zip", "archive_name": "incidents_data_cleaned_v2.csv"},
)