### Heatmap Generation - Traffic accident hotspot prediction (2015-2020)

In [1]:
import os
import math
import warnings
from os import walk
import pandas as pd
from tqdm import tqdm
from PIL import Image
import seaborn as sns
import geopy.distance
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
from haversine import haversine, Unit

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

#### Methods to help in the generation of spatio-temporal heatmaps

In [None]:
# Most of the heatmap creation functionalities were used/modified from jeffmur's geoLife repository

def createMap(location, cell_size=5):
    
    ## Calculate bounds
    sLat = float(location[0])
    nLat = float(location[1])
    wLon = float(location[2])
    eLon = float(location[3])

    # all four corners
    SE = [sLat, eLon]
    SW = [sLat, wLon]
    NE = [nLat, eLon]
    NW = [nLat, wLon]

    bounds = {"SE": SE, "SW": SW, "NE": NE, "NW": NW}

    # Calculate Distance
    # SW -> NW
    width = math.ceil(geopy.distance.geodesic(SW, NW).miles)

    # NW -> NE
    length = math.ceil(geopy.distance.geodesic(NW, NE).miles)
    if(length == 0):
        length = 2400 #Alaska
        
    # Image Dimensions
    l_pix = int(math.ceil(length / cell_size))
    w_pix = int(math.ceil(width / cell_size))
    
    # Step Size for Lat/Lon comparison
    # Max distance / num of pixels
    
    step_length = (nLat - sLat) / l_pix  #  Step Lenth
    step_width = (eLon - wLon) / w_pix  #  Step Width

    # Steps in degrees
    step = {"width": step_width, "length": step_length}

    # Calculated Width and Length of image
    pix = {"length": l_pix, "width": w_pix}

    return bounds, step, pix


def frequencyHeatmap(bounds, pix, step, stdf):
    # Generates a Frequency Matrix
    nLat = bounds["NE"][0]
    eLon = bounds["NE"][1]

    columns = pix["width"]
    rows = pix["length"]

    step_w = step["width"]
    step_l = step["length"]

    freq_heat = pd.DataFrame(0, index=range(rows+1), columns=range(columns+1))
    lonLat = stdf[stdf.columns[7:9]].to_numpy()
    maxVal = 0

    for location in lonLat:
        r = round((nLat - location[1]) / step_l)
        c = round((eLon - location[0]) / step_w)
        #print(r,c)
        if (c <= columns) and (c >= 0) and (r <= rows) and (r >= 0):
            freq_heat.loc[r, c] += 1

            if maxVal < freq_heat.loc[r, c]:
                maxVal = freq_heat.loc[r, c]
    return maxVal, freq_heat

def genFMprime(freq_heat):
    # Generates an image representation of the Frequency Matrix
    dim = freq_heat.shape
    #print("dim",dim)
    img = Image.new("RGB", (dim[0], dim[1]), color="red")
    pixels = img.load()

    for i in range(freq_heat.shape[0]): #iterate over rows
        for j in range(freq_heat.shape[1]): #iterate over columns
            #print(i,j)
            if(freq_heat.at[i, j] == 1):
                pixels[i, j] = (255, 255, 255)
            else:
                pixels[i, j] = (0, 0, 0)
    
    '''
    for row in freq_heat.itertuples():
        # Need row index for assignment
        for c in range(1, len(row)):
            # Capture data point at [row, column]
            data = row[c]
            freq = int(255 * data)
            pixels[row[0], c - 1] = (freq, freq, freq)
            '''
    #print(img.size)
    return img

def dropOutlyingData(df, boundingbox):
    # Remove data outside the bounding box of Beijing
    lat = boundingbox[0:2]
    lon = boundingbox[2:4]

    return df.loc[
        (df.longitude >= float(lon[0]))
        & (df.longitude <= float(lon[1]))
        & (df.latitude >= float(lat[0]))
        & (df.latitude <= float(lat[1]))
    ].reset_index(drop=True)

#### Load accident datasets from FARS database

In [2]:
# Load data
FARSpath = "/data/fiona123/ProjectData_TrafficFatality/"
FARS = ["FARS2015NationalCSV","FARS2016NationalCSV","FARS2017NationalCSV","FARS2018NationalCSV",
       "FARS2019NationalCSV", "FARS2020NationalCSV"]

d = {}
for i in FARS:
    print(i)
    files = next(walk(FARSpath+i))[2]
    for j in files:
        if ("accident" in j) or ("ACCIDENT" in j):
            print(j)
            d[i[4:8] + "_" + j[:-4]] = pd.read_csv(FARSpath + i + "/" + j,encoding='ISO8859-1')

FARS2015NationalCSV
accident.csv
FARS2016NationalCSV
accident.CSV
FARS2017NationalCSV
accident.CSV
FARS2018NationalCSV
accident.csv
FARS2019NationalCSV
accident.CSV
FARS2020NationalCSV
accident.CSV


#### Concat all accident datasets and create a heatmapid based on Date

In [5]:
names = []
for i in d.keys():
    temp_df = d[i]
    names.append(temp_df)
d['accident'] = pd.concat(names)
d['accident_heatmap'] = d['accident'][['LATITUDE', 'LONGITUD','YEAR','MONTH','DAY','STATENAME']]
d['accident_heatmap']['heatmapid'] = d['accident_heatmap']['YEAR'].astype(str) + '_' + d['accident_heatmap']['MONTH'].astype(str) + '_' + d['accident_heatmap']['DAY'].astype(str)
display(d['accident_heatmap'].head(5))

Unnamed: 0,LATITUDE,LONGITUD,YEAR,MONTH,DAY,STATENAME,heatmapid
0,33.878653,-87.325328,2015,1,1,Alabama,2015_1_1
1,34.910442,-86.908708,2015,1,1,Alabama,2015_1_1
2,32.142006,-85.758456,2015,1,1,Alabama,2015_1_1
3,31.439814,-85.5103,2015,1,4,Alabama,2015_1_4
4,31.319331,-85.5151,2015,1,7,Alabama,2015_1_7


#### Create directories to store heatmaps for each state

In [6]:
states = d['accident_heatmap'].STATENAME.unique()

# Creates 50 directories for each state
for i in tqdm(states):
    path = "/data/fiona123/Project_GeneratedData/" + str(i)
    print(i)
    if not os.path.exists(path):
        os.mkdir(path)

100%|██████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 44196.18it/s]

Alabama
Alaska
Arizona
Arkansas
California
Colorado
Connecticut
Delaware
District of Columbia
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
Montana
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginia
Wisconsin
Wyoming





#### Create heatmaps and save in respective directories

In [43]:
CELL_SIZE = 10000
SQ_CELL = float(CELL_SIZE) * 0.00062137

for i in tqdm(states):
    print(i)
    app = Nominatim(user_agent="traffic")
    loc = app.geocode(str(i)).raw
    path = "/data/fiona123/Project_GeneratedData/" + str(i) 
    for j in d['accident_heatmap']['heatmapid'].unique():
        df_id = d['accident_heatmap'][d['accident_heatmap']['heatmapid'] == j]
        df_id["longitude"] = df_id["LONGITUD"] 
        df_id["latitude"] = df_id["LATITUDE"] 

        if(len(df_id) == 0):
            del df_id 
            continue
        else: 
            df_id = dropOutlyingData(df_id, loc["boundingbox"])
            bounds, step, pix = createMap(loc["boundingbox"], cell_size=SQ_CELL)
            maxVal, freq_heat = frequencyHeatmap(bounds, pix, step, df_id)
            freq_heat = freq_heat.T
            img = genFMprime(freq_heat)
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            img = img.convert("L")
            img.save(path + "/" + str(j) + '.png', "PNG")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Alabama


  2%|█▍                                                                        | 1/51 [01:00<50:46, 60.93s/it]

Alaska


  4%|██▊                                                                    | 2/51 [17:35<8:18:26, 610.33s/it]

Arizona


  6%|████▏                                                                  | 3/51 [18:53<4:53:51, 367.33s/it]

Arkansas


  8%|█████▌                                                                 | 4/51 [20:00<3:14:33, 248.38s/it]

California


 10%|██████▉                                                                | 5/51 [22:32<2:43:59, 213.90s/it]

Colorado


 12%|████████▎                                                              | 6/51 [23:54<2:06:45, 169.02s/it]

Connecticut


 14%|█████████▋                                                             | 7/51 [24:46<1:35:49, 130.68s/it]

Delaware


 16%|███████████▏                                                           | 8/51 [25:28<1:13:19, 102.31s/it]

District of Columbia


 18%|█████████████                                                             | 9/51 [26:07<57:46, 82.52s/it]

Florida


 20%|█████████████▉                                                         | 10/51 [27:56<1:02:10, 90.98s/it]

Georgia


 22%|███████████████▋                                                         | 11/51 [29:15<58:03, 87.10s/it]

Hawaii


 24%|████████████████▍                                                     | 12/51 [34:40<1:43:36, 159.40s/it]

Idaho


 25%|█████████████████▊                                                    | 13/51 [36:01<1:26:04, 135.91s/it]

Illinois


 27%|███████████████████▏                                                  | 14/51 [37:12<1:11:43, 116.31s/it]

Indiana


 29%|█████████████████████▍                                                   | 15/51 [38:11<59:17, 98.83s/it]

Iowa


 31%|██████████████████████▉                                                  | 16/51 [39:16<51:49, 88.85s/it]

Kansas


 33%|████████████████████████▎                                                | 17/51 [40:25<46:58, 82.90s/it]

Kentucky


 35%|█████████████████████████▊                                               | 18/51 [41:40<44:08, 80.26s/it]

Louisiana


 37%|███████████████████████████▏                                             | 19/51 [42:53<41:46, 78.34s/it]

Maine


 39%|████████████████████████████▋                                            | 20/51 [43:59<38:25, 74.36s/it]

Maryland


 41%|██████████████████████████████                                           | 21/51 [44:56<34:36, 69.20s/it]

Massachusetts


 43%|███████████████████████████████▍                                         | 22/51 [45:49<31:04, 64.28s/it]

Michigan


 45%|████████████████████████████████▉                                        | 23/51 [47:23<34:13, 73.33s/it]

Minnesota


 47%|██████████████████████████████████▎                                      | 24/51 [48:45<34:07, 75.83s/it]

Mississippi


 49%|███████████████████████████████████▊                                     | 25/51 [49:44<30:46, 71.03s/it]

Missouri


 51%|█████████████████████████████████████▏                                   | 26/51 [51:01<30:18, 72.74s/it]

Montana


 53%|██████████████████████████████████████▋                                  | 27/51 [52:31<31:09, 77.89s/it]

Nebraska


 55%|████████████████████████████████████████                                 | 28/51 [53:39<28:40, 74.80s/it]

Nevada


 57%|█████████████████████████████████████████▌                               | 29/51 [55:04<28:34, 77.91s/it]

New Hampshire


 59%|██████████████████████████████████████████▉                              | 30/51 [55:55<24:26, 69.81s/it]

New Jersey


 61%|████████████████████████████████████████████▎                            | 31/51 [56:42<21:03, 63.18s/it]

New Mexico


 63%|█████████████████████████████████████████████▊                           | 32/51 [58:01<21:27, 67.79s/it]

New York


 65%|███████████████████████████████████████████████▏                         | 33/51 [58:44<18:05, 60.31s/it]

North Carolina


 67%|███████████████████████████████████████████████▎                       | 34/51 [1:00:16<19:47, 69.85s/it]

North Dakota


 69%|████████████████████████████████████████████████▋                      | 35/51 [1:01:23<18:23, 68.98s/it]

Ohio


 71%|██████████████████████████████████████████████████                     | 36/51 [1:02:24<16:37, 66.50s/it]

Oklahoma


 73%|███████████████████████████████████████████████████▌                   | 37/51 [1:03:40<16:12, 69.47s/it]

Oregon


 75%|████████████████████████████████████████████████████▉                  | 38/51 [1:04:55<15:26, 71.25s/it]

Pennsylvania


 76%|██████████████████████████████████████████████████████▎                | 39/51 [1:05:56<13:36, 68.05s/it]

Rhode Island


 78%|███████████████████████████████████████████████████████▋               | 40/51 [1:06:36<10:56, 59.68s/it]

South Carolina


 80%|█████████████████████████████████████████████████████████              | 41/51 [1:07:38<10:01, 60.20s/it]

South Dakota


 82%|██████████████████████████████████████████████████████████▍            | 42/51 [1:08:45<09:20, 62.27s/it]

Tennessee


 84%|███████████████████████████████████████████████████████████▊           | 43/51 [1:09:57<08:43, 65.41s/it]

Texas


 86%|████████████████████████████████████████████████████████████▍         | 44/51 [1:13:23<12:32, 107.49s/it]

Utah


 88%|██████████████████████████████████████████████████████████████▋        | 45/51 [1:14:32<09:35, 95.87s/it]

Vermont


 90%|████████████████████████████████████████████████████████████████       | 46/51 [1:15:19<06:46, 81.39s/it]

Virginia


 92%|█████████████████████████████████████████████████████████████████▍     | 47/51 [1:16:43<05:28, 82.01s/it]

Washington


 94%|██████████████████████████████████████████████████████████████████▊    | 48/51 [1:17:27<03:31, 70.51s/it]

West Virginia


 96%|████████████████████████████████████████████████████████████████████▏  | 49/51 [1:18:31<02:17, 68.61s/it]

Wisconsin


 98%|█████████████████████████████████████████████████████████████████████▌ | 50/51 [1:19:47<01:10, 70.80s/it]

Wyoming


100%|███████████████████████████████████████████████████████████████████████| 51/51 [1:20:53<00:00, 95.17s/it]
