# Create Flow Probability Distribution

In [22]:
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import json
import datetime as dt

## Create sets
Filter the data based on timestamp. Only values whose time of day lies between the start time (inclusive) and the end time (exclusive) are stored in a set. In this instance the sets are hourly. Multiple sets are collected to create FPDs over the whole day.

In [23]:
def import_intersection(intersection_name):
    """Load the flow measurements of one intersection from its CSV file.

    The file ``../intersection_selection/<intersection_name>.csv`` is read,
    its ``timestamp`` column becomes an unnamed ``DatetimeIndex``, and only
    the column named after the intersection is kept.

    Parameters
    ----------
    intersection_name : str
        Base name of the CSV file and name of the column to return.

    Returns
    -------
    pandas.DataFrame
        Single-column frame indexed by timestamp.
    """
    csv_path = '../intersection_selection/' + intersection_name + '.csv'
    frame = pd.read_csv(csv_path)
    # Index by time so that date grouping and between_time filtering work.
    frame.index = pd.DatetimeIndex(frame.timestamp)
    frame.index.name = None
    # Double brackets keep the result a DataFrame instead of a Series.
    selected_columns = [intersection_name]
    return frame[selected_columns]

In [24]:
def create_sets(start_time,end_time,intersection_data):
    """Return the rows whose time of day lies in ``[start_time, end_time)``.

    Parameters
    ----------
    start_time, end_time : str or datetime.time
        Bounds of the window, e.g. ``'07:00'``. The start is included and the
        end is excluded, so consecutive windows never share a row. When
        ``start_time`` is later than ``end_time`` (e.g. ``'23:00'`` to
        ``'00:00'``) the window wraps around midnight.
    intersection_data : pandas.DataFrame
        Data indexed by a ``DatetimeIndex``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``intersection_data`` that fall inside the window.
    """
    try:
        # ``include_end`` was deprecated in pandas 1.4 and removed in 2.0;
        # ``inclusive='left'`` expresses the same half-open interval.
        return intersection_data.between_time(start_time, end_time, inclusive='left')
    except TypeError as err:
        # Only fall back when the keyword itself is unknown (pandas < 1.4);
        # any other TypeError (e.g. a non-datetime index) is a real error.
        if 'inclusive' not in str(err):
            raise
        return intersection_data.between_time(start_time, end_time, include_end=False)

## Probability Mass Function
Take a set and compute its probability mass function (PMF), then return the sorted unique traffic flow values together with the probability of each value.

In [25]:
def probability_mass_function(intersection_set):
    """Compute the empirical PMF of the first column of ``intersection_set``.

    Parameters
    ----------
    intersection_set : pandas.DataFrame
        Frame whose first column holds the observed flow values.

    Returns
    -------
    list
        ``[flow_values, probabilities]``: the unique flow values as plain
        ``int`` in ascending order, and for each of them the share of
        observations that have that value. Both lists are empty when the
        frame has no rows.
    """
    flow_column = intersection_set[intersection_set.columns[0]]
    observations = flow_column.values
    sample_size = len(observations)
    tally = Counter(observations)
    distinct_flows = np.sort(flow_column.unique())
    probabilities = [tally[flow] / sample_size for flow in distinct_flows]
    # Cast to built-in int so the result can be serialised to JSON.
    return [[int(flow) for flow in distinct_flows], probabilities]

## Create dictionary
Separate function that sets up the nested dictionary (intersection → weekday → hour) in which the FPDs will be saved.

In [26]:
def create_dictionary(intersections):
    """Build the empty nested container in which the FPDs are stored.

    Parameters
    ----------
    intersections : sequence of str
        Intersection names used as the top-level keys.

    Returns
    -------
    dict
        ``{intersection: {weekday: {hour: {}}}}`` with ``weekday`` as the
        integers 0-6 and ``hour`` as the zero-padded strings ``'00'``-``'23'``.
        Every innermost dictionary is a distinct, empty object.
    """
    hour_labels = ['{:02d}'.format(hour) for hour in range(24)]
    return {
        name: {
            weekday: {label: {} for label in hour_labels}
            for weekday in range(7)
        }
        for name in intersections
    }

## Create FPDs
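Main function to create the FPDs. It combines the previous functions: for every intersection, date and hour of the day it builds the hourly set, computes its PMF and stores the result in the dictionary under intersection → weekday → hour → date. Hours in which the only observed flow value is 0 are skipped.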

In [27]:
def create_fpds(intersections):
    """Create the hourly flow probability distributions of all intersections.

    For every intersection the data is loaded with ``import_intersection``,
    split per calendar date and then per hour of the day. The PMF of each
    hourly set is stored as
    ``fpds[intersection][weekday][hour][date] = [flow_values, probabilities]``.

    Hours without any observation and hours in which every observed flow is 0
    are not stored.

    Parameters
    ----------
    intersections : sequence of str
        Names of the intersections to process.

    Returns
    -------
    dict
        Nested dictionary keyed by intersection name, weekday (0-6, as
        returned by ``date.weekday()``), hour (``'00'``-``'23'``) and ISO date
        string.
    """
    fpds = create_dictionary(intersections)
    hours = [dt.time(h).strftime('%H') for h in range(24)]

    for intersection in intersections:
        intersection_data = import_intersection(intersection)
        # Group on the calendar date; the group key is a datetime.date.
        for day_date, day in intersection_data.groupby(intersection_data.index.date):
            date = str(day_date)
            weekday = day_date.weekday()
            for j, hour in enumerate(hours):
                start_time = hour + ':00'
                # The last window wraps around to midnight: 23:00 -> 00:00.
                end_time = hours[(j + 1) % len(hours)] + ':00'

                intersection_set = create_sets(start_time, end_time, day)
                fpd = probability_mass_function(intersection_set)

                # Skip hours with no rows (previously a ZeroDivisionError)
                # and hours whose only observed flow value is 0.
                if not any(fpd[0]):
                    continue
                fpds[intersection][weekday][hour][date] = fpd
    return fpds

In [7]:
# Build the FPD collection for the six selected intersections. Each name is
# handed to import_intersection, which reads
# '../intersection_selection/<name>.csv'.
fpds = create_fpds(['K302','K173','K414','K158','K402','K305'])

In [9]:
# Save the nested FPD dictionary as JSON. json.dump writes the integer weekday
# keys as strings, so they are '0'..'6' when the file is loaded again. open()
# does not create directories, so ./edited_data must already exist.
with open('./edited_data/fpd_collection.json', 'w') as f:
    json.dump(fpds, f)