# AADT Implementation

## Imports

In [1]:
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchmetrics
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.11.4


In [2]:
import pandas as pd
import numpy as np
import os
import torch
import random
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt
import albumentations as album
from PIL import Image
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import random_split
from sklearn.metrics import accuracy_score
import plotly.express as px
import torchmetrics
from torchmetrics import MeanAbsolutePercentageError
from glob import glob
import plotly.graph_objs as go

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Global Variables

In [3]:
# When True, the path-configuration cell mounts Google Drive and points
# ROOT_DIR_PATH at the project folder on Drive instead of the parent directory.
COLAB = True

In [4]:
# Scale factor multiplied with each bounding-box size in categorize_bbox_size
# before it is compared with the size-class thresholds.
# NOTE(review): presumably the ground sample distance in cm per pixel — confirm units.
GSD = 50

In [5]:
# Project root: defaults to the parent of the current working directory.
ROOT_DIR_PATH = os.path.abspath('..')

# On Colab, mount Google Drive and use the project folder stored there instead.
if COLAB:
  from google.colab import drive
  drive.mount('/content/drive')

  # Relative path: resolved against the current working directory.
  ROOT_DIR_PATH = os.path.abspath('drive/MyDrive/Spatial_Finance_Transport/ARoads/')


# Directory scanned for saved model state dicts (one NeuralNetwork per file).
MODELS_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'models/aadt_models/')
# Output directory for the per-site AADT prediction CSVs.
AADT_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/aadt/')
# Input directory of per-site vehicle detection CSVs (bounding boxes).
VEHICLE_COUNTS_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/vehicle_counts/')
# Input directory of per-site speed CSVs (columns image_id, avg_mph).
TRUE_SPEED_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/speed_data/')
# Input directory of per-site acquisition time CSVs (columns image_id, day, month, hour).
TIME_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/time_data/')
# Input directory of per-site link length CSVs (columns image_id, link_length).
LINK_LENGTH_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/link_length_data/')

# Output directory for the per-site traffic count (N15) CSVs.
TRAFFIC_COUNTS_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/traffic_counts/')

# Input directory of the min/max tables used to normalise the N15 columns.
TRANSFORM_ROOT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/aadt/processed/')

# Count-site identifiers.
# NOTE(review): not referenced in the visible part of the notebook, and the
# Trafford sites that appear in the loaded data are not listed — confirm intent.
COUNT_SITES = ['luton_m1_2557a', 'luton_m1_2557b', 'havering_m25_5790a', 'havering_m25_5790b', 
                           'hounslow_m4_2188a', 'hounslow_m4_2188b',
                           'blackburn_30361033', 'blackburn_30361032']

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Road link lengths per local authority; values in miles.
# NOTE(review): these constants are not referenced in the visible part of the
# notebook — link lengths are read from the CSVs under LINK_LENGTH_ROOT_PATH,
# and the value displayed there for trafford_m60_9086b (1.05) differs from
# TRAFFORD_LINK_LENGTH below. Confirm which source is authoritative.
BLACKBURN_LINK_LENGTH = 0.94
HOUNSLOW_LINK_LENGTH = 1.07
HAVERING_LINK_LENGTH = 3.79
TRAFFORD_LINK_LENGTH = 0.65
LUTON_LINK_LENGTH = 0.85

In [7]:
# Maps each N15 feature column to the column of the transform (min/max) table
# that holds its normalisation bounds; used by the normalisation cell.
NORMALISE_DICT = {
    'Total_N15': 'total_volume',
    'Small_N15': '0-520cm',
    'Medium_N15': '521-660cm',
    'Large_N15': '661-1160cm',
    'Very Large_N15': '1160+cm'
}

In [8]:
# Column names given, in order, to the five model outputs when a prediction
# is written to CSV by save_float_to_csv.
COLUMN_NAMES = ['aadt', 'cars_and_taxis', 'buses_and_coaches', 'lgvs', 'all_hgvs']

## Helper Functions

In [9]:
def extract_substring(string):
    """
    Extract the file stem from a path: the characters after the last slash ("/")
    and before the first dot (".") that follows that slash.

    The dot is searched for only in the final path component, so dots in
    directory names (e.g. "/home/user.name/x.csv" or "./data/x.csv") do not
    affect the result.

    Args:
        string (str): Input string, typically a file path.

    Returns:
        str: Extracted substring, or "" if the string contains no slash or the
        final path component contains no dot.
    """
    # Locate the start of the final path component
    slash_index = string.rfind("/")
    if slash_index == -1:
        return ""

    # Only look for the dot after the last slash; searching the whole string
    # would pick up dots in directory names and produce an empty/wrong stem.
    dot_index = string.find(".", slash_index + 1)
    if dot_index == -1:
        return ""

    return string[slash_index + 1:dot_index]

In [10]:
def get_files_in_directory(directory):
    """
    Recursively collect the paths of all files below a directory.

    Args:
        directory (str): Directory path to walk.

    Returns:
        list: File paths (each directory path joined with the file name), in
        the order produced by os.walk. Empty if nothing is found.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(directory)
        for entry in entries
    ]

In [11]:
def categorize_bbox_size(df, gsd=None):
    """
    Categorize each bounding box by its longest side and count boxes per size class.

    The longest side, max(x_max - x_min, y_max - y_min), is multiplied by `gsd`
    and assigned to one of four contiguous, half-open size classes:

        Small       [0, 520)
        Medium      [520, 660)
        Large       [660, 1160)
        Very Large  [1160, inf)

    The classes share their boundaries so that no box with a non-negative size
    falls between two classes (which would also drop it from 'Total').

    Side effect: a 'max_size' column (longest side, before scaling) is added to
    `df` in place.

    Args:
        df (pd.DataFrame): Bounding box data with columns
            'x_min', 'x_max', 'y_min', 'y_max'.
        gsd (float, optional): Scale factor applied to the box size before
            classification. Defaults to the module-level GSD constant.

    Returns:
        pd.DataFrame: A single-row DataFrame with columns
        'Total', 'Small', 'Medium', 'Large', 'Very Large', where 'Total' is the
        sum of the four class counts.
    """
    if gsd is None:
        gsd = GSD

    # Longest side of each box, computed column-wise (label-based, no row loop)
    width = df['x_max'] - df['x_min']
    height = df['y_max'] - df['y_min']
    df['max_size'] = np.maximum(width, height)

    scaled_size = df['max_size'] * gsd

    # (label, inclusive lower bound, exclusive upper bound)
    size_classes = [
        ('Small', 0, 520),
        ('Medium', 520, 660),
        ('Large', 660, 1160),
        ('Very Large', 1160, float('inf')),
    ]

    counts = {
        label: int(((scaled_size >= lower) & (scaled_size < upper)).sum())
        for label, lower, upper in size_classes
    }

    counts_df = pd.DataFrame([counts])

    # Add a leading column with the sum of all class counts
    counts_df.insert(0, 'Total', counts_df.sum(axis=1))
    return counts_df

In [12]:
def save_float_to_csv(float_values, column_names, image_id, file_name):
    """
    Write a single-row CSV holding an image ID and a set of named float values.

    The first column is 'image_id'; the remaining columns pair each entry of
    `column_names` with the value at the same position in `float_values`.
    The file is written without the DataFrame index.

    Args:
        float_values (List[float]): Values to store, one per column name.
        column_names (List[str]): Column headers for the values.
        image_id (str): Identifier written to the 'image_id' column.
        file_name (str): Destination path of the CSV file.
    """
    row = {'image_id': image_id}
    # Wrap each value in a one-element list so the frame has exactly one row
    row.update({column: [value] for column, value in zip(column_names, float_values)})

    pd.DataFrame(row).to_csv(file_name, index=False)

In [13]:
def calculate_N15(df_v, df_N, df_l):
    """
    Add traffic-count (N15) columns to a vehicle-count DataFrame.

    For each of the count columns 'Total', 'Small', 'Medium', 'Large' and
    'Very Large', a column '<name>_N15' is computed as

        0.25 * avg_mph * count / link_length

    using the 'avg_mph' value from the first row of `df_v` and the
    'link_length' value from the first row of `df_l`.

    Args:
        df_v (pd.DataFrame): Speed data; first row must provide 'avg_mph'.
        df_N (pd.DataFrame): Vehicle counts with the five count columns.
            Modified in place.
        df_l (pd.DataFrame): Link length data; first row must provide 'link_length'.

    Returns:
        pd.DataFrame: The same `df_N` object, with the five '_N15' columns added
        (all existing columns are kept).
    """
    speed = df_v.iloc[0]['avg_mph']
    length = df_l.iloc[0]['link_length']

    # 0.25 * speed is the same for every size class, so compute it once
    quarter_hour_speed = 0.25 * speed

    for size_class in ('Total', 'Small', 'Medium', 'Large', 'Very Large'):
        df_N[size_class + '_N15'] = quarter_hour_speed * df_N[size_class] / length

    return df_N

In [14]:
import os

def get_files_by_prefix(directory, prefix):
    """
    List the files directly inside a directory whose names contain a given string.

    Note: despite the function name, the match is a substring test
    (`prefix in filename`), so the string may occur anywhere in the file name,
    not only at its start. Sub-directories are not searched and directory
    entries themselves are excluded.

    Args:
    directory (str): the path to the directory to search in.
    prefix (str): the string that file names must contain.

    Returns:
    A list of file paths (directory joined with file name), in os.listdir order.
    """
    candidates = (
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if prefix in entry
    )
    return [path for path in candidates if os.path.isfile(path)]

## Model Class

In [15]:
class NeuralNetwork(nn.Module):
    """
    Small fully connected network mapping 9 input features to 5 outputs.

    Layer stack (kept under the attribute name `linear_relu_stack` so that
    saved state dicts keep matching keys):
        Linear(9, 7) -> Linear(7, 7) -> LeakyReLU(0.2) -> Linear(7, 5) -> ReLU

    The final ReLU makes every output non-negative.

    Args:
        name (str): Identifier stored on the instance as `self.name`.
    """

    def __init__(self, name):
        super().__init__()

        self.name = name

        layers = [
            nn.Linear(9, 7),
            nn.Linear(7, 7),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(7, 5),
            nn.ReLU(),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` (last dimension of size 9) through the layer stack."""
        return self.linear_relu_stack(x)

## Load Models

In [16]:
# Load one NeuralNetwork per file found under MODELS_ROOT_PATH. The model name
# is the file stem with the 'nn_model_' part removed, so it can later be
# matched against the image_id of the feature tables.
#
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code — only load model files from a trusted source.
model_paths = get_files_in_directory(MODELS_ROOT_PATH)

model_list = []

for model_path in model_paths:

  name = extract_substring(model_path).replace('nn_model_', '')

  model = NeuralNetwork(name=name)

  # map_location lets checkpoints that were saved from a GPU be loaded on a
  # CPU-only runtime; without it torch.load fails when CUDA is unavailable.
  model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

  # Inference mode
  model.eval()

  print(model.name)

  model_list.append(model)

print(len(model_list))

blackburn_30361033
blackburn_30361032
luton_m1_2557a
havering_m25_5790b
luton_m1_2557b
havering_m25_5790a
trafford_m60_9086b
hounslow_m4_2188a
trafford_m60_9083a
hounslow_m4_2188b
10


## Load Vehicle Counts

Each vehicle-count file is expected to be a CSV with the following columns:
`image_id, x_min, x_max, y_min, y_max, category_name, area`


In [17]:
# Read every detection CSV under VEHICLE_COUNTS_ROOT_PATH into its own frame,
# printing the image_id of the first row of each file as it is loaded.
vehicle_count_paths = get_files_in_directory(VEHICLE_COUNTS_ROOT_PATH)

df_vehicle_count_list = []

for vehicle_count_path in vehicle_count_paths:
  df = pd.read_csv(vehicle_count_path)
  print(df['image_id'].iloc[0])
  df_vehicle_count_list += [df]

df_vehicle_count_list[0].head()

blackburn_30361032
blackburn_30361033
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188a
hounslow_m4_2188b
trafford_m60_9083a
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b


Unnamed: 0,image_id,x_min,x_max,y_min,y_max,category_name,area
0,blackburn_30361032,1067.783325,1079.66449,1027.531982,1036.002106,Small Car,100
1,blackburn_30361032,1975.399071,1986.786041,528.676117,538.260757,Small Car,109
2,blackburn_30361032,1979.620247,1991.10199,514.587952,524.379181,Small Car,112
3,blackburn_30361032,1824.158295,1833.719086,641.94886,650.265575,Small Car,79
4,blackburn_30361032,2237.735962,2248.019989,209.05835,219.792877,Small Car,110


### Calculate vehicle counts 

In [18]:
# Reduce each detection table to a single-row table of counts per size class
# and tag it with the image_id of the site it came from.
df_processed_vehicle_counts_list = []

for df in df_vehicle_count_list:
  df_processed_vehicle_count = categorize_bbox_size(df)

  # Assign the site id as a scalar taken from the first detection row, rather
  # than relying on index alignment between the N-row detection table and the
  # 1-row count table.
  df_processed_vehicle_count['image_id'] = str(df['image_id'].iloc[0])

  print(df_processed_vehicle_count.iloc[0]['image_id'])

  df_processed_vehicle_counts_list.append(df_processed_vehicle_count)

print(len(df_processed_vehicle_counts_list))
df_processed_vehicle_counts_list[0].head()

blackburn_30361032
blackburn_30361033
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188a
hounslow_m4_2188b
trafford_m60_9083a
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
10


Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id
0,13,2,8,3,0,blackburn_30361032


## Load Speed

### True Speed

In [19]:
# Read every speed CSV under TRUE_SPEED_ROOT_PATH; image_id is stored as str
# so it can be compared with the ids of the other tables.
true_speed_paths = get_files_in_directory(TRUE_SPEED_ROOT_PATH)

df_speed_list = []

for true_speed_path in true_speed_paths:
  df = pd.read_csv(true_speed_path, skipinitialspace=True)
  df['image_id'] = df['image_id'].astype(str)
  df_speed_list += [df]
  print(df['image_id'].iloc[0])

print(len(df_speed_list))
df_speed_list[0].head()

luton_m1_2557a
luton_m1_2557b
blackburn_30361032
trafford_m60_9083a
trafford_m60_9086b
blackburn_30361033
havering_m25_5790b
havering_m25_5790a
hounslow_m4_2188a
hounslow_m4_2188b
10


Unnamed: 0,image_id,avg_mph
0,luton_m1_2557a,65


## Convert Vehicle Counts to Traffic Counts

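$$ N_{15} = \frac{15}{60} \cdot \frac{vN}{l} = 0.25 \, \frac{vN}{l} $$

where $v$ is the average speed (mph), $N$ is the number of vehicles counted on the link, and $l$ is the link length (in miles, to match the speed unit). The factor $15/60$ converts the hourly flow $vN/l$ into a 15-minute count.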

In [20]:
# Read every link-length CSV under LINK_LENGTH_ROOT_PATH; image_id is stored
# as str so it can be compared with the ids of the other tables.
link_length_paths = get_files_in_directory(LINK_LENGTH_ROOT_PATH)

df_link_length_list = []

for link_length_path in link_length_paths:
  df = pd.read_csv(link_length_path, skipinitialspace=True)
  df['image_id'] = df['image_id'].astype(str)
  df_link_length_list += [df]
  print(df['image_id'].iloc[0])

print(len(df_link_length_list))
df_link_length_list[0].head()

trafford_m60_9086b
hounslow_m4_2188a
blackburn_30361032
luton_m1_2557b
hounslow_m4_2188b
blackburn_30361033
havering_m25_5790b
luton_m1_2557a
havering_m25_5790a
trafford_m60_9083a
10


Unnamed: 0,image_id,link_length
0,trafford_m60_9086b,1.05


In [21]:
# Index the speed and link-length tables by the image_id of their first row so
# each site is matched with a dictionary lookup instead of scanning every
# (speed, link length) combination. Assumes one table per site.
speed_by_site = {df_speed.iloc[0]['image_id']: df_speed for df_speed in df_speed_list}
link_length_by_site = {df_link_length.iloc[0]['image_id']: df_link_length for df_link_length in df_link_length_list}

for df_processed_vehicle_counts in df_processed_vehicle_counts_list:

  image_id = df_processed_vehicle_counts.iloc[0]['image_id']

  if image_id not in speed_by_site or image_id not in link_length_by_site:
    # Report sites that cannot be processed instead of skipping them silently
    print("no speed and/or link length data for: {}".format(image_id))
    continue

  print("found match for: {}".format(image_id))

  # calculate_N15 adds the '_N15' columns to df_processed_vehicle_counts in
  # place; later cells read those columns from the same frames.
  df_traffic_count = calculate_N15(speed_by_site[image_id], df_processed_vehicle_counts, link_length_by_site[image_id])

  df_traffic_count.to_csv(os.path.join(TRAFFIC_COUNTS_ROOT_PATH, 'traffic_count_'+image_id+'.csv'))

print(len(df_processed_vehicle_counts_list))
df_processed_vehicle_counts_list[0].head()

found match for: blackburn_30361032
found match for: blackburn_30361033
found match for: havering_m25_5790a
found match for: havering_m25_5790b
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: luton_m1_2557a
found match for: luton_m1_2557b
10


Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15
0,13,2,8,3,0,blackburn_30361032,138.980263,21.381579,85.526316,32.072368,0.0


### Transform

In [22]:
# Load the min/max tables used for normalisation: every file in
# TRANSFORM_ROOT_PATH whose name contains 'transform'. Each table is indexed
# by its first (unnamed) CSV column and tagged with the lower-cased file stem
# via a `name` attribute, which the normalisation cell uses for site matching.
transform_prefix = 'transform'

transform_paths = get_files_by_prefix(TRANSFORM_ROOT_PATH, transform_prefix)

df_transform_list = []

for transform_path in transform_paths:
  df = pd.read_csv(transform_path).set_index('Unnamed: 0')
  df.name = extract_substring(transform_path).lower()
  df_transform_list += [df]

df_transform_list[0].head()

Unnamed: 0_level_0,0-520cm,521-660cm,661-1160cm,1160+cm,total_volume
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
min,0,0,0,0,1
max,1181,84,72,51,1250


In [23]:
# Min-max normalise the N15 columns of every per-site count table, using the
# 'min' and 'max' rows of the transform table that belongs to the same site.
#
# NOTE(review): the N15 columns are overwritten in place, so this cell is not
# idempotent — running it twice normalises already-normalised values. Re-run
# the traffic-count cell first if this cell has to be executed again.
# NOTE(review): no guard for max_val == min_val (division by zero).
transform_cols = ['Total_N15', 'Small_N15', 'Medium_N15', 'Large_N15', 'Very Large_N15']

for df_transform in df_transform_list:

  for df_processed_vehicle_counts in df_processed_vehicle_counts_list:

    # A transform table is matched to a site when the last five characters of
    # its name occur anywhere in the site's image_id.
    # NOTE(review): substring match — assumes those five characters are unique
    # across sites; confirm against the file naming scheme.
    if df_transform.name[-5:] in df_processed_vehicle_counts.iloc[0]['image_id']:
      
      print("found a match for: {}".format(df_processed_vehicle_counts.iloc[0]['image_id']))

      for transform_col in transform_cols:

        # NORMALISE_DICT maps the N15 column to the transform-table column holding its bounds
        min_val, max_val = df_transform.loc['min', NORMALISE_DICT[transform_col]], df_transform.loc['max', NORMALISE_DICT[transform_col]]

        # (value - min) / (max - min); the result is not clipped, so it can fall outside [0, 1]
        df_processed_vehicle_counts.loc[:, transform_col] = (df_processed_vehicle_counts[transform_col] - min_val) / (max_val - min_val)

df_processed_vehicle_counts_list[0].head()

found a match for: blackburn_30361033
found a match for: blackburn_30361032
found a match for: havering_m25_5790a
found a match for: havering_m25_5790b
found a match for: luton_m1_2557a
found a match for: luton_m1_2557b
found a match for: hounslow_m4_2188a
found a match for: hounslow_m4_2188b
found a match for: trafford_m60_9086b
found a match for: trafford_m60_9083a


Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15
0,13,2,8,3,0,blackburn_30361032,0.109944,0.019211,1.05588,0.391126,0.0


## Load Time Data

In [24]:
# Read every acquisition-time CSV under TIME_ROOT_PATH into its own frame,
# printing the image_id of the first row of each file as it is loaded.
time_paths = get_files_in_directory(TIME_ROOT_PATH)

df_time_list = []

for time_path in time_paths:
  df = pd.read_csv(time_path)
  print(df['image_id'].iloc[0])
  df_time_list += [df]

print(len(df_time_list))
df_time_list[0].head()

blackburn_30361032
blackburn_30361033
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188a
hounslow_m4_2188b
luton_m1_2557a
luton_m1_2557b
trafford_m60_9083a
trafford_m60_9086b
10


Unnamed: 0,image_id,day,month,hour
0,blackburn_30361032,24,2,11


## Concatenation

In [25]:
# Build one feature row per site by placing, side by side, the normalised N15
# columns, the speed columns and the time columns of that site.
#
# The speed and time tables are indexed by the image_id of their first row so
# each site is matched with a dictionary lookup instead of scanning every
# (speed, time) combination. Assumes one table per site.
speed_by_site = {df_speed.iloc[0]['image_id']: df_speed for df_speed in df_speed_list}
time_by_site = {df_time.iloc[0]['image_id']: df_time for df_time in df_time_list}

df_aadt_features_list = []

for df_processed_vehicle_counts in df_processed_vehicle_counts_list:

  image_id = df_processed_vehicle_counts.iloc[0]['image_id']

  if image_id not in speed_by_site or image_id not in time_by_site:
    # Report sites that cannot be processed instead of skipping them silently
    print("no speed and/or time data for: {}".format(image_id))
    continue

  print("Found match for: {}".format(image_id))

  # Column-wise concat: rows are aligned on the index, so each input is
  # expected to hold its single row at the same index label.
  df = pd.concat(
      [
          df_processed_vehicle_counts[['image_id', 'Total_N15', 'Small_N15', 'Medium_N15', 'Large_N15', 'Very Large_N15']],
          speed_by_site[image_id].drop(['image_id'], axis=1),
          time_by_site[image_id].drop(['image_id'], axis=1),
      ],
      axis=1,
  )

  print(df.iloc[0]['image_id'])

  df_aadt_features_list.append(df)

print(len(df_aadt_features_list))
df_aadt_features_list[1].head()

Found match for: blackburn_30361032
blackburn_30361032
Found match for: blackburn_30361033
blackburn_30361033
Found match for: havering_m25_5790a
havering_m25_5790a
Found match for: havering_m25_5790b
havering_m25_5790b
Found match for: hounslow_m4_2188a
hounslow_m4_2188a
Found match for: hounslow_m4_2188b
hounslow_m4_2188b
Found match for: trafford_m60_9083a
trafford_m60_9083a
Found match for: trafford_m60_9086b
trafford_m60_9086b
Found match for: luton_m1_2557a
luton_m1_2557a
Found match for: luton_m1_2557b
luton_m1_2557b
10


Unnamed: 0,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15,avg_mph,day,month,hour
0,blackburn_30361033,0.273103,0.0,0.254543,4.157529,0.419247,65,24,2,11


## Prediction

In [26]:
# Run each site's feature row through the model whose name equals the site's
# image_id and save the five predicted values to a per-site CSV.

# Look models up by name. setdefault keeps the first model for a given name,
# matching the previous first-match behaviour if a name occurs more than once.
models_by_name = {}
for model in model_list:
  models_by_name.setdefault(model.name, model)

# Number of predictions made
i = 0

for df_aadt_features in df_aadt_features_list:

  model = models_by_name.get(df_aadt_features.iloc[0]['image_id'])

  # Sites without a matching model are skipped
  if model is None:
    continue

  # The site id is not a model input; the remaining columns are the features
  features = df_aadt_features.drop(['image_id'], axis=1)

  print("Local Authority Count Site: {} \n\nInput Features: \n {}\n".format(model.name, features))

  x = torch.tensor(features.iloc[0].values, dtype=torch.float32)

  # Inference only: no gradient tracking needed
  with torch.no_grad():
    y = np.round(model(x).detach().numpy(), 2)

  print("AADT Prediction: {}".format(y))

  print("Saving to csv: {}".format(AADT_ROOT_PATH+'aadt_'+model.name+'.csv'))

  save_float_to_csv(y, COLUMN_NAMES, model.name, AADT_ROOT_PATH+'aadt_'+model.name+'.csv')

  i = i + 1

  print("---------------------------------------")

print("Number of predictions made: {}".format(i))

Local Authority Count Site: blackburn_30361032 

Input Features: 
    Total_N15  Small_N15  Medium_N15  Large_N15  Very Large_N15  avg_mph   day  \
0   0.109944   0.019211     1.05588   0.391126             0.0       65    24   

    month   hour  
0       2     11  

AADT Prediction: [21257.51 19048.04   211.69  1969.94   799.12]
Saving to csv: /content/drive/MyDrive/Spatial_Finance_Transport/ARoads/data/predicted/aadt/aadt_blackburn_30361032.csv
---------------------------------------
Local Authority Count Site: blackburn_30361033 

Input Features: 
    Total_N15  Small_N15  Medium_N15  Large_N15  Very Large_N15  avg_mph   day  \
0   0.273103        0.0    0.254543   4.157529        0.419247       65    24   

    month   hour  
0       2     11  

AADT Prediction: [21863.6  19586.16   224.71  2023.71   827.62]
Saving to csv: /content/drive/MyDrive/Spatial_Finance_Transport/ARoads/data/predicted/aadt/aadt_blackburn_30361033.csv
---------------------------------------
Local Authority 