# Evaluation

**TODO**:
- Add $R^2$ values to evaluation metrics
- Add speed estimation

## Imports

In [1]:
!pip install -U kaleido

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [2]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
import plotly.graph_objs as go
from sklearn.metrics import r2_score

## Global Variables

In [3]:
COLAB = True

In [4]:
ROOT_DIR_PATH = os.path.abspath('..')

if COLAB:

  from google.colab import drive
  drive.mount('/content/drive')

  from google.colab import files

  ROOT_DIR_PATH = os.path.abspath('drive/MyDrive/Spatial_Finance_Transport/ARoads/')

TRUE_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/aadt/')

PRED_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/aadt/')

TRUE_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/ghg_emissions/')

PRED_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/ghg_emissions/')

TRUE_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/traffic_counts/')

PRED_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/traffic_counts/')

PLOT_DIR = os.path.join(ROOT_DIR_PATH, 'data/predicted/results/plots/')

METRICS_DIR = os.path.join(ROOT_DIR_PATH, 'data/predicted/results/metrics/')

ADMIN_DIR = os.path.join(ROOT_DIR_PATH, 'admin/')

CHOSEN_COUNT_SITES = [('Luton', 'M1/2557A', 'M1/2557B'), ('Hounslow', 'M4/2188A', 'M4/2188B'), ('Enfield', 'M25/5441A', 'M25/5441B'), 
                      ('Blackburn with Darwen', '30361033', '30361032'), ('Havering', 'M25/5790A', 'M25/5790B'), ('Trafford', 'M60/9083A', 'M60/9086B')]

Mounted at /content/drive


In [5]:
NORMALISE_DICT = {
    'Total_N15': 'Total Volume',
    'Small_N15': '0-520cm',
    'Medium_N15': '521-660cm',
    'Large_N15': '661-1160cm',
    'Very Large_N15': '1160+cm'
}

MODE = 'median' # from max, median, mean

## Helper Functions

In [6]:
def get_files_by_prefix(directory, prefix):
    """
    Returns a list of file paths in a directory that match the start of a string.
    
    Args:
    directory (str): the path to the directory to search in.
    prefix (str): the prefix of the file names to match.
    
    Returns:
    A list of file paths that match the specified prefix.
    """
    matching_files = []
    for filename in os.listdir(directory):
        if prefix in filename:
            file_path = os.path.join(directory, filename)
            if os.path.isfile(file_path):
                matching_files.append(file_path)
    return matching_files

In [7]:
def match_before_underscore_or_space(str1, str2):
    """Checks if two strings match in elements before the first underscore or space.

    Args:
        str1 (str): The first string.
        str2 (str): The second string.

    Returns:
        bool: True if the strings match in elements before the first underscore or space, False otherwise.
    """
    # Find the index of the first underscore or space in both strings
    index1 = min(str1.find("_"), str1.find(" ")) if (str1.find("_") != -1 and str1.find(" ") != -1) else max(str1.find("_"), str1.find(" "))
    index2 = min(str2.find("_"), str2.find(" ")) if (str2.find("_") != -1 and str2.find(" ") != -1) else max(str2.find("_"), str2.find(" "))
    
    # Extract the substring before the first underscore or space from both strings
    sub_str1 = str1[:index1] if index1 >= 0 else str1
    sub_str2 = str2[:index2] if index2 >= 0 else str2
    
    # Compare the two substrings
    return sub_str1 == sub_str2

## AADT

### Load true data

In [8]:
df_true_aadt_list = []

prefix = 'all_motor_vehicles'

true_aadt_paths = get_files_by_prefix(TRUE_AADT_PATH, prefix)

for true_aadt_path in true_aadt_paths:
  df = pd.read_csv(true_aadt_path)

  print(df.iloc[0]['Local Authority'])

  df_true_aadt_list.append(df)

print("df list length: {}".format(len(df_true_aadt_list)))
df_true_aadt_list[0].head()

Luton
Hounslow
Enfield
Trafford
Havering
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,all_motor_vehicles,Local Authority
0,0,2005,22503.0,466.0,2257.0,895.0,25841.0,Luton
1,1,2006,20746.0,396.0,3022.0,996.0,24082.0,Luton
2,2,2007,22281.0,504.0,2781.0,975.0,25750.0,Luton
3,3,2008,19143.0,508.0,2769.0,772.0,22387.0,Luton
4,4,2009,18837.0,497.0,2742.0,724.0,22219.0,Luton


### Load predicted data

In [9]:
df_pred_aadt_list = []

prefix = 'aadt_'

pred_aadt_paths = get_files_by_prefix(PRED_AADT_PATH, prefix)

for pred_aadt_path in pred_aadt_paths:
  df = pd.read_csv(pred_aadt_path)

  print(df.iloc[0]['image_id'])

  df_pred_aadt_list.append(df)

print("df list length: {}".format(len(df_pred_aadt_list)))
df_pred_aadt_list[0].head()

blackburn_30361032
havering_m25_5790a
blackburn_30361033
havering_m25_5790b
hounslow_m4_2188a
trafford_m60_9083a
hounslow_m4_2188b
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
df list length: 10


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs
0,blackburn_30361032,21257.51,19048.04,211.69,1969.94,799.12


### Average predictions

In [10]:
for df_pred_aadt_1 in df_pred_aadt_list:

  image_id_1 = df_pred_aadt_1.iloc[0]['image_id']

  aadt_1 = df_pred_aadt_1.iloc[0]['aadt']

  for df_pred_aadt_2 in df_pred_aadt_list:

    image_id_2 = df_pred_aadt_2.iloc[0]['image_id']

    aadt_2 = df_pred_aadt_2.iloc[0]['aadt']

    if match_before_underscore_or_space(image_id_1, image_id_2) and (image_id_1 != image_id_2):

      print("found match for: {}".format(image_id_1))

      mean_aadt = ( aadt_1 + aadt_2 ) / 2

      df_pred_aadt_1['mean_aadt'] = mean_aadt
      df_pred_aadt_2['mean_aadt'] = mean_aadt

df_pred_aadt_list[0].head()

found match for: blackburn_30361032
found match for: havering_m25_5790a
found match for: blackburn_30361033
found match for: havering_m25_5790b
found match for: hounslow_m4_2188a
found match for: trafford_m60_9083a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9086b
found match for: luton_m1_2557a
found match for: luton_m1_2557b


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,mean_aadt
0,blackburn_30361032,21257.51,19048.04,211.69,1969.94,799.12,21560.555


### Add true aadt column to predicted

In [11]:
df_aadt = pd.DataFrame()

for df_true_aadt in df_true_aadt_list:

  la_name = df_true_aadt.iloc[0]['Local Authority']

  true_aadt = df_true_aadt.loc[df_true_aadt['year'] == 2018]['all_motor_vehicles'].values

  for df_pred_aadt in df_pred_aadt_list:

    image_id = df_pred_aadt.iloc[0]['image_id']

    if match_before_underscore_or_space(la_name.lower(), image_id):

      print("found match for: {}".format(image_id))

      df_pred_aadt['true_aadt'] = true_aadt

      df_aadt = pd.concat([df_aadt, df_pred_aadt], ignore_index=True)

df_aadt

found match for: luton_m1_2557a
found match for: luton_m1_2557b
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: havering_m25_5790a
found match for: havering_m25_5790b
found match for: blackburn_30361032
found match for: blackburn_30361033


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,mean_aadt,true_aadt
0,luton_m1_2557a,27545.24,23811.32,579.04,2983.71,894.38,27608.9,25477.0
1,luton_m1_2557b,27672.56,23927.68,582.78,3001.21,899.21,27608.9,25477.0
2,hounslow_m4_2188a,57035.46,47782.01,1357.81,9727.59,2005.21,60784.41,56531.0
3,hounslow_m4_2188b,64533.36,54096.74,1550.85,10885.01,2233.67,60784.41,56531.0
4,trafford_m60_9083a,35333.84,31227.3,403.36,3524.59,1845.42,35976.83,34266.0
5,trafford_m60_9086b,36619.82,32383.09,415.93,3662.84,1913.9,35976.83,34266.0
6,havering_m25_5790a,44657.6,29890.57,1664.71,9618.61,5604.27,44823.93,50266.0
7,havering_m25_5790b,44990.26,29881.18,1685.31,9694.4,5671.72,44823.93,50266.0
8,blackburn_30361032,21257.51,19048.04,211.69,1969.94,799.12,21560.555,18027.0
9,blackburn_30361033,21863.6,19586.16,224.71,2023.71,827.62,21560.555,18027.0


### AADT Evaluation

In [12]:
df_aadt_results = pd.DataFrame(df_aadt)

# group dataframe by image_id and calculate RMSE and MAPE for each group
grouped = df_aadt.groupby('image_id')
df_aadt_results = grouped.apply(lambda x: pd.Series({'RMSE': mean_squared_error(x['true_aadt'], x['mean_aadt'], squared=False),
                                                      'MAPE': mean_absolute_percentage_error(x['true_aadt'], x['mean_aadt'])}))

# print the resulting dataframe with image_id as the index
result = df_aadt_results.reset_index()
result = result.rename(columns={'index': 'image_id'})
result = result[['image_id', 'RMSE', 'MAPE']]
result.set_index('image_id', inplace=True)

# add a row at the bottom of the dataframe with the average of the RMSE and MAPE columns
avg_row = result.mean()
avg_row.name = 'Average'
result = result.append(avg_row)

# x and y values of the data points
x = df_aadt['mean_aadt']
y = df_aadt['true_aadt']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result.to_csv(METRICS_DIR+'aroads_aadt_'+MODE+'_metrics.csv')

result

R-squared value of the line of best fit: 0.9438786233578671


  result = result.append(avg_row)


Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,3533.555,0.196015
blackburn_30361033,3533.555,0.196015
havering_m25_5790a,5442.07,0.108265
havering_m25_5790b,5442.07,0.108265
hounslow_m4_2188a,4253.41,0.07524
hounslow_m4_2188b,4253.41,0.07524
luton_m1_2557a,2131.9,0.083679
luton_m1_2557b,2131.9,0.083679
trafford_m60_9083a,1710.83,0.049928
trafford_m60_9086b,1710.83,0.049928


In [13]:
# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_aadt['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_aadt[df_aadt['image_id'] == image_id]['aadt'],
        y=df_aadt[df_aadt['image_id'] == image_id]['true_aadt'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_aadt[df_aadt['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_aadt['aadt'],
    y=np.polyval(np.polyfit(df_aadt['aadt'], df_aadt['true_aadt'], 1), df_aadt['aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=df_aadt['aadt'],
    y=df_aadt['aadt'],  # Use the x-values as y-values to create a diagonal line
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='Equal Line'
))

fig.update_layout(
    title='A-Roads Predicted AADT vs True AADT for Chosen LAs',
    xaxis_title='Pred AADT',
    yaxis_title='True AADT',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_aadt['aadt']), min(df_aadt['true_aadt']) ) - 2000, max( max(df_aadt['aadt']), max(df_aadt['true_aadt']) ) + 2000]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_aadt['aadt']), min(df_aadt['true_aadt']) ) - 2000, max( max(df_aadt['aadt']), max(df_aadt['true_aadt']) ) + 2000]  # Set y-axis range
    )
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'a_roads_aadt_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()

## GHG Emissions

### Load true data

In [14]:
df_true_ghg_list = []

prefix = 'ghg'

true_ghg_paths = get_files_by_prefix(TRUE_GHG_PATH, prefix)

for true_ghg_path in true_ghg_paths:
  df = pd.read_csv(true_ghg_path)

  print(df.iloc[0]['Local Authority'])

  df_true_ghg_list.append(df)

print("df list length: {}".format(len(df_true_ghg_list)))
df_true_ghg_list[0].head()

Luton
Enfield
Havering
Hounslow
Trafford
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,Annual Territorial emissions (kt CO2e),Local Authority
0,0,2005,74.510282,Luton
1,1,2006,71.869381,Luton
2,2,2007,71.532634,Luton
3,3,2008,64.632141,Luton
4,4,2009,63.204614,Luton


### Load predicted data

In [15]:
df_pred_ghg_list = []

prefix = 'ghg_'

pred_ghg_paths = get_files_by_prefix(PRED_GHG_PATH, prefix)

for pred_ghg_path in pred_ghg_paths:
  df = pd.read_csv(pred_ghg_path)

  print(df.iloc[0]['image_id'])

  df_pred_ghg_list.append(df)

print("df list length: {}".format(len(df_pred_ghg_list)))
df_pred_ghg_list[0].head()

blackburn_30361032
blackburn_30361033
hounslow_m4_2188a
havering_m25_5790b
havering_m25_5790a
trafford_m60_9083a
hounslow_m4_2188b
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
df list length: 10


Unnamed: 0,image_id,ghg_emissions
0,blackburn_30361032,39.414717


### Average predictions

In [16]:
for df_pred_ghg_1 in df_pred_ghg_list:

  image_id_1 = df_pred_ghg_1.iloc[0]['image_id']

  ghg_1 = df_pred_ghg_1.iloc[0]['ghg_emissions']

  for df_pred_ghg_2 in df_pred_ghg_list:

    image_id_2 = df_pred_ghg_2.iloc[0]['image_id']

    ghg_2 = df_pred_ghg_2.iloc[0]['ghg_emissions']

    if match_before_underscore_or_space(image_id_1, image_id_2) and (image_id_1 != image_id_2):

      print("found match for: {}".format(image_id_1))

      mean_ghg = ( ghg_1 + ghg_2 ) / 2

      df_pred_ghg_1['mean_ghg'] = mean_ghg
      df_pred_ghg_2['mean_ghg'] = mean_ghg

df_pred_ghg_list[0].head()

found match for: blackburn_30361032
found match for: blackburn_30361033
found match for: hounslow_m4_2188a
found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: trafford_m60_9083a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9086b
found match for: luton_m1_2557a
found match for: luton_m1_2557b


Unnamed: 0,image_id,ghg_emissions,mean_ghg
0,blackburn_30361032,39.414717,40.076037


### Add true ghg column to predicted

In [17]:
df_ghg = pd.DataFrame(columns=['image_id', 'ghg', 'true_ghg', 'mean_ghg'])

for df_true_ghg in df_true_ghg_list:

    la_name = df_true_ghg.iloc[0]['Local Authority']

    true_ghg = df_true_ghg.loc[df_true_ghg['year'] == 2018]['Annual Territorial emissions (kt CO2e)'].values[0]

    for df_pred_ghg in df_pred_ghg_list:

        image_id = df_pred_ghg.iloc[0]['image_id']

        pred_ghg = df_pred_ghg.iloc[0]['mean_ghg']

        ghg_emissions = df_pred_ghg.iloc[0]['ghg_emissions']

        if match_before_underscore_or_space(la_name.lower(), image_id):

          print("found match for: {}".format(image_id))

          df_ghg.loc[len(df_ghg)] = [image_id, ghg_emissions, true_ghg, pred_ghg]

df_ghg

found match for: luton_m1_2557a
found match for: luton_m1_2557b
found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: blackburn_30361032
found match for: blackburn_30361033


Unnamed: 0,image_id,ghg,true_ghg,mean_ghg
0,luton_m1_2557a,37.033327,55.167324,37.135259
1,luton_m1_2557b,37.23719,55.167324,37.135259
2,havering_m25_5790b,162.226071,149.200477,161.432228
3,havering_m25_5790a,160.638385,149.200477,161.432228
4,hounslow_m4_2188a,194.207449,183.956288,206.551773
5,hounslow_m4_2188b,218.896097,183.956288,206.551773
6,trafford_m60_9083a,77.712176,93.998716,79.136372
7,trafford_m60_9086b,80.560568,93.998716,79.136372
8,blackburn_30361032,39.414717,54.864172,40.076037
9,blackburn_30361033,40.737357,54.864172,40.076037


### GHG Evaluation

In [18]:
df_ghg_results = pd.DataFrame(df_ghg)

# group dataframe by image_id and calculate RMSE and MAPE for each group
grouped = df_ghg.groupby('image_id')
df_ghg_results = grouped.apply(lambda x: pd.Series({'RMSE': mean_squared_error(x['true_ghg'], x['mean_ghg'], squared=False),
                                                      'MAPE': mean_absolute_percentage_error(x['true_ghg'], x['mean_ghg'])}))

# print the resulting dataframe with image_id as the index
result = df_ghg_results.reset_index()

result = result.rename(columns={'index': 'image_id'})
result = result[['image_id', 'RMSE', 'MAPE']]

result.set_index('image_id', inplace=True)

# add a row at the bottom of the dataframe with the average of the RMSE and MAPE columns
avg_row = result.mean()
avg_row.name = 'Average'
result = result.append(avg_row)

# x and y values of the data points
x = df_ghg['mean_ghg']
y = df_ghg['true_ghg']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result.to_csv(METRICS_DIR+'a_roads_ghg_'+MODE+'metrics.csv')

result

R-squared value of the line of best fit: 0.9961450508681159



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,14.788135,0.269541
blackburn_30361033,14.788135,0.269541
havering_m25_5790a,12.231751,0.081982
havering_m25_5790b,12.231751,0.081982
hounslow_m4_2188a,22.595486,0.122831
hounslow_m4_2188b,22.595486,0.122831
luton_m1_2557a,18.032066,0.326861
luton_m1_2557b,18.032066,0.326861
trafford_m60_9083a,14.862344,0.158112
trafford_m60_9086b,14.862344,0.158112


In [19]:
import numpy as np
import plotly.graph_objects as go

# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_ghg['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_ghg[df_ghg['image_id'] == image_id]['ghg'],
        y=df_ghg[df_ghg['image_id'] == image_id]['true_ghg'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_ghg[df_ghg['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_ghg['ghg'],
    y=np.polyval(np.polyfit(df_ghg['ghg'], df_ghg['true_ghg'], 1), df_ghg['ghg']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=df_ghg['ghg'],
    y=df_ghg['ghg'],  # Use the x-values as y-values to create a diagonal line
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='Equal Line'
))

fig.update_layout(
    title='A-Roads Predicted GHG vs True GHG for Chosen LAs',
    xaxis_title='Pred GHG',
    yaxis_title='True GHG',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_ghg['ghg']), min(df_ghg['true_ghg']) ) - 20, max( max(df_ghg['ghg']), max(df_ghg['true_ghg']) ) + 20]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_ghg['ghg']), min(df_ghg['true_ghg']) ) - 20, max( max(df_ghg['ghg']), max(df_ghg['true_ghg']) ) + 20]  # Set y-axis range
    )
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'aroads_ghg_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()

## 15 Minute Traffic Count

### Load true data

In [20]:
df_true_traffic_count = pd.DataFrame()

prefix = 'traffic_count'

true_traffic_count_paths = get_files_by_prefix(TRUE_TRAFFIC_COUNT_PATH, prefix)

for true_traffic_count_path in true_traffic_count_paths:
  df = pd.read_csv(true_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  df_true_traffic_count = pd.concat([df_true_traffic_count, df], axis=0)

print("df list length: {}".format(len(df_true_traffic_count)))
df_true_traffic_count.head()

luton_m1_2557a
luton_m1_2557b
blackburn_30361033
blackburn_30361032
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188b
hounslow_m4_2188a
trafford_m60_9083a
trafford_m60_9086b
df list length: 10


Unnamed: 0,image_id,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume
0,luton_m1_2557a,753,277,34,59,1123
0,luton_m1_2557b,1045,33,16,63,1157
0,blackburn_30361033,504,31,18,7,560
0,blackburn_30361032,436,13,10,7,466
0,havering_m25_5790a,801,104,89,182,1176


### Load predicted data

In [21]:
df_pred_traffic_count = pd.DataFrame()

prefix = 'traffic_count'

pred_traffic_count_paths = get_files_by_prefix(PRED_TRAFFIC_COUNT_PATH, prefix)

for pred_traffic_count_path in pred_traffic_count_paths:
  df = pd.read_csv(pred_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  df_pred_traffic_count = pd.concat([df_pred_traffic_count, df], axis=0)

print("df list length: {}".format(len(df_pred_traffic_count)))
df_pred_traffic_count.head()

blackburn_30361032
blackburn_30361033
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188b
hounslow_m4_2188a
trafford_m60_9083a
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
df list length: 10


Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15
0,0,13,2,8,3,0,blackburn_30361032,138.980263,21.381579,85.526316,32.072368,0.0
0,0,32,0,2,28,2,blackburn_30361033,342.105263,0.0,21.381579,299.342105,21.381579
0,0,86,3,13,37,33,havering_m25_5790a,222.04918,7.745902,33.565574,95.532787,85.204918
0,0,171,25,29,71,46,havering_m25_5790b,441.516393,64.54918,74.877049,183.319672,118.770492
0,0,38,4,7,18,9,hounslow_m4_2188b,356.936416,37.572254,65.751445,169.075145,84.537572


### Traffic Count Evaluation

In [22]:
# Merge the dataframes on the 'image_id' column
df_traffic_count = pd.merge(df_pred_traffic_count, df_true_traffic_count, on='image_id')

# Calculate the RMSE and MAPE for each row
rmse_list = []
mape_list = []
for i in range(len(df_traffic_count)):
    actual = df_traffic_count.loc[i, list(NORMALISE_DICT.keys())].values
    predicted = df_traffic_count.loc[i, list(NORMALISE_DICT.values())].values
    rmse = np.sqrt(((actual - predicted) ** 2).mean())
    mape = np.abs((actual - predicted) / actual)
    mape = np.where(actual == 0, 0, mape)  # handle division by zero
    mape = mape.mean() * 100
    rmse_list.append(rmse)
    mape_list.append(mape)

# Add the RMSE and MAPE columns to the merged dataframe
df_traffic_count['RMSE'] = rmse_list
df_traffic_count['MAPE'] = mape_list

# x and y values of the data points
x = df_traffic_count['Total_N15']
y = df_traffic_count['Total Volume']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

# Output the results
df_traffic_count

R-squared value of the line of best fit: 0.22867808659486555



divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in true_divide



Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume,RMSE,MAPE
0,0,13,2,8,3,0,blackburn_30361032,138.980263,21.381579,85.526316,32.072368,0.0,436,13,10,7,466,238.598659,465.611677
1,0,32,0,2,28,2,blackburn_30361033,342.105263,0.0,21.381579,299.342105,21.381579,504,31,18,7,560,276.024216,53.985055
2,0,86,3,13,37,33,havering_m25_5790a,222.04918,7.745902,33.565574,95.532787,85.204918,801,104,89,182,1176,557.431109,2200.169403
3,0,171,25,29,71,46,havering_m25_5790b,441.516393,64.54918,74.877049,183.319672,118.770492,545,248,117,158,1068,363.104257,237.325491
4,0,38,4,7,18,9,hounslow_m4_2188b,356.936416,37.572254,65.751445,169.075145,84.537572,523,247,9,9,788,311.652667,374.488028
5,0,1,1,0,0,0,hounslow_m4_2188a,9.393064,9.393064,0.0,0.0,0.0,688,50,17,9,764,454.490705,3051.643077
6,0,31,6,14,11,0,trafford_m60_9083a,494.52381,95.714286,223.333333,175.47619,0.0,645,257,36,8,946,324.406103,151.946951
7,0,37,4,15,17,1,trafford_m60_9086b,572.619048,61.904762,232.142857,263.095238,15.47619,816,29,12,10,867,389.785993,297.578926
8,0,40,8,11,15,6,luton_m1_2557a,477.941176,95.588235,131.433824,179.227941,71.691176,753,277,34,59,1123,422.074128,206.440951
9,0,47,4,22,16,5,luton_m1_2557b,544.301471,46.323529,254.779412,185.294118,57.904412,1045,33,16,63,1157,538.632853,491.130348


In [23]:
import numpy as np
import plotly.graph_objects as go

# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_traffic_count['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_traffic_count[df_traffic_count['image_id'] == image_id]['Total_N15'],
        y=df_traffic_count[df_traffic_count['image_id'] == image_id]['Total Volume'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_traffic_count[df_traffic_count['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_traffic_count['Total_N15'],
    y=np.polyval(np.polyfit(df_traffic_count['Total_N15'], df_traffic_count['Total Volume'], 1), df_traffic_count['Total_N15']),
    mode='lines',
    name='Line of Best Fit'
))

fig.update_layout(
    title='Predicted Traffic Counts vs True Traffic for Chosen LA Count Sites',
    xaxis_title='Predicted Traffic Count',
    yaxis_title='True Traffic Count',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_traffic_count['Total Volume']), min(df_traffic_count['Total_N15']) ) , 
               max( max(df_traffic_count['Total Volume']), max(df_traffic_count['Total_N15']) )]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_traffic_count['Total Volume']), min(df_traffic_count['Total_N15']) ) , 
               max( max(df_traffic_count['Total Volume']), max(df_traffic_count['Total_N15']) )]  # Set y-axis range
    )
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'traffic_counts_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()

## Speed Estimation

## AADT and GHG 

In [24]:
merged_df = pd.merge(df_aadt[['image_id', 'mean_aadt', 'true_aadt']], df_ghg[['image_id', 'mean_ghg', 'true_ghg']], on='image_id')

merged_df.head()

Unnamed: 0,image_id,mean_aadt,true_aadt,mean_ghg,true_ghg
0,luton_m1_2557a,27608.9,25477.0,37.135259,55.167324
1,luton_m1_2557b,27608.9,25477.0,37.135259,55.167324
2,hounslow_m4_2188a,60784.41,56531.0,206.551773,183.956288
3,hounslow_m4_2188b,60784.41,56531.0,206.551773,183.956288
4,trafford_m60_9083a,35976.83,34266.0,79.136372,93.998716


In [25]:
merged_df['first_id'] = merged_df['image_id'].str.split('_').str[0]
merged_df = merged_df.drop_duplicates(subset='first_id', keep='first').drop(columns='first_id')
merged_df['image_id'] = merged_df['image_id'].str.split('_').str[0]

merged_df.head()

Unnamed: 0,image_id,mean_aadt,true_aadt,mean_ghg,true_ghg
0,luton,27608.9,25477.0,37.135259,55.167324
2,hounslow,60784.41,56531.0,206.551773,183.956288
4,trafford,35976.83,34266.0,79.136372,93.998716
6,havering,44823.93,50266.0,161.432228,149.200477
8,blackburn,21560.555,18027.0,40.076037,54.864172


In [26]:
# scatter plot of mean_aadt vs. mean_ghg
fig = go.Figure()

# Define a list of colors
colors = ['red', 'green', 'blue', 'yellow', 'orange', 'purple', 'pink']

# Generate a unique color for each unique image_id
image_ids = merged_df['image_id'].unique()
color_mapping = {image_id: colors[i % len(colors)] for i, image_id in enumerate(image_ids)}

for image_id in image_ids:
    # Get the color value for the current image_id
    color_value = color_mapping[image_id]
    
    image_df = merged_df[merged_df['image_id'] == image_id]
    
    fig.add_trace(go.Scatter(
        x=image_df['mean_aadt'],
        y=image_df['mean_ghg'],
        mode='markers',
        marker={
            'size': 10,
            'color': [color_value] * len(image_df),
            'cmin': 0,
            'cmax': 1
        },
        name=image_id,
        text=image_df['image_id']
    ))
    
    fig.add_trace(go.Scatter(
        x=image_df['true_aadt'],
        y=image_df['true_ghg'],
        mode='markers',
        marker={
            'size': 10,
            'symbol': 'star',
            'color': [color_value] * len(image_df),
            'cmin': 0,
            'cmax': 1
        },
        name=image_id,
        text=image_df['image_id']
    ))
    

fig.add_trace(go.Scatter(
    x=merged_df['mean_aadt'],
    y=np.polyval(np.polyfit(merged_df['mean_aadt'], merged_df['mean_ghg'], 1), merged_df['mean_aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Pred Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=merged_df['true_aadt'],
    y=np.polyval(np.polyfit(merged_df['true_aadt'], merged_df['true_ghg'], 1), merged_df['true_aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='True Line of Best Fit'
))

fig.update_layout(
    title='A-Roads AADT vs GHG Emissions for Chosen LAs',
    xaxis_title='Mean AADT',
    yaxis_title='Mean GHG',
    legend_title='Image ID',
    width=1000,
    height=800
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'aroads_aadt_ghg_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()