# Evaluation

**TODO**:
- Add $R^2$ values to evaluation metrics
- Add speed estimation

## Imports

In [1]:
!pip install -U kaleido

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [2]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
import plotly.graph_objs as go
from sklearn.metrics import r2_score

## Global Variables

In [3]:
COLAB = True

In [4]:
ROOT_DIR_PATH = os.path.abspath('..')

if COLAB:

  from google.colab import drive
  drive.mount('/content/drive')

  ROOT_DIR_PATH = os.path.abspath('drive/MyDrive/Spatial_Finance_Transport/minorRoads/')

TRUE_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/aadt/')

PRED_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/aadt/')

TRUE_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/ghg_emissions/')

PRED_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/ghg_emissions/')

TRUE_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/traffic_counts/')

PRED_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/traffic_counts/')

PLOT_DIR = os.path.join(ROOT_DIR_PATH, 'data/predicted/results/plots/')

METRICS_DIR = os.path.join(ROOT_DIR_PATH, 'data/predicted/results/metrics/')

CHOSEN_COUNT_SITES = [('Luton', 'M1/2557A', 'M1/2557B'), ('Hounslow', 'M4/2188A', 'M4/2188B'), ('Enfield', 'M25/5441A', 'M25/5441B'), 
                      ('Blackburn with Darwen', '30361033', '30361032'), ('Havering', 'M25/5790A', 'M25/5790B'), ('Trafford', 'M60/9083A', 'M60/9086B')]

Mounted at /content/drive


In [5]:
NORMALISE_DICT = {
    'Total_N15': 'Total Volume',
    'Small_N15': '0-520cm',
    'Medium_N15': '521-660cm',
    'Large_N15': '661-1160cm',
    'Very Large_N15': '1160+cm'
}

MODE = 'median' # from max, mean, median

## Helper Functions

In [6]:
def get_files_by_prefix(directory, prefix):
    """
    Returns a list of file paths in a directory that match the start of a string.
    
    Args:
    directory (str): the path to the directory to search in.
    prefix (str): the prefix of the file names to match.
    
    Returns:
    A list of file paths that match the specified prefix.
    """
    matching_files = []
    for filename in os.listdir(directory):
        if prefix in filename:
            file_path = os.path.join(directory, filename)
            if os.path.isfile(file_path):
                matching_files.append(file_path)
    return matching_files

In [7]:
def match_before_underscore_or_space(str1, str2):
    """Checks if two strings match in elements before the first underscore or space.

    Args:
        str1 (str): The first string.
        str2 (str): The second string.

    Returns:
        bool: True if the strings match in elements before the first underscore or space, False otherwise.
    """
    # Find the index of the first underscore or space in both strings
    index1 = min(str1.find("_"), str1.find(" ")) if (str1.find("_") != -1 and str1.find(" ") != -1) else max(str1.find("_"), str1.find(" "))
    index2 = min(str2.find("_"), str2.find(" ")) if (str2.find("_") != -1 and str2.find(" ") != -1) else max(str2.find("_"), str2.find(" "))
    
    # Extract the substring before the first underscore or space from both strings
    sub_str1 = str1[:index1] if index1 >= 0 else str1
    sub_str2 = str2[:index2] if index2 >= 0 else str2
    
    # Compare the two substrings
    return sub_str1 == sub_str2

## AADT

### Load true data

In [8]:
df_true_aadt_list = []

prefix = 'all_motor_vehicles'

true_aadt_paths = get_files_by_prefix(TRUE_AADT_PATH, prefix)

for true_aadt_path in true_aadt_paths:
  df = pd.read_csv(true_aadt_path)

  print(df.iloc[0]['Local Authority'])

  df_true_aadt_list.append(df)

print("df list length: {}".format(len(df_true_aadt_list)))
df_true_aadt_list[0].head()

Luton
Hounslow
Enfield
Trafford
Havering
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,all_motor_vehicles,Local Authority
0,0,2005,4117.0,58.0,376.0,49.0,4602.0,Luton
1,1,2006,4079.0,57.0,379.0,56.0,4518.0,Luton
2,2,2007,2940.0,71.0,383.0,63.0,3499.0,Luton
3,3,2008,847.0,7.0,85.0,22.0,988.0,Luton
4,4,2009,2414.0,36.0,245.5,25.5,2836.0,Luton


### Load predicted data

In [9]:
df_pred_aadt_list = []

prefix = 'aadt_'

pred_aadt_paths = get_files_by_prefix(PRED_AADT_PATH, prefix)

for pred_aadt_path in pred_aadt_paths:
  df = pd.read_csv(pred_aadt_path)

  print(df.iloc[0]['image_id'])

  df_pred_aadt_list.append(df)

print("df list length: {}".format(len(df_pred_aadt_list)))
df_pred_aadt_list[0].head()

blackburn_30361032
havering_m25_5790b
blackburn_30361033
hounslow_m4_2188b
trafford_m60_9086b
havering_m25_5790a
hounslow_m4_2188a
trafford_m60_9083a
luton_m1_2557b
luton_m1_2557a
df list length: 10


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs
0,blackburn_30361032,651.68,639.8,542.97,551.39,543.34


### Average predictions

In [10]:
for df_pred_aadt_1 in df_pred_aadt_list:

  image_id_1 = df_pred_aadt_1.iloc[0]['image_id']

  aadt_1 = df_pred_aadt_1.iloc[0]['aadt']

  for df_pred_aadt_2 in df_pred_aadt_list:

    image_id_2 = df_pred_aadt_2.iloc[0]['image_id']

    aadt_2 = df_pred_aadt_2.iloc[0]['aadt']

    if match_before_underscore_or_space(image_id_1, image_id_2) and (image_id_1 != image_id_2):

      print("found match for: {}".format(image_id_1))

      mean_aadt = ( aadt_1 + aadt_2 ) / 2

      df_pred_aadt_1['mean_aadt'] = mean_aadt
      df_pred_aadt_2['mean_aadt'] = mean_aadt

df_pred_aadt_list[0].head()

found match for: blackburn_30361032
found match for: havering_m25_5790b
found match for: blackburn_30361033
found match for: hounslow_m4_2188b
found match for: trafford_m60_9086b
found match for: havering_m25_5790a
found match for: hounslow_m4_2188a
found match for: trafford_m60_9083a
found match for: luton_m1_2557b
found match for: luton_m1_2557a


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,mean_aadt
0,blackburn_30361032,651.68,639.8,542.97,551.39,543.34,657.8


### Add true aadt column to predicted

In [11]:
df_aadt = pd.DataFrame()

for df_true_aadt in df_true_aadt_list:

  la_name = df_true_aadt.iloc[0]['Local Authority']

  true_aadt = df_true_aadt.loc[df_true_aadt['year'] == 2018]['all_motor_vehicles'].values

  for df_pred_aadt in df_pred_aadt_list:

    image_id = df_pred_aadt.iloc[0]['image_id']

    if match_before_underscore_or_space(la_name.lower(), image_id):

      print("found match for: {}".format(image_id))

      df_pred_aadt['true_aadt'] = true_aadt

      df_aadt = pd.concat([df_aadt, df_pred_aadt], ignore_index=True)

df_aadt

found match for: luton_m1_2557b
found match for: luton_m1_2557a
found match for: hounslow_m4_2188b
found match for: hounslow_m4_2188a
found match for: trafford_m60_9086b
found match for: trafford_m60_9083a
found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: blackburn_30361032
found match for: blackburn_30361033


Unnamed: 0,image_id,aadt,cars_and_taxis,buses_and_coaches,lgvs,all_hgvs,mean_aadt,true_aadt
0,luton_m1_2557b,586.98,575.73,518.27,526.66,518.45,587.54,437.0
1,luton_m1_2557a,588.1,576.91,519.77,528.12,519.96,587.54,437.0
2,hounslow_m4_2188b,1375.54,1204.36,645.24,712.79,647.95,1287.08,3077.0
3,hounslow_m4_2188a,1198.62,1066.96,626.38,681.01,628.59,1287.08,3077.0
4,trafford_m60_9086b,1005.7,919.81,605.58,632.84,607.1,989.455,1300.5
5,trafford_m60_9083a,973.21,891.91,592.77,618.85,594.23,989.455,1300.5
6,havering_m25_5790b,1071.64,996.08,548.29,587.36,550.98,1068.51,3519.5
7,havering_m25_5790a,1065.38,990.58,550.05,588.29,552.69,1068.51,3519.5
8,blackburn_30361032,651.68,639.8,542.97,551.39,543.34,657.8,723.5
9,blackburn_30361033,663.92,651.48,550.08,558.9,550.47,657.8,723.5


### AADT Evaluation

In [27]:
df_aadt_results = pd.DataFrame(df_aadt)

# group dataframe by image_id and calculate RMSE and MAPE for each group
grouped = df_aadt.groupby('image_id')
df_aadt_results = grouped.apply(lambda x: pd.Series({'RMSE': mean_squared_error(x['true_aadt'], x['mean_aadt'], squared=False),
                                                      'MAPE': mean_absolute_percentage_error(x['true_aadt'], x['mean_aadt'])}))

# print the resulting dataframe with image_id as the index
result = df_aadt_results.reset_index()
result = result.rename(columns={'index': 'image_id'})
result = result[['image_id', 'RMSE', 'MAPE']]
result.set_index('image_id', inplace=True)

# add a row at the bottom of the dataframe with the average of the RMSE and MAPE columns
avg_row = result.mean()
avg_row.name = 'Average'
result = result.append(avg_row)

# x and y values of the data points
x = df_aadt['mean_aadt']
y = df_aadt['true_aadt']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result.to_csv(METRICS_DIR+'minor_roads_aadt_'+MODE+'_metrics.csv')

result

R-squared value of the line of best fit: 0.7585013305193832



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,65.7,0.090809
blackburn_30361033,65.7,0.090809
havering_m25_5790a,2450.99,0.696403
havering_m25_5790b,2450.99,0.696403
hounslow_m4_2188a,1789.92,0.581709
hounslow_m4_2188b,1789.92,0.581709
luton_m1_2557a,150.54,0.344485
luton_m1_2557b,150.54,0.344485
trafford_m60_9083a,311.045,0.239173
trafford_m60_9086b,311.045,0.239173


In [13]:
# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_aadt['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_aadt[df_aadt['image_id'] == image_id]['aadt'],
        y=df_aadt[df_aadt['image_id'] == image_id]['true_aadt'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_aadt[df_aadt['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_aadt['aadt'],
    y=np.polyval(np.polyfit(df_aadt['aadt'], df_aadt['true_aadt'], 1), df_aadt['aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=df_aadt['aadt'],
    y=df_aadt['aadt'],  # Use the x-values as y-values to create a diagonal line
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='Equal Line'
))

fig.update_layout(
    title='Minor Roads Predicted AADT vs True AADT for Chosen LAs',
    xaxis_title='Pred AADT',
    yaxis_title='True AADT',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_aadt['aadt']), min(df_aadt['true_aadt']) ) - 2000, max( max(df_aadt['aadt']), max(df_aadt['true_aadt']) ) + 2000]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_aadt['aadt']), min(df_aadt['true_aadt']) ) - 2000, max( max(df_aadt['aadt']), max(df_aadt['true_aadt']) ) + 2000]  # Set y-axis range
    )
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'minor_roads_aadt_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()

## GHG Emissions

### Load true data

In [14]:
df_true_ghg_list = []

prefix = 'ghg'

true_ghg_paths = get_files_by_prefix(TRUE_GHG_PATH, prefix)

for true_ghg_path in true_ghg_paths:
  df = pd.read_csv(true_ghg_path)

  print(df.iloc[0]['Local Authority'])

  df_true_ghg_list.append(df)

print("df list length: {}".format(len(df_true_ghg_list)))
df_true_ghg_list[0].head()

Luton
Hounslow
Enfield
Trafford
Havering
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,Annual Territorial emissions (kt CO2e),Local Authority
0,0,2005,147.206665,Luton
1,1,2006,142.744281,Luton
2,2,2007,146.037257,Luton
3,3,2008,140.888523,Luton
4,4,2009,137.339401,Luton


### Load predicted data

In [15]:
df_pred_ghg_list = []

prefix = 'ghg_'

pred_ghg_paths = get_files_by_prefix(PRED_GHG_PATH, prefix)

for pred_ghg_path in pred_ghg_paths:
  df = pd.read_csv(pred_ghg_path)

  print(df.iloc[0]['image_id'])

  df_pred_ghg_list.append(df)

print("df list length: {}".format(len(df_pred_ghg_list)))
df_pred_ghg_list[0].head()

blackburn_30361032
havering_m25_5790a
havering_m25_5790b
blackburn_30361033
hounslow_m4_2188a
trafford_m60_9083a
hounslow_m4_2188b
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
df list length: 10


Unnamed: 0,image_id,ghg_emissions
0,blackburn_30361032,186.852058


### Average predictions

In [16]:
for df_pred_ghg_1 in df_pred_ghg_list:

  image_id_1 = df_pred_ghg_1.iloc[0]['image_id']

  ghg_1 = df_pred_ghg_1.iloc[0]['ghg_emissions']

  for df_pred_ghg_2 in df_pred_ghg_list:

    image_id_2 = df_pred_ghg_2.iloc[0]['image_id']

    ghg_2 = df_pred_ghg_2.iloc[0]['ghg_emissions']

    if match_before_underscore_or_space(image_id_1, image_id_2) and (image_id_1 != image_id_2):

      print("found match for: {}".format(image_id_1))

      mean_ghg = ( ghg_1 + ghg_2 ) / 2

      df_pred_ghg_1['mean_ghg'] = mean_ghg
      df_pred_ghg_2['mean_ghg'] = mean_ghg

df_pred_ghg_list[0].head()

found match for: blackburn_30361032
found match for: havering_m25_5790a
found match for: havering_m25_5790b
found match for: blackburn_30361033
found match for: hounslow_m4_2188a
found match for: trafford_m60_9083a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9086b
found match for: luton_m1_2557a
found match for: luton_m1_2557b


Unnamed: 0,image_id,ghg_emissions,mean_ghg
0,blackburn_30361032,186.852058,188.096146


### Add true ghg column to predicted

In [17]:
df_ghg = pd.DataFrame(columns=['image_id', 'ghg', 'true_ghg', 'mean_ghg'])

for df_true_ghg in df_true_ghg_list:

    la_name = df_true_ghg.iloc[0]['Local Authority']

    true_ghg = df_true_ghg.loc[df_true_ghg['year'] == 2018]['Annual Territorial emissions (kt CO2e)'].values[0]

    for df_pred_ghg in df_pred_ghg_list:

        image_id = df_pred_ghg.iloc[0]['image_id']

        pred_ghg = df_pred_ghg.iloc[0]['mean_ghg']

        ghg_emissions = df_pred_ghg.iloc[0]['ghg_emissions']

        if match_before_underscore_or_space(la_name.lower(), image_id):

          print("found match for: {}".format(image_id))

          df_ghg.loc[len(df_ghg)] = [image_id, ghg_emissions, true_ghg, pred_ghg]

df_ghg

found match for: luton_m1_2557a
found match for: luton_m1_2557b
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: havering_m25_5790a
found match for: havering_m25_5790b
found match for: blackburn_30361032
found match for: blackburn_30361033


Unnamed: 0,image_id,ghg,true_ghg,mean_ghg
0,luton_m1_2557a,155.224755,134.404948,155.003636
1,luton_m1_2557b,154.782517,134.404948,155.003636
2,hounslow_m4_2188a,187.497209,153.869372,190.98876
3,hounslow_m4_2188b,194.480311,153.869372,190.98876
4,trafford_m60_9083a,307.124303,200.719724,310.526199
5,trafford_m60_9086b,313.928096,200.719724,310.526199
6,havering_m25_5790a,234.224956,134.443716,233.922511
7,havering_m25_5790b,233.620066,134.443716,233.922511
8,blackburn_30361032,186.852058,79.119739,188.096146
9,blackburn_30361033,189.340233,79.119739,188.096146


### GHG Evaluation

In [28]:
df_ghg_results = pd.DataFrame(df_ghg)

# group dataframe by image_id and calculate RMSE and MAPE for each group
grouped = df_ghg.groupby('image_id')
df_ghg_results = grouped.apply(lambda x: pd.Series({'RMSE': mean_squared_error(x['true_ghg'], x['mean_ghg'], squared=False),
                                                      'MAPE': mean_absolute_percentage_error(x['true_ghg'], x['mean_ghg'])}))

# print the resulting dataframe with image_id as the index
result = df_ghg_results.reset_index()

result = result.rename(columns={'index': 'image_id'})
result = result[['image_id', 'RMSE', 'MAPE']]

result.set_index('image_id', inplace=True)

# add a row at the bottom of the dataframe with the average of the RMSE and MAPE columns
avg_row = result.mean()
avg_row.name = 'Average'
result = result.append(avg_row)

# x and y values of the data points
x = df_ghg['mean_ghg']
y = df_ghg['true_ghg']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result.to_csv(METRICS_DIR+'minor_roads_ghg_'+MODE+'_metrics.csv')

result

R-squared value of the line of best fit: 0.48913784666335014



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,108.976407,1.377361
blackburn_30361033,108.976407,1.377361
havering_m25_5790a,99.478795,0.739929
havering_m25_5790b,99.478795,0.739929
hounslow_m4_2188a,37.119388,0.24124
hounslow_m4_2188b,37.119388,0.24124
luton_m1_2557a,20.598689,0.153258
luton_m1_2557b,20.598689,0.153258
trafford_m60_9083a,109.806475,0.547064
trafford_m60_9086b,109.806475,0.547064


In [19]:
import numpy as np
import plotly.graph_objects as go

# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_ghg['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_ghg[df_ghg['image_id'] == image_id]['ghg'],
        y=df_ghg[df_ghg['image_id'] == image_id]['true_ghg'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_ghg[df_ghg['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_ghg['ghg'],
    y=np.polyval(np.polyfit(df_ghg['ghg'], df_ghg['true_ghg'], 1), df_ghg['ghg']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=df_ghg['ghg'],
    y=df_ghg['ghg'],  # Use the x-values as y-values to create a diagonal line
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='Equal Line'
))

fig.update_layout(
    title='Minor Roads Predicted GHG vs True GHG for Chosen LAs',
    xaxis_title='Pred GHG',
    yaxis_title='True GHG',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_ghg['ghg']), min(df_ghg['true_ghg']) ) - 20, max( max(df_ghg['ghg']), max(df_ghg['true_ghg']) ) + 20]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_ghg['ghg']), min(df_ghg['true_ghg']) ) - 20, max( max(df_ghg['ghg']), max(df_ghg['true_ghg']) ) + 20]  # Set y-axis range
    )
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'minor_roads_ghg_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()

## 15 Minute Traffic Count

### Load true data

In [20]:
df_true_traffic_count = pd.DataFrame()

prefix = 'traffic_count'

true_traffic_count_paths = get_files_by_prefix(TRUE_TRAFFIC_COUNT_PATH, prefix)

for true_traffic_count_path in true_traffic_count_paths:
  df = pd.read_csv(true_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  df_true_traffic_count = pd.concat([df_true_traffic_count, df], axis=0)

print("df list length: {}".format(len(df_true_traffic_count)))
df_true_traffic_count.head()

luton_m1_2557a
luton_m1_2557b
blackburn_30361033
blackburn_30361032
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188b
hounslow_m4_2188a
trafford_m60_9083a
trafford_m60_9086b
df list length: 10


Unnamed: 0,image_id,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume
0,luton_m1_2557a,753,277,34,59,1123
0,luton_m1_2557b,1045,33,16,63,1157
0,blackburn_30361033,504,31,18,7,560
0,blackburn_30361032,436,13,10,7,466
0,havering_m25_5790a,801,104,89,182,1176


### Load predicted data

In [21]:
df_pred_traffic_count = pd.DataFrame()

prefix = 'traffic_count'

pred_traffic_count_paths = get_files_by_prefix(PRED_TRAFFIC_COUNT_PATH, prefix)

for pred_traffic_count_path in pred_traffic_count_paths:
  df = pd.read_csv(pred_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  df_pred_traffic_count = pd.concat([df_pred_traffic_count, df], axis=0)

print("df list length: {}".format(len(df_pred_traffic_count)))
df_pred_traffic_count.head()

blackburn_30361032
havering_m25_5790b
havering_m25_5790a
blackburn_30361033
hounslow_m4_2188a
hounslow_m4_2188b
trafford_m60_9083a
trafford_m60_9086b
luton_m1_2557b
luton_m1_2557a
df list length: 10


Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15
0,0,13,2,8,3,0,blackburn_30361032,138.980263,21.381579,85.526316,32.072368,0.0
0,0,171,25,29,71,46,havering_m25_5790b,441.516393,64.54918,74.877049,183.319672,118.770492
0,0,86,3,13,37,33,havering_m25_5790a,222.04918,7.745902,33.565574,95.532787,85.204918
0,0,32,0,2,28,2,blackburn_30361033,342.105263,0.0,21.381579,299.342105,21.381579
0,0,1,1,0,0,0,hounslow_m4_2188a,9.393064,9.393064,0.0,0.0,0.0


### Traffic Count Evaluation

In [22]:
# Merge the dataframes on the 'image_id' column
df_traffic_count = pd.merge(df_pred_traffic_count, df_true_traffic_count, on='image_id')

# Calculate the RMSE and MAPE for each row
rmse_list = []
mape_list = []
for i in range(len(df_traffic_count)):
    actual = df_traffic_count.loc[i, list(NORMALISE_DICT.keys())].values
    predicted = df_traffic_count.loc[i, list(NORMALISE_DICT.values())].values
    rmse = np.sqrt(((actual - predicted) ** 2).mean())
    mape = np.abs((actual - predicted) / actual)
    mape = np.where(actual == 0, 0, mape)  # handle division by zero
    mape = mape.mean() * 100
    rmse_list.append(rmse)
    mape_list.append(mape)

# Add the RMSE and MAPE columns to the merged dataframe
df_traffic_count['RMSE'] = rmse_list
df_traffic_count['MAPE'] = mape_list

# x and y values of the data points
x = df_traffic_count['Total_N15']
y = df_traffic_count['Total Volume']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

# Output the results
df_traffic_count

R-squared value of the line of best fit: 0.22867808659486566



divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in true_divide



Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume,RMSE,MAPE
0,0,13,2,8,3,0,blackburn_30361032,138.980263,21.381579,85.526316,32.072368,0.0,436,13,10,7,466,238.598659,465.611677
1,0,171,25,29,71,46,havering_m25_5790b,441.516393,64.54918,74.877049,183.319672,118.770492,545,248,117,158,1068,363.104257,237.325491
2,0,86,3,13,37,33,havering_m25_5790a,222.04918,7.745902,33.565574,95.532787,85.204918,801,104,89,182,1176,557.431109,2200.169403
3,0,32,0,2,28,2,blackburn_30361033,342.105263,0.0,21.381579,299.342105,21.381579,504,31,18,7,560,276.024216,53.985055
4,0,1,1,0,0,0,hounslow_m4_2188a,9.393064,9.393064,0.0,0.0,0.0,688,50,17,9,764,454.490705,3051.643077
5,0,38,4,7,18,9,hounslow_m4_2188b,356.936416,37.572254,65.751445,169.075145,84.537572,523,247,9,9,788,311.652667,374.488028
6,0,31,6,14,11,0,trafford_m60_9083a,494.52381,95.714286,223.333333,175.47619,0.0,645,257,36,8,946,324.406103,151.946951
7,0,37,4,15,17,1,trafford_m60_9086b,572.619048,61.904762,232.142857,263.095238,15.47619,816,29,12,10,867,389.785993,297.578926
8,0,47,4,22,16,5,luton_m1_2557b,544.301471,46.323529,254.779412,185.294118,57.904412,1045,33,16,63,1157,538.632853,491.130348
9,0,40,8,11,15,6,luton_m1_2557a,477.941176,95.588235,131.433824,179.227941,71.691176,753,277,34,59,1123,422.074128,206.440951


In [23]:
import numpy as np
import plotly.graph_objects as go

# create a scatter plot with a line of best fit
fig = go.Figure()

for image_id in df_traffic_count['image_id'].unique():
    fig.add_trace(go.Scatter(
        x=df_traffic_count[df_traffic_count['image_id'] == image_id]['Total_N15'],
        y=df_traffic_count[df_traffic_count['image_id'] == image_id]['Total Volume'], 
        mode='markers',
        marker={'size': 10},
        name=image_id,
        text=df_traffic_count[df_traffic_count['image_id'] == image_id]['image_id']
    ))

fig.add_trace(go.Scatter(
    x=df_traffic_count['Total_N15'],
    y=np.polyval(np.polyfit(df_traffic_count['Total_N15'], df_traffic_count['Total Volume'], 1), df_traffic_count['Total_N15']),
    mode='lines',
    name='Line of Best Fit'
))

fig.update_layout(
    title='Predicted Traffic Counts vs True Traffic for Chosen LA Count Sites',
    xaxis_title='Predicted Traffic Count',
    yaxis_title='True Traffic Count',
    legend_title='Image ID',
    width=1000,
    height=1000,
    xaxis=dict(
        scaleanchor="y",
        scaleratio=1,
        range=[min( min(df_traffic_count['Total Volume']), min(df_traffic_count['Total_N15']) ) , 
               max( max(df_traffic_count['Total Volume']), max(df_traffic_count['Total_N15']) )]  # Set x-axis range
    ),
    yaxis=dict(
        scaleanchor="x",
        scaleratio=1,
        range=[min( min(df_traffic_count['Total Volume']), min(df_traffic_count['Total_N15']) ) , 
               max( max(df_traffic_count['Total Volume']), max(df_traffic_count['Total_N15']) )]  # Set y-axis range
    )
)

fig.show()

## Speed Estimation

## AADT and GHG 

In [24]:
merged_df = pd.merge(df_aadt[['image_id', 'mean_aadt', 'true_aadt']], df_ghg[['image_id', 'mean_ghg', 'true_ghg']], on='image_id')

merged_df.head()

Unnamed: 0,image_id,mean_aadt,true_aadt,mean_ghg,true_ghg
0,luton_m1_2557b,587.54,437.0,155.003636,134.404948
1,luton_m1_2557a,587.54,437.0,155.003636,134.404948
2,hounslow_m4_2188b,1287.08,3077.0,190.98876,153.869372
3,hounslow_m4_2188a,1287.08,3077.0,190.98876,153.869372
4,trafford_m60_9086b,989.455,1300.5,310.526199,200.719724


In [25]:
merged_df['first_id'] = merged_df['image_id'].str.split('_').str[0]
merged_df = merged_df.drop_duplicates(subset='first_id', keep='first').drop(columns='first_id')
merged_df['image_id'] = merged_df['image_id'].str.split('_').str[0]

merged_df.head()

Unnamed: 0,image_id,mean_aadt,true_aadt,mean_ghg,true_ghg
0,luton,587.54,437.0,155.003636,134.404948
2,hounslow,1287.08,3077.0,190.98876,153.869372
4,trafford,989.455,1300.5,310.526199,200.719724
6,havering,1068.51,3519.5,233.922511,134.443716
8,blackburn,657.8,723.5,188.096146,79.119739


In [26]:
# scatter plot of mean_aadt vs. mean_ghg
fig = go.Figure()

# Define a list of colors
colors = ['red', 'green', 'blue', 'yellow', 'orange', 'purple', 'pink']

# Generate a unique color for each unique image_id
image_ids = merged_df['image_id'].unique()
color_mapping = {image_id: colors[i % len(colors)] for i, image_id in enumerate(image_ids)}

for image_id in image_ids:
    # Get the color value for the current image_id
    color_value = color_mapping[image_id]
    
    image_df = merged_df[merged_df['image_id'] == image_id]
    
    fig.add_trace(go.Scatter(
        x=image_df['mean_aadt'],
        y=image_df['mean_ghg'],
        mode='markers',
        marker={
            'size': 10,
            'color': [color_value] * len(image_df),
            'cmin': 0,
            'cmax': 1
        },
        name=image_id,
        text=image_df['image_id']
    ))
    
    fig.add_trace(go.Scatter(
        x=image_df['true_aadt'],
        y=image_df['true_ghg'],
        mode='markers',
        marker={
            'size': 10,
            'symbol': 'star',
            'color': [color_value] * len(image_df),
            'cmin': 0,
            'cmax': 1
        },
        name=image_id,
        text=image_df['image_id']
    ))
    

fig.add_trace(go.Scatter(
    x=merged_df['mean_aadt'],
    y=np.polyval(np.polyfit(merged_df['mean_aadt'], merged_df['mean_ghg'], 1), merged_df['mean_aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    name='Pred Line of Best Fit'
))

fig.add_trace(go.Scatter(
    x=merged_df['true_aadt'],
    y=np.polyval(np.polyfit(merged_df['true_aadt'], merged_df['true_ghg'], 1), merged_df['true_aadt']),
    mode='lines',
    marker={
        'color': 'black'
    },
    line={
        'dash': 'dash'
    },
    name='True Line of Best Fit'
))

fig.update_layout(
    title='Minor Roads AADT vs GHG Emissions for Chosen LAs',
    xaxis_title='Mean AADT',
    yaxis_title='Mean GHG',
    legend_title='Image ID',
    width=1000,
    height=800
)

# Save the plot as an image
file_path = os.path.join(PLOT_DIR, 'minor_roads_aadt_ghg_eval_'+MODE+'_plot.png')
fig.write_image(file_path)

fig.show()