# Evaluation

**TODO**:
- Add $R^2$ values to evaluation metrics
- Add speed estimation

## Imports

In [None]:
import os
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
import plotly.graph_objs as go
from sklearn.metrics import r2_score

## Global Variables

In [None]:
# Runtime switch: when True, the paths cell below mounts Google Drive and reads the
# project from there; when False, the project root is the parent of the working directory.
COLAB = True

In [None]:
# Default project root: the parent of the current working directory.
ROOT_DIR_PATH = os.path.abspath('..')

# Absolute location at which Google Drive is mounted on Colab.
DRIVE_MOUNT_POINT = '/content/drive'

if COLAB:

  # google.colab is only importable on a Colab runtime, so import it lazily.
  from google.colab import drive
  drive.mount(DRIVE_MOUNT_POINT)

  # Build the root from the absolute mount point rather than a path relative to
  # the working directory, so the notebook still works if the cwd is not /content.
  ROOT_DIR_PATH = os.path.join(DRIVE_MOUNT_POINT, 'MyDrive', 'Spatial_Finance_Transport')

# Ground-truth and predicted data directories, one pair per evaluated quantity.
TRUE_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/aadt/')

PRED_AADT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/aadt/')

TRUE_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/ghg_emissions/')

PRED_GHG_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/ghg_emissions/')

TRUE_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/traffic_counts/')

PRED_TRAFFIC_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/predicted/traffic_counts/')

# (local authority, count-site id, count-site id) triples for the chosen sites.
# NOTE(review): presumably one id per carriageway direction - confirm.
CHOSEN_COUNT_SITES = [('Luton', 'M1/2557A', 'M1/2557B'), ('Hounslow', 'M4/2188A', 'M4/2188B'), ('Enfield', 'M25/5441A', 'M25/5441B'),
                      ('Blackburn with Darwen', '30361033', '30361032'), ('Havering', 'M25/5790A', 'M25/5790B'), ('Trafford', 'M60/9083A', 'M60/9086B')]

Mounted at /content/drive


In [None]:
# Maps each predicted traffic-count column (keys, the `*_N15` columns of the predicted
# CSVs) to the ground-truth column it is compared against (values, the columns of the
# true traffic-count CSVs).
# NOTE(review): `_N15` presumably means "normalised to 15 minutes" and the cm ranges
# presumably are vehicle-length classes - confirm against the prediction pipeline.
NORMALISE_DICT = {
    'Total_N15': 'Total Volume',
    'Small_N15': '0-520cm',
    'Medium_N15': '521-660cm',
    'Large_N15': '661-1160cm',
    'Very Large_N15': '1160+cm'
}

## Helper Functions

In [None]:
def get_files_by_prefix(directory, prefix):
    """
    Returns the paths of the regular files in a directory whose name contains a string.

    Despite the function name, the match is a substring test (``prefix in filename``),
    not a start-of-name test; this is kept because callers may rely on it.
    Sub-directories are skipped. The result is sorted by file name so the order is
    the same on every run and platform (``os.listdir`` order is arbitrary).

    Args:
    directory (str): the path to the directory to search in.
    prefix (str): the text that a file name must contain to be returned.

    Returns:
    A list of file paths (``directory`` joined with the file name), sorted by file name.
    An empty list if nothing matches.
    """
    matching_files = []
    for filename in sorted(os.listdir(directory)):
        if prefix not in filename:
            continue
        file_path = os.path.join(directory, filename)
        # Only regular files are returned; directories with a matching name are ignored.
        if os.path.isfile(file_path):
            matching_files.append(file_path)
    return matching_files

In [None]:
def match_before_underscore_or_space(str1, str2):
    """Compares the leading token of two strings.

    The leading token is everything before the first underscore or space,
    whichever comes first; a string containing neither is used in full.

    Args:
        str1 (str): The first string.
        str2 (str): The second string.

    Returns:
        bool: True if both strings have the same leading token, False otherwise.
    """
    def _leading_token(text):
        # Positions of the separators that actually occur in the text.
        cut_points = [pos for pos in (text.find("_"), text.find(" ")) if pos != -1]
        if not cut_points:
            return text
        return text[:min(cut_points)]

    return _leading_token(str1) == _leading_token(str2)

## AADT

### Load true data

In [None]:
# Read every ground-truth AADT file into its own dataframe.
prefix = 'all_motor_vehicles'
true_aadt_paths = get_files_by_prefix(TRUE_AADT_PATH, prefix)

df_true_aadt_list = []
for true_aadt_path in true_aadt_paths:
  df = pd.read_csv(true_aadt_path)
  # Echo which local authority the file belongs to (taken from its first row).
  print(df['Local Authority'].iloc[0])
  df_true_aadt_list.append(df)

print(f"df list length: {len(df_true_aadt_list)}")
df_true_aadt_list[0].head()

Luton
Enfield
Havering
Trafford
Hounslow
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,all_motor_vehicles,Local Authority
0,0,2005,57600.0,Luton
1,1,2006,48583.5,Luton
2,2,2007,50652.5,Luton
3,3,2008,53492.5,Luton
4,4,2009,49612.5,Luton


### Load predicted data

In [None]:
# Read every predicted AADT file into its own dataframe.
prefix = 'aadt_'
pred_aadt_paths = get_files_by_prefix(PRED_AADT_PATH, prefix)

df_pred_aadt_list = []
for pred_aadt_path in pred_aadt_paths:
  df = pd.read_csv(pred_aadt_path)
  # Echo which image/site the file belongs to (taken from its first row).
  print(df['image_id'].iloc[0])
  df_pred_aadt_list.append(df)

print(f"df list length: {len(df_pred_aadt_list)}")
df_pred_aadt_list[0].head()

havering_m25_5790b
havering_m25_5790a
hounslow_m4_2188b
blackburn_30361032
blackburn_30361033
hounslow_m4_2188a
trafford_m60_9083a
luton_m1_2557b
luton_m1_2557a
trafford_m60_9086b
df list length: 10


Unnamed: 0,image_id,aadt
0,havering_m25_5790b,60603.17


### Average predictions

In [None]:
# Pair up predictions whose image_id shares the same leading token and store the
# mean of the two predictions on both frames. Every pair is visited twice (once
# from each side), which is why each image_id is reported once.
for df_pred_aadt_1 in df_pred_aadt_list:
  first_row_1 = df_pred_aadt_1.iloc[0]
  image_id_1, aadt_1 = first_row_1['image_id'], first_row_1['aadt']

  for df_pred_aadt_2 in df_pred_aadt_list:
    first_row_2 = df_pred_aadt_2.iloc[0]
    image_id_2, aadt_2 = first_row_2['image_id'], first_row_2['aadt']

    # Skip the frame itself and frames with a different leading token.
    if image_id_1 == image_id_2:
      continue
    if not match_before_underscore_or_space(image_id_1, image_id_2):
      continue

    print("found match for: {}".format(image_id_1))

    mean_aadt = (aadt_1 + aadt_2) / 2
    for paired_frame in (df_pred_aadt_1, df_pred_aadt_2):
      paired_frame['mean_aadt'] = mean_aadt

df_pred_aadt_list[0].head()

found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: hounslow_m4_2188b
found match for: blackburn_30361032
found match for: blackburn_30361033
found match for: hounslow_m4_2188a
found match for: trafford_m60_9083a
found match for: luton_m1_2557b
found match for: luton_m1_2557a
found match for: trafford_m60_9086b


Unnamed: 0,image_id,aadt,mean_aadt
0,havering_m25_5790b,60603.17,60565.205


### Add true aadt column to predicted

In [None]:
# Attach the 2018 ground-truth AADT of each local authority to every prediction
# whose image_id starts with that authority's (lower-cased) leading name token.
# Matching frames are collected in a list and concatenated once, instead of
# growing a dataframe inside the loop, and the prediction frames in
# df_pred_aadt_list are no longer mutated (assign returns a copy).
matched_frames = []

for df_true_aadt in df_true_aadt_list:

  la_name = df_true_aadt.iloc[0]['Local Authority']

  # Ground-truth values recorded for 2018 (expected to be a single row).
  true_aadt = df_true_aadt.loc[df_true_aadt['year'] == 2018, 'all_motor_vehicles'].values

  for df_pred_aadt in df_pred_aadt_list:

    image_id = df_pred_aadt.iloc[0]['image_id']

    if match_before_underscore_or_space(la_name.lower(), image_id):

      print("found match for: {}".format(image_id))

      # Use the scalar value so the column broadcasts regardless of frame length.
      matched_frames.append(df_pred_aadt.assign(true_aadt=true_aadt[0]))

# pd.concat raises on an empty list, so fall back to an empty frame when nothing matched.
df_aadt = pd.concat(matched_frames, ignore_index=True) if matched_frames else pd.DataFrame()

df_aadt

found match for: luton_m1_2557b
found match for: luton_m1_2557a
found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: hounslow_m4_2188b
found match for: hounslow_m4_2188a
found match for: blackburn_30361032
found match for: blackburn_30361033


Unnamed: 0,image_id,aadt,mean_aadt,true_aadt
0,luton_m1_2557b,70214.29,71058.5,70355.0
1,luton_m1_2557a,71902.71,71058.5,70355.0
2,havering_m25_5790b,60603.17,60565.205,66344.0
3,havering_m25_5790a,60527.24,60565.205,66344.0
4,trafford_m60_9083a,60938.07,61076.385,60317.5
5,trafford_m60_9086b,61214.7,61076.385,60317.5
6,hounslow_m4_2188b,61842.91,58762.19,51047.5
7,hounslow_m4_2188a,55681.47,58762.19,51047.5
8,blackburn_30361032,31364.52,30900.165,35333.0
9,blackburn_30361033,30435.81,30900.165,35333.0


### AADT Evaluation

In [None]:
# Per-site RMSE and MAPE of the mean predicted AADT against the true AADT.
# The value columns are selected before apply so the lambda never sees the grouping
# column. RMSE is computed as sqrt(MSE) because the `squared=False` argument of
# mean_squared_error has been removed from recent scikit-learn releases.
grouped = df_aadt.groupby('image_id')[['true_aadt', 'mean_aadt']]
df_aadt_results = grouped.apply(lambda x: pd.Series({'RMSE': np.sqrt(mean_squared_error(x['true_aadt'], x['mean_aadt'])),
                                                      'MAPE': mean_absolute_percentage_error(x['true_aadt'], x['mean_aadt'])}))

# Results table indexed by image_id
result = df_aadt_results[['RMSE', 'MAPE']].rename_axis('image_id')

# Add a row at the bottom with the average of the RMSE and MAPE columns.
# DataFrame.append was removed in pandas 2.0, so the row is added with pd.concat.
avg_row = result.mean()
avg_row.name = 'Average'
result = pd.concat([result, avg_row.to_frame().T.rename_axis('image_id')])

# x and y values of the data points
x = df_aadt['mean_aadt']
y = df_aadt['true_aadt']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared of the fitted line (how well a linear map of the predictions explains
# the true values), not of the raw predictions themselves
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result

R-squared value of the line of best fit: 0.875432947269268


  result = result.append(avg_row)


Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,4432.835,0.125459
blackburn_30361033,4432.835,0.125459
havering_m25_5790a,5778.795,0.087104
havering_m25_5790b,5778.795,0.087104
hounslow_m4_2188a,7714.69,0.151128
hounslow_m4_2188b,7714.69,0.151128
luton_m1_2557a,703.5,0.009999
luton_m1_2557b,703.5,0.009999
trafford_m60_9083a,758.885,0.012582
trafford_m60_9086b,758.885,0.012582


In [None]:
# Scatter of mean predicted vs true AADT (one trace per image_id) with a fitted line
fig = go.Figure()

for image_id in df_aadt['image_id'].unique():
    site_rows = df_aadt[df_aadt['image_id'] == image_id]
    fig.add_trace(go.Scatter(
        x=site_rows['mean_aadt'],
        y=site_rows['true_aadt'],
        mode='markers',
        marker=dict(size=10),
        name=image_id,
        text=site_rows['image_id'],
    ))

# Degree-1 least-squares fit of true AADT on mean predicted AADT
fit_coefficients = np.polyfit(df_aadt['mean_aadt'], df_aadt['true_aadt'], 1)
fig.add_trace(go.Scatter(
    x=df_aadt['mean_aadt'],
    y=np.polyval(fit_coefficients, df_aadt['mean_aadt']),
    mode='lines',
    name='Line of Best Fit',
))

fig.update_layout(
    title='Mean Predicted AADT vs True AADT for Chosen LAs',
    xaxis_title='Mean AADT',
    yaxis_title='True AADT',
    legend_title='Image ID',
    width=1000,
    height=800,
    xaxis=dict(scaleanchor="y", scaleratio=1),
    yaxis=dict(scaleanchor="x", scaleratio=1),
)
fig.show()

## GHG Emissions

### Load true data

In [None]:
# Read every ground-truth GHG emissions file into its own dataframe.
prefix = 'ghg'
true_ghg_paths = get_files_by_prefix(TRUE_GHG_PATH, prefix)

df_true_ghg_list = []
for true_ghg_path in true_ghg_paths:
  df = pd.read_csv(true_ghg_path)
  # Echo which local authority the file belongs to (taken from its first row).
  print(df['Local Authority'].iloc[0])
  df_true_ghg_list.append(df)

print(f"df list length: {len(df_true_ghg_list)}")
df_true_ghg_list[0].head()

Enfield
Luton
Hounslow
Havering
Trafford
Blackburn with Darwen
df list length: 6


Unnamed: 0.1,Unnamed: 0,year,Annual Territorial emissions (kt CO2e),Local Authority
0,0,2005,122.334812,Enfield
1,1,2006,123.985743,Enfield
2,2,2007,125.392379,Enfield
3,3,2008,124.409843,Enfield
4,4,2009,125.528464,Enfield


### Load predicted data

In [None]:
# Read every predicted GHG emissions file into its own dataframe.
prefix = 'ghg_'
pred_ghg_paths = get_files_by_prefix(PRED_GHG_PATH, prefix)

df_pred_ghg_list = []
for pred_ghg_path in pred_ghg_paths:
  df = pd.read_csv(pred_ghg_path)
  # Echo which image/site the file belongs to (taken from its first row).
  print(df['image_id'].iloc[0])
  df_pred_ghg_list.append(df)

print(f"df list length: {len(df_pred_ghg_list)}")
df_pred_ghg_list[0].head()

trafford_m60_9083a
blackburn_30361033
trafford_m60_9086b
blackburn_30361032
hounslow_m4_2188a
hounslow_m4_2188b
havering_m25_5790b
luton_m1_2557b
havering_m25_5790a
luton_m1_2557a
df list length: 10


Unnamed: 0,image_id,ghg_emissions
0,trafford_m60_9083a,5.114


### Average predictions

In [None]:
# Pair up predictions whose image_id shares the same leading token and store the
# mean of the two predictions on both frames. Every pair is visited twice (once
# from each side), which is why each image_id is reported once.
for df_pred_ghg_1 in df_pred_ghg_list:
  first_row_1 = df_pred_ghg_1.iloc[0]
  image_id_1, ghg_1 = first_row_1['image_id'], first_row_1['ghg_emissions']

  for df_pred_ghg_2 in df_pred_ghg_list:
    first_row_2 = df_pred_ghg_2.iloc[0]
    image_id_2, ghg_2 = first_row_2['image_id'], first_row_2['ghg_emissions']

    # Skip the frame itself and frames with a different leading token.
    if image_id_1 == image_id_2:
      continue
    if not match_before_underscore_or_space(image_id_1, image_id_2):
      continue

    print("found match for: {}".format(image_id_1))

    mean_ghg = (ghg_1 + ghg_2) / 2
    for paired_frame in (df_pred_ghg_1, df_pred_ghg_2):
      paired_frame['mean_ghg'] = mean_ghg

df_pred_ghg_list[0].head()

found match for: trafford_m60_9083a
found match for: blackburn_30361033
found match for: trafford_m60_9086b
found match for: blackburn_30361032
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: havering_m25_5790b
found match for: luton_m1_2557b
found match for: havering_m25_5790a
found match for: luton_m1_2557a


Unnamed: 0,image_id,ghg_emissions,mean_ghg
0,trafford_m60_9083a,5.114,6.1675


### Add true ghg column to predicted

In [None]:
# Pair the 2018 ground-truth GHG emissions of each local authority with the mean
# prediction of every site whose image_id starts with that authority's (lower-cased)
# leading name token. Rows are collected as records and the dataframe is built once,
# so the numeric columns get a numeric dtype instead of the object dtype produced
# by appending rows to an empty frame.
ghg_records = []

for df_true_ghg in df_true_ghg_list:

    la_name = df_true_ghg.iloc[0]['Local Authority']

    true_ghg = df_true_ghg.loc[df_true_ghg['year'] == 2018, 'Annual Territorial emissions (kt CO2e)'].values[0]

    for df_pred_ghg in df_pred_ghg_list:

        image_id = df_pred_ghg.iloc[0]['image_id']

        if match_before_underscore_or_space(la_name.lower(), image_id):

            print("found match for: {}".format(image_id))

            # Read mean_ghg only for matching frames, so a prediction that was
            # never paired (and has no mean_ghg column) cannot break other sites.
            pred_ghg = df_pred_ghg.iloc[0]['mean_ghg']

            ghg_records.append({'image_id': image_id, 'true_ghg': true_ghg, 'mean_ghg': pred_ghg})

# Explicit columns keep the expected schema even when nothing matched.
df_ghg = pd.DataFrame(ghg_records, columns=['image_id', 'true_ghg', 'mean_ghg'])

df_ghg

found match for: luton_m1_2557b
found match for: luton_m1_2557a
found match for: hounslow_m4_2188a
found match for: hounslow_m4_2188b
found match for: havering_m25_5790b
found match for: havering_m25_5790a
found match for: trafford_m60_9083a
found match for: trafford_m60_9086b
found match for: blackburn_30361033
found match for: blackburn_30361032


Unnamed: 0,image_id,true_ghg,mean_ghg
0,luton_m1_2557b,34.480488,36.847
1,luton_m1_2557a,34.480488,36.847
2,hounslow_m4_2188a,65.784938,86.747
3,hounslow_m4_2188b,65.784938,86.747
4,havering_m25_5790b,161.286581,223.914
5,havering_m25_5790a,161.286581,223.914
6,trafford_m60_9083a,96.466577,6.1675
7,trafford_m60_9086b,96.466577,6.1675
8,blackburn_30361033,39.780264,34.013
9,blackburn_30361032,39.780264,34.013


### GHG Evaluation

In [None]:
# Per-site RMSE and MAPE of the mean predicted GHG emissions against the true value.
# The value columns are selected before apply so the lambda never sees the grouping
# column. RMSE is computed as sqrt(MSE) because the `squared=False` argument of
# mean_squared_error has been removed from recent scikit-learn releases.
grouped = df_ghg.groupby('image_id')[['true_ghg', 'mean_ghg']]
df_ghg_results = grouped.apply(lambda x: pd.Series({'RMSE': np.sqrt(mean_squared_error(x['true_ghg'], x['mean_ghg'])),
                                                      'MAPE': mean_absolute_percentage_error(x['true_ghg'], x['mean_ghg'])}))

# Results table indexed by image_id
result = df_ghg_results[['RMSE', 'MAPE']].rename_axis('image_id')

# Add a row at the bottom with the average of the RMSE and MAPE columns.
# DataFrame.append was removed in pandas 2.0, so the row is added with pd.concat.
avg_row = result.mean()
avg_row.name = 'Average'
result = pd.concat([result, avg_row.to_frame().T.rename_axis('image_id')])

# x and y values of the data points
x = df_ghg['mean_ghg']
y = df_ghg['true_ghg']

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared of the fitted line (how well a linear map of the predictions explains
# the true values), not of the raw predictions themselves
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

result

R-squared value of the line of best fit: 0.6204810426125735



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0_level_0,RMSE,MAPE
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1
blackburn_30361032,5.767264,0.144978
blackburn_30361033,5.767264,0.144978
havering_m25_5790a,62.627419,0.388299
havering_m25_5790b,62.627419,0.388299
hounslow_m4_2188a,20.962062,0.318645
hounslow_m4_2188b,20.962062,0.318645
luton_m1_2557a,2.366512,0.068633
luton_m1_2557b,2.366512,0.068633
trafford_m60_9083a,90.299077,0.936066
trafford_m60_9086b,90.299077,0.936066


In [None]:
# Scatter of mean predicted vs true GHG (one trace per image_id) with a fitted line
fig = go.Figure()

for image_id in df_ghg['image_id'].unique():
    site_rows = df_ghg[df_ghg['image_id'] == image_id]
    fig.add_trace(go.Scatter(
        x=site_rows['mean_ghg'],
        y=site_rows['true_ghg'],
        mode='markers',
        marker=dict(size=10),
        name=image_id,
        text=site_rows['image_id'],
    ))

# Degree-1 least-squares fit of true GHG on mean predicted GHG
fit_coefficients = np.polyfit(df_ghg['mean_ghg'], df_ghg['true_ghg'], 1)
fig.add_trace(go.Scatter(
    x=df_ghg['mean_ghg'],
    y=np.polyval(fit_coefficients, df_ghg['mean_ghg']),
    mode='lines',
    name='Line of Best Fit',
))

fig.update_layout(
    title='Mean Predicted GHG vs True GHG for Chosen LAs',
    xaxis_title='Mean GHG',
    yaxis_title='True GHG',
    legend_title='Image ID',
    width=1000,
    height=800,
    xaxis=dict(scaleanchor="y", scaleratio=1),
    yaxis=dict(scaleanchor="x", scaleratio=1),
)
fig.show()

## 15 Minute Traffic Count

### Load true data

In [None]:
# Load every ground-truth traffic-count file and stack them into one dataframe.
# The frames are collected first and concatenated once: concatenating onto an
# initially empty dataframe inside the loop is quadratic and can disturb dtypes.
prefix = 'traffic_count'

true_traffic_count_paths = get_files_by_prefix(TRUE_TRAFFIC_COUNT_PATH, prefix)

true_traffic_count_frames = []

for true_traffic_count_path in true_traffic_count_paths:
  # skipinitialspace drops the blanks that follow the delimiters in these files
  df = pd.read_csv(true_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  true_traffic_count_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no file matched.
if true_traffic_count_frames:
  df_true_traffic_count = pd.concat(true_traffic_count_frames, axis=0)
else:
  df_true_traffic_count = pd.DataFrame()

# Note: this is the number of rows of the stacked frame, not a list length.
print("df list length: {}".format(len(df_true_traffic_count)))
df_true_traffic_count.head()

luton_m1_2557a
luton_m1_2557b
blackburn_30361033
blackburn_30361032
havering_m25_5790a
havering_m25_5790b
hounslow_m4_2188b
hounslow_m4_2188a
trafford_m60_9083a
trafford_m60_9086b
df list length: 10


Unnamed: 0,image_id,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume
0,luton_m1_2557a,753,277,34,59,1123
0,luton_m1_2557b,1045,33,16,63,1157
0,blackburn_30361033,504,31,18,7,560
0,blackburn_30361032,436,13,10,7,466
0,havering_m25_5790a,801,104,89,182,1176


### Load predicted data

In [None]:
# Load every predicted traffic-count file and stack them into one dataframe.
# The frames are collected first and concatenated once: concatenating onto an
# initially empty dataframe inside the loop is quadratic and can disturb dtypes.
prefix = 'traffic_count'

pred_traffic_count_paths = get_files_by_prefix(PRED_TRAFFIC_COUNT_PATH, prefix)

pred_traffic_count_frames = []

for pred_traffic_count_path in pred_traffic_count_paths:
  # skipinitialspace drops the blanks that follow the delimiters in these files
  df = pd.read_csv(pred_traffic_count_path, skipinitialspace=True)

  print(df.iloc[0]['image_id'])

  pred_traffic_count_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no file matched.
if pred_traffic_count_frames:
  df_pred_traffic_count = pd.concat(pred_traffic_count_frames, axis=0)
else:
  df_pred_traffic_count = pd.DataFrame()

# Note: this is the number of rows of the stacked frame, not a list length.
print("df list length: {}".format(len(df_pred_traffic_count)))
df_pred_traffic_count.head()

trafford_m60_9083a
blackburn_30361032
havering_m25_5790b
blackburn_30361033
havering_m25_5790a
hounslow_m4_2188a
hounslow_m4_2188b
trafford_m60_9086b
luton_m1_2557a
luton_m1_2557b
df list length: 10


Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15
0,0,2,1,1,0,0,trafford_m60_9083a,31.904762,15.952381,15.952381,0.0,0.0
0,0,1,1,0,0,0,blackburn_30361032,10.690789,10.690789,0.0,0.0,0.0
0,0,40,0,6,24,10,havering_m25_5790b,103.278689,0.0,15.491803,61.967213,25.819672
0,0,1,1,0,0,0,blackburn_30361033,10.690789,10.690789,0.0,0.0,0.0
0,0,14,0,3,7,4,havering_m25_5790a,36.147541,0.0,7.745902,18.07377,10.327869


### Traffic Count Evaluation

In [None]:
# Merge the dataframes on the 'image_id' column
df_traffic_count = pd.merge(df_pred_traffic_count, df_true_traffic_count, on='image_id')

# NORMALISE_DICT maps predicted column -> ground-truth column, so the keys are the
# predictions and the values are the observations. (These were previously swapped,
# which made MAPE divide by the predicted counts - many of them zero - instead of
# by the true counts.)
pred_cols = list(NORMALISE_DICT.keys())
true_cols = list(NORMALISE_DICT.values())

# Force float arrays so the arithmetic is vectorised even if the columns are object dtype
actual = df_traffic_count[true_cols].to_numpy(dtype=float)
predicted = df_traffic_count[pred_cols].to_numpy(dtype=float)
errors = actual - predicted

# Row-wise RMSE over the vehicle classes (symmetric, so unaffected by the swap fix)
rmse_list = np.sqrt((errors ** 2).mean(axis=1))

# Row-wise MAPE in percent; classes with a true count of zero contribute 0
with np.errstate(divide='ignore', invalid='ignore'):
    abs_pct_error = np.abs(errors / actual)
abs_pct_error = np.where(actual == 0, 0, abs_pct_error)  # handle division by zero
mape_list = abs_pct_error.mean(axis=1) * 100

# Add the RMSE and MAPE columns to the merged dataframe
df_traffic_count['RMSE'] = rmse_list
df_traffic_count['MAPE'] = mape_list

# x and y values of the data points
x = df_traffic_count['Total_N15'].astype(float)
y = df_traffic_count['Total Volume'].astype(float)

# coefficients of the line of best fit
coefficients = np.polyfit(x, y, 1)

# predicted y values using the line of best fit
y_predicted = np.polyval(coefficients, x)

# R-squared value of the line of best fit
r_squared = r2_score(y, y_predicted)

print("R-squared value of the line of best fit:", r_squared)

# Output the results
df_traffic_count

R-squared value of the line of best fit: 0.21570550835202473



divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in true_divide


divide by zero encountered in double_scalars


divide by zero encountered in true_divide



Unnamed: 0.1,Unnamed: 0,Total,Small,Medium,Large,Very Large,image_id,Total_N15,Small_N15,Medium_N15,Large_N15,Very Large_N15,0-520cm,521-660cm,661-1160cm,1160+cm,Total Volume,RMSE,MAPE
0,0,2,1,1,0,0,trafford_m60_9083a,31.904762,15.952381,15.952381,0.0,0.0,645,257,36,8,946,508.08168,1663.880597
1,0,1,1,0,0,0,blackburn_30361032,10.690789,10.690789,0.0,0.0,0.0,436,13,10,7,466,278.751646,1647.433846
2,0,40,0,6,24,10,havering_m25_5790b,103.278689,0.0,15.491803,61.967213,25.819672,545,248,117,158,1068,510.347434,607.137566
3,0,1,1,0,0,0,blackburn_30361033,10.690789,10.690789,0.0,0.0,0.0,504,31,18,7,560,330.583903,1950.498462
4,0,14,0,3,7,4,havering_m25_5790a,36.147541,0.0,7.745902,18.07377,10.327869,801,104,89,182,1176,630.020846,1290.125472
5,0,1,1,0,0,0,hounslow_m4_2188a,9.393064,9.393064,0.0,0.0,0.0,688,50,17,9,764,454.490705,3051.643077
6,0,5,0,2,2,1,hounslow_m4_2188b,46.965318,0.0,18.786127,18.786127,9.393064,523,247,9,9,788,418.2915,569.782154
7,0,9,2,7,0,0,trafford_m60_9086b,139.285714,30.952381,108.333333,0.0,0.0,816,29,12,10,867,480.084497,626.4
8,0,6,3,3,0,0,luton_m1_2557a,71.691176,35.845588,35.845588,0.0,0.0,753,277,34,59,1123,580.060884,827.975385
9,0,5,0,3,2,0,luton_m1_2557b,57.904412,0.0,34.742647,23.161765,0.0,1045,33,16,63,1157,678.831266,386.811429


In [None]:
# Scatter of predicted vs true total traffic count (one trace per image_id) with a fitted line
fig = go.Figure()

for image_id in df_traffic_count['image_id'].unique():
    site_rows = df_traffic_count[df_traffic_count['image_id'] == image_id]
    fig.add_trace(go.Scatter(
        x=site_rows['Total_N15'],
        y=site_rows['Total Volume'],
        mode='markers',
        marker=dict(size=10),
        name=image_id,
        text=site_rows['image_id'],
    ))

# Degree-1 least-squares fit of the true total on the predicted total
fit_coefficients = np.polyfit(df_traffic_count['Total_N15'], df_traffic_count['Total Volume'], 1)
fig.add_trace(go.Scatter(
    x=df_traffic_count['Total_N15'],
    y=np.polyval(fit_coefficients, df_traffic_count['Total_N15']),
    mode='lines',
    name='Line of Best Fit',
))

fig.update_layout(
    title='Predicted Traffic Counts vs True Traffic for Chosen LA Count Sites',
    xaxis_title='Predicted Traffic Count',
    yaxis_title='True Traffic Count',
    legend_title='Image ID',
    width=1000,
    height=800,
    xaxis=dict(scaleanchor="y", scaleratio=1),
    yaxis=dict(scaleanchor="x", scaleratio=1),
)
fig.show()

## AADT and GHG 

In [None]:
# Join the AADT and GHG comparison tables on image_id (inner join, the pandas default)
aadt_columns = df_aadt[['image_id', 'mean_aadt', 'true_aadt']]
ghg_columns = df_ghg[['image_id', 'mean_ghg', 'true_ghg']]
merged_df = aadt_columns.merge(ghg_columns, on='image_id')

merged_df.head()

Unnamed: 0,image_id,mean_aadt,true_aadt,mean_ghg,true_ghg
0,luton_m1_2557b,71058.5,70355.0,36.847,34.480488
1,luton_m1_2557a,71058.5,70355.0,36.847,34.480488
2,havering_m25_5790b,60565.205,66344.0,223.914,161.286581
3,havering_m25_5790a,60565.205,66344.0,223.914,161.286581
4,trafford_m60_9083a,61076.385,60317.5,6.1675,96.466577


In [None]:
# Scatter of mean predicted AADT vs mean predicted GHG (one trace per image_id) with a fitted line
fig = go.Figure()

for image_id in merged_df['image_id'].unique():
    site_rows = merged_df[merged_df['image_id'] == image_id]
    fig.add_trace(go.Scatter(
        x=site_rows['mean_aadt'],
        y=site_rows['mean_ghg'],
        mode='markers',
        marker=dict(size=10),
        name=image_id,
        text=site_rows['image_id'],
    ))

# Degree-1 least-squares fit of mean GHG on mean AADT
fit_coefficients = np.polyfit(merged_df['mean_aadt'], merged_df['mean_ghg'], 1)
fig.add_trace(go.Scatter(
    x=merged_df['mean_aadt'],
    y=np.polyval(fit_coefficients, merged_df['mean_aadt']),
    mode='lines',
    name='Line of Best Fit',
))

fig.update_layout(
    title='Mean Predicted AADT vs Mean GHG for Chosen LAs',
    xaxis_title='Mean AADT',
    yaxis_title='Mean GHG',
    legend_title='Image ID',
    width=1000,
    height=800,
)

fig.show()