<a href="https://colab.research.google.com/github/WRFitch/fyp/blob/main/src/fyp_model_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Testing
A notebook for testing an exported model. Ideally, it forms part of a model evaluation pipeline in which the model is evaluated in greater depth.

## Setup

### Notebook Setup 

In [None]:
# Remove any fastai that is already installed, then install the newest release
# without using pip's download cache.
# NOTE(review): the version is not pinned, so a later run may pick up a
# different fastai than the one the model was exported with — consider pinning.
!pip uninstall -y fastai
!pip install -U --no-cache-dir fastai

In [None]:
from fastai.vision.all import *
from google.colab import drive
#from scipy import stats
from sklearn.metrics import mean_squared_error

import numpy as np 
import os 
import pandas as pd

# Mount Google Drive at /content/drive (Colab only).
# presumably the data/model paths in `constants` point into this mount — verify.
drive.mount('/content/drive')

In [2]:
# Delete any earlier checkout of the repo so the `git clone` in the next cell
# starts from a clean directory.
%rm -rf /content/fyp/

In [3]:
# Import the fyputil library: clone the project repo, then import its modules.
%cd /content
!git clone https://github.com/WRFitch/fyp.git
# The imports are done from inside the package directory, presumably so that the
# plain module names below resolve from the working directory — verify.
%cd fyp/src/fyputil
import constants as c
import fyp_utils as fyputil
# Go back to /content once the modules are loaded.
%cd /content

/content
Cloning into 'fyp'...
remote: Enumerating objects: 327, done.[K
remote: Counting objects: 100% (327/327), done.[K
remote: Compressing objects: 100% (268/268), done.[K
remote: Total 839 (delta 213), reused 111 (delta 59), pack-reused 512[K
Receiving objects: 100% (839/839), 144.30 MiB | 35.43 MiB/s, done.
Resolving deltas: 100% (468/468), done.
/content/fyp/src/fyputil
/content


### Data Setup 

In [4]:
# Define a dummy label function so that fastai lets us import the model.
# We're not going to train the model further, so a stand-in is fine.
def getGhgsAsArr(img_path):
  """Placeholder for the label function used when the model was trained.

  Args:
    img_path: path of the image to label; ignored by this stand-in.

  Returns:
    An empty NumPy array.
  """
  # np.array() without an argument raises TypeError, so build a real empty array.
  return np.array([])

# Load the exported learner from `<c.model_dir>/<c.model_name>.pkl`.
# NOTE(review): exported learners are pickle files — only load files you trust.
model = load_learner(f"{c.model_dir}/{c.model_name}.pkl")

In [5]:
# Read the GHG measurements from c.ghg_csv and pass them straight through
# fyputil.normGhgDf (presumably a normalisation step — see fyp_utils).
ghg_df = fyputil.normGhgDf(pd.read_csv(c.ghg_csv))
ghg_df

Unnamed: 0.1,Unnamed: 0,system:index,SO2_column_number_density,longitude,latitude,CH4_column_volume_mixing_ratio_dry_air,CO_column_number_density,tropospheric_HCHO_column_number_density,tropospheric_NO2_column_number_density,O3_column_number_density
0,134,0_134,2.672824,-0.795009,51.118631,3.293359,7.330961,4.340454,4.529627,5.862382
1,135,0_135,2.486536,-0.786026,51.118631,3.149588,7.000534,4.446930,4.546964,5.798349
2,136,0_136,2.740308,-0.777043,51.118631,2.931395,7.231526,4.323521,4.499055,5.487693
3,137,0_137,3.104459,-0.768060,51.118631,1.970592,7.131359,4.237725,4.479952,5.174860
4,138,0_138,3.176498,-0.759076,51.118631,2.547749,6.893881,4.456948,4.483478,5.098768
...,...,...,...,...,...,...,...,...,...,...
1881,4147,0_4147,2.313291,0.345851,51.379143,6.714247,9.540681,5.897296,5.975961,10.403010
1882,4148,0_4148,2.328240,0.354835,51.379143,9.206190,10.010053,5.582224,5.942072,10.446786
1883,4149,0_4149,2.456615,0.363818,51.379143,7.094298,11.400949,6.073959,5.988985,10.475704
1884,4150,0_4150,2.675886,0.372801,51.379143,7.094296,10.934001,6.224904,5.922944,10.375585


In [9]:
# Layout of the error table: image longitude and latitude, then one column per
# GHG band.
err_headers = [c.lon, c.lat, *c.ghg_bands]
errors = pd.DataFrame(columns=err_headers)
errors.head(1)

Unnamed: 0,longitude,latitude,CO_column_number_density,tropospheric_HCHO_column_number_density,tropospheric_NO2_column_number_density,O3_column_number_density,SO2_column_number_density,CH4_column_volume_mixing_ratio_dry_air


## Testing

### Test model against existing data 

In [7]:
print(c.ghg_bands)

# TODO replace with fyputil, OR re-implement to keep any failed measurements and
# evaluate what you can out of them. Just because they're incomplete, that 
# doesn't make them worthless 
def getGhgs(img_path, df): 
  """Look up the measured GHG concentrations that belong to an image.

  Args:
    img_path: image path or file name; converted to str and handed to
      fyputil.getCoords to obtain the coordinates.
    df: table of GHG measurements that fyputil.getValAt searches.

  Returns:
    A tuple with one value per entry of c.ghg_bands, taken from the first
    matching row, or None when no row matches or that row contains a missing
    (None/NaN) value.
  """
  coords = fyputil.getCoords(str(img_path))
  ghgs = fyputil.getValAt(coords, df)
  concentrations = ghgs[c.ghg_bands]
  if len(concentrations) == 0: return None
  # Only the first matching row is ever returned, so read it directly.
  first_row = concentrations.to_numpy()[0]
  # `None in concentrations` only tested the column labels of the DataFrame, so
  # missing measurements were never caught; inspect the values themselves.
  if pd.isnull(first_row).any(): return None
  return tuple(first_row)

['CO_column_number_density', 'tropospheric_HCHO_column_number_density', 'tropospheric_NO2_column_number_density', 'O3_column_number_density', 'SO2_column_number_density', 'CH4_column_volume_mixing_ratio_dry_air']


In [10]:
# Predict on a sample of the images and record (prediction - measurement) for
# every GHG band, prefixed by the image's coordinates.
sample_every = 100  # only every 100th image that has GHG data is predicted
mod = 0  # number of images with GHG data seen so far

for filename in os.listdir(c.png_dir):
  file_ghgs = getGhgs(filename, ghg_df)
  # Identity test: `== None` is not a reliable None check for sequence-like values.
  if file_ghgs is None: continue
  if mod % sample_every == 0:
    print(f"predicting ghg gases at {filename}")
    coords = fyputil.getCoords(filename) 
    prediction = model.predict(f"{c.png_dir}/{filename}")[0]

    diffs = [pred - act for pred, act in zip(prediction, file_ghgs)]
    # NOTE: rows are appended, so re-running this cell without re-creating
    # `errors` first adds duplicate rows.
    errors.loc[len(errors)] = list(coords) + diffs

    print(tuple(coords))
    print(prediction)
    print(file_ghgs)
    print(diffs)
    print()
  mod += 1 



predicting ghg gases at -0.7860258736045813_51.34321006385125.png


(-0.7860258736045813, 51.34321006385125)
(3.321014881134033, 6.196832656860352, 5.849663734436035, 1.365954875946045, 2.97467041015625, 1.838371753692627)
(5.700281438838633, 6.1002314112803235, 6.016104068351802, 8.22480297615863, 3.8827485607775603, 3.091472696924484)
[-2.3792665577046, 0.09660124558002803, -0.16644033391576674, -6.858848100212585, -0.9080781506213103, -1.253100943231857]

predicting ghg gases at 0.3458513843860158_51.34321006385125.png


(0.3458513843860158, 51.34321006385125)
(2.124889612197876, 6.2424445152282715, 4.766998767852783, 0.021640244871377945, 0.8658193945884705, 0.0981532633304596)
(9.626941725286025, 6.1976230923727575, 5.828184649833629, 10.137142338690342, 2.0346703869559186, 7.042464733128327)
[-7.5020521130881495, 0.04482142285551394, -1.0611858819808457, -10.115502093818964, -1.1688509923674482, -6.944311469797867]

predicting ghg gases at 0.3368682315448206_51.361176369533645.png


(0.3368682315448206, 51.361176369533645)
(3.3293209075927734, 6.097378253936768, 5.32807731628418, 1.431531310081482, 2.906400680541992, 1.8007829189300537)
(9.956234468662256, 6.461085877567219, 5.901694406410933, 10.45639089524708, 2.1401855409447608, 8.553059731512462)
[-6.6269135610694825, -0.36370762363045106, -0.5736170901267537, -9.024859585165599, 0.7662151395972314, -6.752276812582409]

predicting ghg gases at 0.2829693144976493_51.37015952237484.png


(0.2829693144976493, 51.37015952237484)
(3.3076000213623047, 6.191211700439453, 5.416271686553955, 1.3829798698425293, 3.1824522018432617, 1.806948184967041)
(10.391668281118996, 5.767963388508207, 6.267165371473741, 10.520651740823228, 3.403908462697123, 7.49299322767672)
[-7.084068259756691, 0.42324831193124623, -0.8508936849197859, -9.137671870980698, -0.2214562608538615, -5.686045042709679]

predicting ghg gases at 0.20212093892689234_51.37914267521604.png


(0.20212093892689234, 51.37914267521604)
(3.3321032524108887, 6.117678165435791, 5.632369518280029, 1.3775795698165894, 2.9427270889282227, 1.8199608325958252)
(13.231953110715775, 5.472442028500891, 6.578555754674347, 10.672212784214041, 2.730297654753723, 8.06920623121323)
[-9.899849858304886, 0.6452361369349005, -0.9461862363943174, -9.294633214397452, 0.21242943417449967, -6.249245398617404]

predicting ghg gases at 0.1661883275621115_51.18151331270974.png


(0.1661883275621115, 51.18151331270974)
(3.3683762550354004, 6.302408218383789, 5.4630842208862305, 1.3483489751815796, 3.0831284523010254, 1.7234241962432861)
(11.379930188147522, 7.075242554693576, 5.255308728490337, 9.302583383118957, 4.597428241356164, 6.312187606295311)
[-8.011553933112122, -0.7728343363097867, 0.20777549239589366, -7.954234407937378, -1.5142997890551388, -4.5887634100520245]

predicting ghg gases at 0.10330625767374498_51.190496465550936.png


(0.10330625767374498, 51.190496465550936)
(3.3196768760681152, 5.961051940917969, 5.274539470672607, 1.4288438558578491, 3.048839569091797, 1.8395464420318604)
(10.227290422003058, 6.079860165063859, 5.305540846590793, 9.278129470713104, 2.2699609541613817, 6.959507218695194)
[-6.907613545934943, -0.11880822414588987, -0.031001375918185303, -7.849285614855255, 0.7788786149304152, -5.1199607766633335]

predicting ghg gases at 0.15720517472091627_51.11863124282137.png


(0.15720517472091627, 51.11863124282137)
(3.321943759918213, 6.082625865936279, 5.466413497924805, 1.428279995918274, 3.212303876876831, 1.8418364524841309)
(9.900489023510136, 5.118353502220666, 4.79641639047329, 8.924711341390113, 3.8594007380760975, 4.174333542878458)
[-6.5785452635919235, 0.9642723637156134, 0.6699971074515148, -7.496431345471839, -0.6470968611992665, -2.3324970903943267]

predicting ghg gases at 0.14822202187972106_51.16354700702735.png


(0.14822202187972106, 51.16354700702735)
(3.2901227474212646, 6.2062554359436035, 5.4096293449401855, 1.4005221128463745, 3.1695783138275146, 1.8324313163757324)
(9.374345383415806, 7.1090640464873385, 5.216828711067987, 9.414983596137127, 3.216587827040189, 4.575450086067349)
[-6.084222635994541, -0.902808610543735, 0.19280063387219837, -8.014461483290752, -0.047009513212674214, -2.7430187696916164]

predicting ghg gases at 0.35483453722721103_51.20846277123333.png


(0.35483453722721103, 51.20846277123333)
(3.2979013919830322, 6.0221452713012695, 5.528857231140137, 1.3838046789169312, 2.9740235805511475, 1.8157973289489746)
(13.075658462942396, 6.121630037661282, 5.228017339823881, 9.993153341582573, 2.753645542649176, 10.803995224347204)
[-9.777757070959364, -0.09948476636001224, 0.3008398913162553, -8.609348662665642, 0.22037803790197152, -8.98819789539823]

predicting ghg gases at 0.2919524673388445_51.226429076915714.png


(0.2919524673388445, 51.226429076915714)
(3.3548474311828613, 6.2579545974731445, 5.606722831726074, 1.353334665298462, 3.321960210800171, 1.7737843990325928)
(13.283626625063203, 5.748818590273779, 5.147581213645673, 9.900050693475437, 3.6848768157814935, 10.20236209364532)
[-9.928779193880342, 0.5091360071993654, 0.4591416180804009, -8.546716028176975, -0.3629166049813226, -8.428577694612727]

predicting ghg gases at 0.229070397450478_51.23541222975691.png


(0.229070397450478, 51.23541222975691)
(3.3318545818328857, 6.299411773681641, 5.276389122009277, 1.400618076324463, 3.186361312866211, 1.7870075702667236)
(11.086204820857995, 6.634902777062545, 5.248190538651104, 9.434269664732597, 4.143315876270762, 7.326542982640376)
[-7.75435023902511, -0.33549100338090465, 0.028198583358173046, -8.033651588408134, -0.9569545634045511, -5.539535412373652]

predicting ghg gases at 0.15720517472091627_51.2533785354393.png


(0.15720517472091627, 51.2533785354393)
(3.3234000205993652, 6.250617980957031, 5.260552406311035, 1.4195091724395752, 3.1809329986572266, 1.8401974439620972)
(10.67937172615635, 7.691812480574711, 5.285615723101502, 8.928915861153575, 2.575094113174213, 5.372410165744327)
[-7.3559717055569855, -1.44119449961768, -0.025063316790467027, -7.509406688714, 0.6058388854830135, -3.53221272178223]

predicting ghg gases at -0.26500300881525884_51.29829429964528.png


(-0.26500300881525884, 51.29829429964528)
(3.2730448246002197, 6.022161483764648, 5.457268714904785, 1.422963261604309, 2.920185089111328, 1.825869083404541)
(8.816976370830398, 5.150612706662084, 5.771363956664699, 10.026832925517926, 2.0119034711725514, 5.457337742286427)
[-5.543931546230178, 0.8715487771025643, -0.31409524175991343, -8.603869663913617, 0.9082816179387767, -3.631468658881886]

predicting ghg gases at -0.3278850787036254_51.307277452486474.png


(-0.3278850787036254, 51.307277452486474)
(3.2839252948760986, 6.203650951385498, 6.0514044761657715, 1.401476263999939, 2.7588353157043457, 1.773830771446228)
(8.90143681294954, 5.314312582843895, 5.859934272292446, 9.85033769633964, 3.167633323547813, 5.703700845297249)
[-5.6175115180734405, 0.889338368541603, 0.19147020387332514, -8.4488614323397, -0.4087980078434672, -3.9298700738510206]

predicting ghg gases at -0.3728008429096014_51.31626060532767.png


(-0.3728008429096014, 51.31626060532767)
(3.3379716873168945, 6.10349702835083, 5.651159286499023, 1.3769989013671875, 2.9202961921691895, 1.8517780303955078)
(10.078300130191703, 6.6084520392200226, 5.985903138440752, 10.317925013244578, 2.6896202617952873, 6.228845048422272)
[-6.740328442874809, -0.5049550108691925, -0.3347438519417283, -8.94092611187739, 0.2306759303739021, -4.377067018026764]

predicting ghg gases at -0.4626323713215536_51.32524375816886.png


(-0.4626323713215536, 51.32524375816886)
(3.325636148452759, 6.12178373336792, 5.4576735496521, 1.3504912853240967, 2.976593494415283, 1.768267035484314)
(9.378943867555597, 6.396413562055545, 6.158726493421717, 10.160233428704036, 3.521135454660214, 6.999510843386361)
[-6.053307719102838, -0.2746298286876252, -0.7010529437696178, -8.809742143379939, -0.5445419602449308, -5.231243807902047]

predicting ghg gases at -0.4087334542743823_51.19947961839213.png


(-0.4087334542743823, 51.19947961839213)
(3.3015613555908203, 6.065624237060547, 5.282308578491211, 1.3922606706619263, 3.0042386054992676, 1.822554111480713)
(4.351060694279749, 5.6530776365658495, 4.864550317647034, 8.727724031936113, 2.171961139029797, 5.7997290537097115)
[-1.0494993386889284, 0.4125466004946974, 0.41775826084417655, -7.335463361274186, 0.8322774664694705, -3.9771749422289986]

predicting ghg gases at 0.20212093892689234_51.32524375816886.png


(0.20212093892689234, 51.32524375816886)
(3.3425631523132324, 6.07095193862915, 5.254035472869873, 1.4213333129882812, 3.042912244796753, 1.776193618774414)
(9.738256132679341, 6.027676987823028, 5.572109976812338, 10.189548373458479, 4.085204740752072, 6.167747028206691)
[-6.395692980366109, 0.04327495080612209, -0.31807450394246484, -8.768215060470197, -1.0422924959553193, -4.391553409432277]



In [11]:
errors 

Unnamed: 0,longitude,latitude,CO_column_number_density,tropospheric_HCHO_column_number_density,tropospheric_NO2_column_number_density,O3_column_number_density,SO2_column_number_density,CH4_column_volume_mixing_ratio_dry_air
0,-0.786026,51.34321,-2.379267,0.096601,-0.16644,-6.858848,-0.908078,-1.253101
1,0.345851,51.34321,-7.502052,0.044821,-1.061186,-10.115502,-1.168851,-6.944311
2,0.336868,51.361176,-6.626914,-0.363708,-0.573617,-9.02486,0.766215,-6.752277
3,0.282969,51.37016,-7.084068,0.423248,-0.850894,-9.137672,-0.221456,-5.686045
4,0.202121,51.379143,-9.89985,0.645236,-0.946186,-9.294633,0.212429,-6.249245
5,0.166188,51.181513,-8.011554,-0.772834,0.207775,-7.954234,-1.5143,-4.588763
6,0.103306,51.190496,-6.907614,-0.118808,-0.031001,-7.849286,0.778879,-5.119961
7,0.157205,51.118631,-6.578545,0.964272,0.669997,-7.496431,-0.647097,-2.332497
8,0.148222,51.163547,-6.084223,-0.902809,0.192801,-8.014461,-0.04701,-2.743019
9,0.354835,51.208463,-9.777757,-0.099485,0.30084,-8.609349,0.220378,-8.988198


In [None]:
# Persist the error table. The positional index is not written, because the
# plain `pd.read_csv` that reloads this file would turn it into a spurious
# "Unnamed: 0" column.
errors.to_csv(f"{c.data_dir}/errors2.csv", index=False)

### Basic stat testing 
- Data exploration 
- RMSE per GHG
- Extract outliers & view images 

In [None]:
# Reload the saved error table from c.data_dir, replacing the in-memory
# `errors` (presumably so the statistics below can be computed without
# re-running the prediction loop).
errors = pd.read_csv(f"{c.data_dir}/errors2.csv")

In [12]:
# Empty summary table: a label column followed by one column per GHG band.
model_stats = pd.DataFrame(columns=["stat", *c.ghg_bands])

In [13]:
def getRmse(series): 
  """Return the root-mean-square of `series`.

  Args:
    series: values supporting `** 2` and `np.mean`, e.g. a pandas Series or a
      NumPy array of errors.

  Returns:
    The square root of the mean of the squared values.
  """
  mean_square = np.mean(series ** 2)
  return np.sqrt(mean_square)

In [14]:
# Define aggregate metrics.
# Mean, standard deviation and RMSE describe the prediction errors in `errors`;
# min, max and range describe the measured values in `ghg_df`.
means = [errors[ghg].mean() for ghg in c.ghg_bands]
stdevs = [errors[ghg].std() for ghg in c.ghg_bands]
rmse = [getRmse(errors[ghg]) for ghg in c.ghg_bands]
# Named ghg_* so the builtins min(), max() and range() are not shadowed for the
# rest of the kernel session.
ghg_min = [ghg_df[ghg].min() for ghg in c.ghg_bands]
ghg_max = [ghg_df[ghg].max() for ghg in c.ghg_bands]
ghg_range = [maxval - minval for maxval, minval in zip(ghg_max, ghg_min)]
# RMSE expressed relative to the spread of the measured values, in percent.
rmse_as_pct = [(errval / rngval) * 100 for errval, rngval in zip(rmse, ghg_range)]

model_stats.loc[1] = ["Mean"] + means
model_stats.loc[2] = ["Standard Deviation"] + stdevs 
model_stats.loc[3] = ["Min"] + ghg_min
model_stats.loc[4] = ["Max"] + ghg_max
model_stats.loc[5] = ["Range"] + ghg_range
model_stats.loc[6] = ["RMSE"] + rmse
model_stats.loc[7] = ["RMSE as percentage"] + rmse_as_pct

In [15]:
model_stats

Unnamed: 0,stat,CO_column_number_density,tropospheric_HCHO_column_number_density,tropospheric_NO2_column_number_density,O3_column_number_density,SO2_column_number_density,CH4_column_volume_mixing_ratio_dry_air
1,Mean,-6.699538,0.004532,-0.15023,-8.386954,-0.171964,-4.931375
2,Standard Deviation,2.214216,0.650072,0.501155,0.791724,0.753321,1.974943
3,Min,3.30039,4.034831,4.384961,4.828771,1.023293,0.990163
4,Max,15.376154,8.663142,7.415872,11.545652,5.474309,12.736687
5,Range,12.075764,4.628312,3.030911,6.716881,4.451017,11.746524
6,RMSE,7.03765,0.63275,0.510398,8.422282,0.753125,5.292785
7,RMSE as percentage,58.279128,13.671281,16.839765,125.38977,16.920285,45.058308


In [None]:
# Finding error rate as a percentage based on RMSE 

In [None]:
# Find outliers 

### Sample images vs predictions 
Which regions are easier to predict than others?

Create an accuracy heatmap.

### Activation Mapping