<a href="https://colab.research.google.com/github/WRFitch/fyp/blob/main/src/fyp_ensemble_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combining multiple models to interpolate greenhouse gases 

## Setup

### Install and import the required dependencies

In [None]:
# Remove any existing fastai install, then install the newest release.
# --no-cache-dir makes pip download the package instead of reusing a cached copy.
# NOTE(review): the version is not pinned, so the release that gets installed can
# change between runs — consider pinning it to the version the model was exported with.
!pip uninstall -y fastai
!pip install -U --no-cache-dir fastai

In [None]:
import os 
import numpy as np 
import pandas as pd 

from fastai.tabular.all import * 
from fastai.vision.all import * 
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

In [None]:
# Clone the project repository into /content so its helper modules are available.
# NOTE(review): re-running this cell on a live runtime makes git clone report that
# the target directory already exists.
%cd /content
!git clone https://github.com/WRFitch/fyp.git

In [None]:
# Import the project's helper modules (constants as `c`, fyp_utils as `fyputil`).
# The working directory is switched to the package folder for the duration of the
# imports and then restored to /content.
%cd /content/fyp/src/fyputil
import constants as c
import fyp_utils as fyputil
%cd /content

In [5]:
# Path to the png_224 sub-directory of the project data directory.
# NOTE(review): "224" presumably refers to the image size in pixels — confirm.
big_png_dir = f"{c.data_dir}/png_224"

### Get data & Model

In [32]:
# Load the greenhouse gas readings from the CSV file referenced by c.ghg_csv.
ghg_df = pd.read_csv(c.ghg_csv)
# Result of the project's normalisation helper applied to the readings.
# NOTE(review): confirm normGhgDf does not modify ghg_df in place — the min/max of
# ghg_df are used later in this notebook to de-normalise predictions.
norm_df = fyputil.normGhgDf(ghg_df)

In [None]:
# Preview the first ten rows of the readings.
ghg_df.head(10)

In [7]:
def getGhgsAsArr(img_path):
  """Return fyputil.getGhgsAsArr's result for img_path, using the notebook-level ghg_df.

  NOTE(review): presumably defined at notebook level under this exact name because
  the exported learner loaded later in this notebook refers to it — confirm before
  renaming or removing.
  """
  return fyputil.getGhgsAsArr(img_path, ghg_df)

In [8]:
# Load the exported CNN learner from the project model directory.
# NOTE(review): load_learner unpickles the file, so only load exports from a trusted source.
cnn_model = load_learner(f"{c.model_dir}/mrghg_200221_bigimg.pkl")

In [9]:
# Work on a copy so that predictions can be added without altering ghg_df.
predicted_df = ghg_df.copy()

#### Generate predicted data

In [27]:
def deNormPrediction(prediction, ghg_df, bands=None):
  """Map a normalised prediction back onto the original range of each band.

  Each predicted value is treated as a percentage of the band's observed range:
  0 maps to the band's minimum in ghg_df and 100 maps to its maximum.

  Args:
    prediction: indexable sequence holding one normalised value per band, in
      the same order as `bands`.
    ghg_df: DataFrame with one column per band; its per-column min and max
      define the range to scale back to.
    bands: column names to de-normalise, in prediction order. Defaults to
      c.ghg_bands.

  Returns:
    A list with one de-normalised value per band.

  Raises:
    IndexError: if prediction has fewer entries than there are bands.
    KeyError: if a band is not a column of ghg_df.
  """
  if bands is None:
    bands = c.ghg_bands

  denormed = []
  for position, band in enumerate(bands):
    # Named band_max/band_min rather than max/min so the builtins are not shadowed.
    band_max = ghg_df[band].max()
    band_min = ghg_df[band].min()
    fraction = prediction[position] / 100
    denormed.append(fraction * (band_max - band_min) + band_min)
  return denormed

In [None]:
# Run the CNN on the image for every row of predicted_df and print the raw
# prediction followed by both de-normalised versions (the fyputil helper and the
# notebook's own deNormPrediction) so that they can be compared.
for idx, row in predicted_df.iterrows():
  # Coordinates are passed as (longitude, latitude).
  coords = (row.longitude, row.latitude)
  # Presumably resolves the image file for these coordinates — confirm in fyputil.
  img_path = fyputil.getFilepath(coords)
  # Only the first element of predict()'s result is kept.
  pred = cnn_model.predict(img_path)[0]
  dnorm_pred = fyputil.deNormGhgPrediction(pred, ghg_df)
  other_pred = deNormPrediction(pred, ghg_df)
  print(pred)
  print(dnorm_pred)
  print(other_pred)


# TODO: write each prediction to predicted_df, along with the eight nearest coordinates and their measurements.

## Generate Tabular Model Ensemble
Train a tabular model on `ghg_df` that predicts the central reading from the eight nearest readings and the CNN model's output.