<a href="https://colab.research.google.com/github/WRFitch/fyp/blob/main/src/fyp_ensemble_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combining multiple models to interpolate greenhouse gases 

## Setup

### Install and import dependencies

In [None]:
# Remove whatever fastai version is present, then install the latest release
# without using pip's download cache.
# NOTE(review): the version is not pinned, so results may change between runs.
!pip uninstall -y fastai
!pip install -U --no-cache-dir fastai

In [None]:
import os 
import numpy as np 
import pandas as pd 

from fastai.tabular.all import * 
from fastai.vision.all import * 
from google.colab import drive

# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): presumably the data and model paths in `constants` point here — confirm.
drive.mount('/content/drive')

In [None]:
# Fetch the project repository into /content/fyp so its helper modules can be imported.
# NOTE(review): re-running this cell while /content/fyp already exists makes git report an error.
%cd /content
!git clone https://github.com/WRFitch/fyp.git

In [None]:
# Import the project's fyputil helpers (`constants` as c, `fyp_utils` as fyputil).
# Both modules are imported by bare name, so the working directory is switched to
# the package folder first (presumably so the import resolves) and then restored
# to /content afterwards.
%cd /content/fyp/src/fyputil
import constants as c
import fyp_utils as fyputil
%cd /content

In [None]:
# Path of the "png_224" image directory under c.data_dir.
# NOTE(review): presumably the 224-px images used by the CNN — confirm.
big_png_dir = f"{c.data_dir}/png_224"

### Load data and model

In [None]:
# Load the greenhouse-gas table from the CSV at c.ghg_csv.
ghg_df = pd.read_csv(c.ghg_csv)
# normGhgDf is given a copy, so ghg_df itself is left untouched.
# NOTE(review): presumably returns the same table with normalised readings — confirm.
norm_df = fyputil.normGhgDf(ghg_df.copy())

In [None]:
# Preview the first ten rows of the raw table.
ghg_df.head(10)

In [None]:
def getGhgsAsArr(img_path):
  """Return the result of fyputil.getGhgsAsArr for `img_path`, looked up in ghg_df.

  Thin wrapper that binds the notebook-level `ghg_df` as the second argument.
  NOTE(review): presumably this name must exist in the notebook namespace for
  the exported learner to load — confirm before renaming or removing it.
  """
  ghg_values = fyputil.getGhgsAsArr(img_path, ghg_df)
  return ghg_values

In [None]:
# Load the exported CNN learner from c.model_dir.
# NOTE(review): the export is a .pkl file; unpickling can execute arbitrary code,
# so only load model files that come from a trusted source.
cnn_model = load_learner(f"{c.model_dir}/mrghg_200221_bigimg.pkl")

In [None]:
# Work on a copy so prediction columns can be added without modifying ghg_df.
predicted_df = ghg_df.copy()

In [None]:
# Add one "<band>_pred" column per greenhouse-gas band, initialised to zero.
# The columns are created and zeroed by name in a single vectorised assignment,
# instead of copying the measured values and then overwriting hard-coded column
# positions cell by cell inside an iterrows() loop.
for band in c.ghg_bands:
  # 0.0 keeps the column floating-point so predictions can be stored later.
  predicted_df[f"{band}_pred"] = 0.0
    

In [None]:
# Inspect the frame after the prediction columns have been added.
predicted_df

#### Generate predicted data

In [None]:
# Iterate through predicted_df and make a prediction using cnn_model.
# Each prediction is stored in the "<band>_pred" columns, addressed by name
# rather than by hard-coded column positions, so the writes stay correct even
# if the number or order of the other columns changes.
# NOTE(review): assumes the prediction values are ordered like c.ghg_bands — confirm.
pred_cols = [f"{band}_pred" for band in c.ghg_bands]

for idx, row in predicted_df.iterrows():
  img_path = fyputil.getFilepath((row.longitude, row.latitude))
  # predict() returns several items; only the first is used here.
  pred = cnn_model.predict(img_path)[0]
  dnorm_pred = fyputil.deNormGhgPrediction(pred, ghg_df)
  print(dnorm_pred)

  # idx is an index label from iterrows(), so write with the label-based .at.
  for col, value in zip(pred_cols, dnorm_pred):
    predicted_df.at[idx, col] = value

# Write prediction to predicted_df along with the eight nearest coordinates and their measurements 
# TODO(review): the loop above stores only the prediction; the eight nearest
# coordinates and their measurements are not added yet.

In [None]:
# Persist the measurements plus CNN predictions.
# index=False keeps the row index out of the file; otherwise it is written as an
# unnamed first column that reappears as "Unnamed: 0" when the CSV is read back.
predicted_df.to_csv(f"{c.data_dir}/ghg_plus_predictions.csv", index=False)

In [None]:
# Reload the saved predictions so the expensive prediction loop can be skipped.
# pandas has no top-level `from_csv`; `read_csv` is the reader function.
predicted_df = pd.read_csv(f"{c.data_dir}/ghg_plus_predictions.csv")

## Generate Tabular Model Ensemble
Train a tabular model on `ghg_df` that predicts the central reading from the eight nearest readings and the CNN model's output.