In [1]:
import numpy as np
import pandas as pd
from lib import CovarianceModel, Hemisphere, df_residuals, pretty_cov

In [2]:
# Read the three data splits, in train / val / test order, from CSV files
# located relative to the current working directory.
train_set, val_set, test_set = (
    pd.read_csv(csv_path) for csv_path in ('train.csv', 'val.csv', 'test.csv')
)

In [3]:
class GlobalCovModel(CovarianceModel):
    """Baseline model: a single covariance matrix shared by every query.

    The matrix is estimated once from all residuals in the training frame
    and returned unchanged regardless of position or intensity.
    """

    def __init__(self):
        # Populated by train(); remains None until training has run.
        self.cov = None

    def train(self, train_set: pd.DataFrame):
        """Fit the pooled sample covariance and print it.

        Args:
            train_set: frame handed to ``df_residuals`` to obtain the
                residuals whose covariance is estimated.
        """
        residuals = df_residuals(train_set)
        # np.cov treats rows as variables, hence the transpose;
        # ddof=1 gives the unbiased (N - 1) normalisation.
        pooled = np.cov(residuals.T, ddof=1)
        self.cov = pooled
        print(pretty_cov(pooled))
        print('Training complete')

    def estimate(self, long: float, lat: float, intensity: float) -> np.ndarray:
        """Return the pooled covariance; ``long``, ``lat`` and ``intensity`` are ignored."""
        return self.cov

# Evaluate via the `assess` routine inherited from CovarianceModel, called on
# the class itself. The captured output below shows the train() printout
# followed by a log-likelihood report.
# NOTE(review): train_set is loaded above but never passed here; confirm which
# split assess() fits on and which it scores.
GlobalCovModel.assess(val_set, test_set)

[[1.374, 0.028], [0.028, 0.737]]
Training complete


100%|██████████| 21544/21544 [00:01<00:00, 17098.85it/s]

GlobalCovModel:
  log likelihood: -63997
  log geo mean likelihood: -2.971
  geo mean p density: 0.05128





In [4]:
class BinnedCovModel(CovarianceModel):
    """Covariance model with one sample covariance per (hemisphere, intensity) bin.

    Training rows are grouped by hemisphere and by exact match against a fixed
    list of intensity values; each group gets its own covariance matrix.
    """

    def __init__(self):
        # Maps (hemisphere, intensity bin) -> covariance matrix; filled by train().
        self.covs = {}
        # Intensity values that define the bins.
        self.intensities = [-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
        self.hemispheres = [Hemisphere.South, Hemisphere.North]

    def train(self, train_set: pd.DataFrame):
        """Fit and print one sample covariance per (hemisphere, intensity) bin.

        Args:
            train_set: frame with ``intensity`` and ``lat`` columns; each
                binned subset is handed to ``df_residuals``.
        """
        # bin the data by hemisphere and intensity at the start of the time window
        # and calculate the sample covariance matrix for each bin
        for inten in self.intensities:
            correct_inten = train_set[train_set.intensity == inten]
            for hemi in self.hemispheres:
                # NOTE(review): rows with lat == 0 fall into neither subset here,
                # while estimate() classifies them via Hemisphere.latitude --
                # confirm this asymmetry is intended.
                if hemi == Hemisphere.North:
                    subset = correct_inten[correct_inten.lat > 0]
                else:
                    subset = correct_inten[correct_inten.lat < 0]
                # np.cov treats rows as variables, hence the transpose;
                # ddof=1 gives the unbiased (N - 1) normalisation.
                self.covs[hemi, inten] = np.cov(df_residuals(subset).T, ddof=1)
        for (hemi, inten), cov in self.covs.items():
            print(f"{hemi} {inten}: {pretty_cov(cov)}")
        print('Training complete')

    def estimate(self, long: float, lat: float, intensity: float) -> np.ndarray:
        """Return the covariance of the bin that ``lat`` and ``intensity`` fall in.

        An intensity that exactly matches a trained bin is looked up directly.
        Any other finite intensity is snapped to the nearest bin value (ties go
        to the lower bin, values beyond the range go to the end bins) instead of
        failing the dictionary lookup. ``long`` is ignored.

        Raises:
            KeyError: if ``intensity`` is NaN or infinite, or if no covariance
                is stored for the resolved bin (e.g. the model is untrained).
        """
        hemi = Hemisphere.latitude(lat)
        if (hemi, intensity) in self.covs:
            return self.covs[hemi, intensity]
        if not np.isfinite(intensity):
            # Keep the lookup failure for NaN / inf rather than silently
            # assigning them to an arbitrary bin.
            raise KeyError((hemi, intensity))
        # min() returns the first minimum, so a tie resolves to the lower bin.
        nearest = min(self.intensities, key=lambda edge: abs(edge - intensity))
        return self.covs[hemi, nearest]

# Evaluate via the `assess` routine inherited from CovarianceModel, called on
# the class itself. The captured output below shows the per-bin train()
# printout followed by a log-likelihood report.
# NOTE(review): train_set is loaded above but never passed here; confirm which
# split assess() fits on and which it scores.
BinnedCovModel.assess(val_set, test_set)

South -5.0: [[2.388, -0.389], [-0.389, 1.424]]
North -5.0: [[2.434, 0.088], [0.088, 1.008]]
South -4.0: [[1.263, -0.270], [-0.270, 1.229]]
North -4.0: [[7.757, 1.778], [1.778, 2.306]]
South -3.0: [[0.514, -0.047], [-0.047, 0.513]]
North -3.0: [[1.129, 0.098], [0.098, 0.700]]
South -2.0: [[0.975, -0.815], [-0.815, 1.027]]
North -2.0: [[1.434, 0.068], [0.068, 1.007]]
South -1.0: [[1.403, 0.011], [0.011, 0.693]]
North -1.0: [[1.323, 0.160], [0.160, 0.686]]
South 0.0: [[1.652, -0.110], [-0.110, 0.788]]
North 0.0: [[0.980, 0.074], [0.074, 0.582]]
South 1.0: [[1.396, -0.489], [-0.489, 0.843]]
North 1.0: [[0.833, 0.105], [0.105, 0.638]]
South 2.0: [[0.724, -0.006], [-0.006, 0.512]]
North 2.0: [[0.591, 0.047], [0.047, 0.392]]
South 3.0: [[0.703, -0.068], [-0.068, 0.354]]
North 3.0: [[0.592, 0.134], [0.134, 0.429]]
South 4.0: [[1.470, -0.204], [-0.204, 0.479]]
North 4.0: [[0.477, 0.161], [0.161, 0.428]]
South 5.0: [[0.058, -0.008], [-0.008, 0.045]]
North 5.0: [[0.382, 0.029], [0.029, 0.172]]
Training complete

100%|██████████| 21544/21544 [00:01<00:00, 17386.74it/s]

BinnedCovModel:
  log likelihood: -62118
  log geo mean likelihood: -2.883
  geo mean p density: 0.05595



