In [1]:
import numpy as np
import pandas as pd
from lib import CovarianceModel, Hemisphere, df_residuals, pretty_cov

In [2]:
# Load the three dataset splits from CSV files in the current working directory.
# Only train_set and test_set are passed to the models in the cells shown here;
# val_set is loaded but not referenced by them.
train_set = pd.read_csv('train.csv')
val_set = pd.read_csv('val.csv')
test_set = pd.read_csv('test.csv')

In [3]:
class GlobalCovModel(CovarianceModel):
    """Baseline model that uses one covariance matrix for every query.

    The matrix is the sample covariance of the residuals of the whole training
    set; the query location and intensity are ignored.
    """

    def __init__(self):
        # Set by train(); stays None until the model has been trained.
        self.cov = None

    def train(self, train_set: pd.DataFrame):
        """Fit the single covariance matrix and print it.

        Args:
            train_set: training rows; handed unchanged to ``df_residuals``.
                NOTE(review): presumably ``df_residuals`` returns one row per
                observation and one column per residual component (hence the
                transpose before ``np.cov``) — confirm in ``lib``.
        """
        residuals = df_residuals(train_set)
        # np.cov expects variables in rows, so transpose; ddof=1 gives the
        # unbiased (N - 1) normalisation.
        pooled_cov = np.cov(residuals.T, ddof=1)
        self.cov = pooled_cov
        print(pretty_cov(pooled_cov))
        print('Training complete')

    def estimate(self, long: float, lat: float, intensity: float) -> np.ndarray:
        """Return the stored covariance matrix, whatever the arguments are.

        Args:
            long: unused.
            lat: unused.
            intensity: unused.

        Returns:
            The matrix computed by ``train`` (``None`` if ``train`` has not run).
        """
        return self.cov

# Evaluate the single-matrix baseline. `assess` is not defined on GlobalCovModel
# itself, so it comes from CovarianceModel; it is called on the class (not an
# instance) with the training and test splits.
# NOTE(review): presumably it builds the model, trains on train_set and scores on
# test_set — confirm against lib.CovarianceModel.
GlobalCovModel.assess(train_set, test_set)

[[0.398, 0.010], [0.010, 0.268]]
Training complete


100%|██████████| 21544/21544 [00:02<00:00, 8858.83it/s]

GlobalCovModel:
  log likelihood: -90490
  log geo mean likelihood: -4.200
  geo mean p density: 0.01499





In [5]:
class BinnedCovModel(CovarianceModel):
    """Covariance model with one sample covariance per (hemisphere, intensity) bin.

    Training rows are grouped by hemisphere and by their exact ``intensity``
    value; each group gets its own covariance matrix, and ``estimate`` looks up
    the matrix of the bin the query falls into.
    """

    def __init__(self):
        # (hemisphere, intensity) -> covariance matrix; filled in by train().
        self.covs = {}
        # Intensity values that define the bins. Rows are matched by exact
        # equality, so intensities outside this list end up in no bin.
        self.intensities = [-5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
        self.hemispheres = [Hemisphere.South, Hemisphere.North]

    def train(self, train_set: pd.DataFrame):
        """Compute and print the sample covariance matrix of every bin.

        Bins the data by hemisphere and intensity at the start of the time
        window and calculates the sample covariance matrix for each bin.

        Args:
            train_set: training rows; must provide ``lat`` and ``intensity``
                columns plus whatever ``df_residuals`` reads.
        """
        # Classify rows with the same function estimate() uses, so training and
        # lookup can never disagree about which hemisphere a latitude belongs
        # to (in particular for lat == 0, which a hand-written `> 0` / `< 0`
        # split silently drops). na_action='ignore' keeps missing latitudes out
        # of every bin instead of passing NaN to Hemisphere.latitude.
        row_hemisphere = train_set.lat.map(Hemisphere.latitude, na_action='ignore')

        for inten in self.intensities:
            inten_mask = train_set.intensity == inten
            for hemi in self.hemispheres:
                subset = train_set[inten_mask & (row_hemisphere == hemi)]
                # ddof=1: unbiased estimate. A bin with fewer than two rows
                # cannot produce a finite matrix with this normalisation.
                self.covs[hemi, inten] = np.cov(df_residuals(subset).T, ddof=1)

        for (hemi, inten), cov in self.covs.items():
            print(f"{hemi} {inten}: {pretty_cov(cov)}")
        print('Training complete')

    def estimate(self, long: float, lat: float, intensity: float) -> np.ndarray:
        """Return the covariance matrix of the bin containing the query.

        Args:
            long: unused.
            lat: latitude; mapped to a hemisphere with ``Hemisphere.latitude``.
            intensity: must be exactly one of ``self.intensities``.

        Returns:
            The covariance matrix stored for ``(hemisphere, intensity)``.

        Raises:
            KeyError: if no matrix is stored for that bin, i.e. ``train`` has
                not been run or ``intensity`` is not one of the bin values.
        """
        key = (Hemisphere.latitude(lat), intensity)
        try:
            return self.covs[key]
        except KeyError:
            raise KeyError(
                f"no covariance stored for bin {key}; call train() first and "
                f"use an intensity from {self.intensities}"
            ) from None

# Evaluate the binned model with the same `assess` entry point and the same
# train/test splits as the global baseline, so the reported likelihoods can be
# compared directly. `assess` is not defined on BinnedCovModel, so it comes from
# CovarianceModel.
# NOTE(review): presumably it builds the model, trains on train_set and scores on
# test_set — confirm against lib.CovarianceModel.
BinnedCovModel.assess(train_set, test_set)

South -5.0: [[0.650, -0.052], [-0.052, 0.413]]
North -5.0: [[0.933, 0.024], [0.024, 0.403]]
South -4.0: [[0.793, -0.179], [-0.179, 0.547]]
North -4.0: [[1.863, 0.133], [0.133, 0.580]]
South -3.0: [[0.301, -0.017], [-0.017, 0.257]]
North -3.0: [[0.292, 0.014], [0.014, 0.241]]
South -2.0: [[0.462, -0.079], [-0.079, 0.291]]
North -2.0: [[0.541, 0.055], [0.055, 0.281]]
South -1.0: [[0.346, -0.029], [-0.029, 0.277]]
North -1.0: [[0.327, 0.022], [0.022, 0.261]]
South 0.0: [[0.344, -0.038], [-0.038, 0.277]]
North 0.0: [[0.340, 0.042], [0.042, 0.238]]
South 1.0: [[0.304, -0.039], [-0.039, 0.234]]
North 1.0: [[0.335, 0.031], [0.031, 0.220]]
South 2.0: [[0.308, -0.029], [-0.029, 0.221]]
North 2.0: [[0.288, 0.041], [0.041, 0.201]]
South 3.0: [[0.251, -0.035], [-0.035, 0.210]]
North 3.0: [[0.223, 0.028], [0.028, 0.182]]
South 4.0: [[0.219, -0.029], [-0.029, 0.171]]
North 4.0: [[0.194, 0.031], [0.031, 0.168]]
South 5.0: [[0.186, -0.045], [-0.045, 0.237]]
North 5.0: [[0.184, 0.052], [0.052, 0.165]]


100%|██████████| 21544/21544 [00:02<00:00, 8747.68it/s]

BinnedCovModel:
  log likelihood: -84599
  log geo mean likelihood: -3.927
  geo mean p density: 0.01971



