In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, mean_absolute_error
from datetime import datetime

In [2]:
dftrain = pd.read_csv("train_timeseries.csv").set_index(['fips', 'date'])
dftest = pd.read_csv("test_timeseries.csv").set_index(['fips', 'date'])

In [3]:
dfval = pd.read_csv("validation_timeseries.csv").set_index(['fips', 'date'])

## majority label baseline

In [None]:
def date_encode(date):
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")
    return (
        np.sin(2 * np.pi * date.timetuple().tm_yday / 366),
        np.cos(2 * np.pi * date.timetuple().tm_yday / 366),
    )

In [6]:
def cleandf(df):
    df = df[['score']].copy()
    df = df.dropna(axis=0)
    df = df.round().astype(int)
    return df

def predict(dfclean):
    fips_codes = np.unique(dfclean.index.get_level_values(0))
    data = {}
    true = {}
    for fips in fips_codes:
        subdf = dfclean.loc[fips]
        subdf.sort_index()
        for i in range(25, len(subdf) - 5): # 180 days ~ 25 weeks
            window = subdf.iloc[i - 25 : i]
            maj = np.unique(window)[0]
            pred_idx = window.iloc[-1].name 
            data[(fips, pred_idx)] = [maj] * 6
            true[(fips, pred_idx)] = subdf.iloc[i:i+6].to_numpy().flatten()

    cols = ['w1', 'w2', 'w3', 'w4', 'w5','w6']
    dfres = pd.DataFrame.from_dict(data, orient='index', 
                       columns=cols)
    dftrue = pd.DataFrame.from_dict(true, orient='index', 
                       columns=cols)
    return dfres, dftrue

def evaluate(dfres, dftrue):
    # check indices match before evaluating
    if not np.all(dfres.index == dftrue.index):
        print("index mismatch")
        return
    
    cols = ['w1', 'w2', 'w3', 'w4', 'w5','w6']
    for col in cols:
        y = dftrue[[col]]
        y_pred = dfres[[col]]
        f1 = f1_score(y, y_pred, average='macro').round(3)
        mae = mean_absolute_error(y, y_pred).round(3)
        print(col, ' f1: ', f1, ' mae:', mae)
def do_all(df):
    res, true = predict(cleandf(df))
    evaluate(res, true)

In [7]:
dfs = [dftrain, dfval, dftest]

for df in dfs:
    do_all(df)

w1  f1:  0.395  mae: 0.506
w2  f1:  0.376  mae: 0.524
w3  f1:  0.359  mae: 0.54
w4  f1:  0.345  mae: 0.556
w5  f1:  0.332  mae: 0.57
w6  f1:  0.321  mae: 0.583
w1  f1:  0.251  mae: 0.475
w2  f1:  0.241  mae: 0.485
w3  f1:  0.232  mae: 0.493
w4  f1:  0.225  mae: 0.501
w5  f1:  0.22  mae: 0.507
w6  f1:  0.216  mae: 0.512
w1  f1:  0.18  mae: 0.401
w2  f1:  0.177  mae: 0.415
w3  f1:  0.174  mae: 0.429
w4  f1:  0.171  mae: 0.442
w5  f1:  0.168  mae: 0.456
w6  f1:  0.166  mae: 0.469


 f1 is worse on test set but MAE is better on test set?
 