In [None]:
import sys
from google.colab import drive
import math
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
# Mount Google Drive inside the Colab VM so files under /content/drive are reachable.
drive.mount('/content/drive')
# Add the project folder to the import search path.
# NOTE(review): this path spells the folder 'My Drive' (with a space) while the
# data/output paths used elsewhere in this notebook spell it 'MyDrive' —
# confirm both spellings resolve to the same folder on this runtime.
sys.path += ['/content/drive/My Drive/COVID']
#from sklearn.linear_model import LinearRegression


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the training table from Drive. Later cells read its "Province_State",
# "Deaths" and "Confirmed" columns.
path = "/content/drive/MyDrive/COVID/train_trendency.csv"
df_tend = pd.read_csv(path)

# path = "/content/drive/MyDrive/COVID/train_vaccine.csv"
# df_vac = pd.read_csv(path)

In [None]:
class Regression:
    """
    Ordinary least-squares linear regression on a precomputed design matrix.

    Attributes
    --------------------
        coef_   -- numpy array with one entry per column of X, or None
                   before fit() has been called
        m_      -- integer
                   model order supplied by the caller; stored for reference
                   only (neither fit nor predict reads it)
    """

    def __init__(self, m=1):
        """
        Parameters
        --------------------
            m       -- integer, order of the model (default 1)
        """
        self.coef_ = None
        self.m_ = m

    def fit(self, X, y):
        """
        Estimate the coefficients w that minimise ||X w - y||_2.

        Parameters
        --------------------
            X       -- array-like of shape (n,m+1), features
            y       -- array-like of shape (n,), targets

        Returns
        --------------------
            self    -- an instance of self
        """
        # Work in floating point: integer feature matrices (e.g. powers of an
        # integer day index) would otherwise be multiplied in int64, which
        # wraps around silently on overflow.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Solve the least-squares problem on X directly rather than through
        # (X^T X)^{-1} X^T y: forming X^T X squares the condition number and
        # loses the small singular values of polynomial design matrices.
        # For rank-deficient X, lstsq returns the minimum-norm solution.
        self.coef_ = np.linalg.lstsq(X, y, rcond=None)[0]
        return self

    def predict(self, X):
        """
        Predict output for X using the fitted coefficients.

        Parameters
        --------------------
            X       -- numpy array of shape (n,m+1), features

        Returns
        --------------------
            y       -- numpy array of shape (n,), predictions

        Raises
        --------------------
            Exception -- if fit() has not been called yet
        """
        if self.coef_ is None:
            raise Exception("Model not initialized. Perform a fit first.")
        return np.dot(X, self.coef_)  # X w
      

In [None]:
def generate_polynomial(X, m):
    """
    Build the polynomial feature matrix [1, x, x^2, ..., x^m].

    Parameters
    --------------------
        X       -- array-like of shape (n,d); only the first column is used
        m       -- integer, highest power to generate

    Returns
    --------------------
        numpy float array of shape (n,m+1); column k holds X[:,0] ** k
    """
    # Cast to float before exponentiating: integer inputs such as np.arange
    # would be raised in int64 arithmetic, which wraps around silently once a
    # power (or a later product of powers) exceeds 2**63.
    x = np.asarray(X, dtype=float)[:, :1]
    # (n,1) ** (m+1,) broadcasts to (n,m+1) without a Python-level loop.
    return np.power(x, np.arange(0, m + 1))

In [None]:
# Per-state series, keyed by "Province_State" value, in the row order of df_tend.
death_raw, confirm = {}, {}
sts = df_tend["Province_State"].unique()
for st in sts:
  state_rows = df_tend.loc[df_tend["Province_State"] == st]
  death_raw[st] = state_rows["Deaths"].to_numpy()
  confirm[st] = state_rows["Confirmed"].to_numpy()


In [None]:
def MAPE(predicted, truth):
  """
  Mean absolute percentage error, returned as a fraction (0.1 means 10%).

  Parameters
  --------------------
      predicted -- array-like of predictions
      truth     -- array-like of reference values, same shape as predicted

  Returns
  --------------------
      mean of |predicted - truth| / |truth| over all elements; elements where
      truth is 0 contribute inf/nan, following numpy division semantics

  Raises
  --------------------
      ValueError        -- if predicted and truth have different shapes
      ZeroDivisionError -- if the inputs are empty (the mean is undefined)
  """
  predicted = np.asarray(predicted, dtype=float)
  truth = np.asarray(truth, dtype=float)
  # Refuse mismatched inputs instead of silently scoring only a prefix.
  if predicted.shape != truth.shape:
    raise ValueError(
        "predicted and truth must have the same shape, got %s and %s"
        % (predicted.shape, truth.shape))
  # Same exception type the 0/0 division produced before, now with a message.
  if predicted.size == 0:
    raise ZeroDivisionError("MAPE is undefined for empty input")
  return np.mean(np.abs(predicted - truth) / np.abs(truth))

In [None]:
def _fit_and_extrapolate(series, TRAIN_LEN, TEST_LEN, order):
  """
  Fit a polynomial trend of the given order to the first TRAIN_LEN values of
  series and return its values for the TEST_LEN steps that follow.

  Parameters
  --------------------
      series    -- 1-D numpy array, one value per time step
      TRAIN_LEN -- integer, number of leading values used for fitting
      TEST_LEN  -- integer, number of steps to extrapolate
      order     -- integer, polynomial order

  Returns
  --------------------
      numpy array of shape (TEST_LEN,), predictions for steps
      TRAIN_LEN .. TRAIN_LEN+TEST_LEN-1
  """
  # Express time as a float fraction of the training window instead of a raw
  # integer index. Raw indices raised to high powers are badly conditioned and
  # can overflow int64 once multiplied together; a polynomial in t/scale spans
  # the same functions as one in t, so a well-posed fit is unchanged.
  scale = float(max(TRAIN_LEN, 1))
  train_t = (np.arange(TRAIN_LEN) / scale).reshape(-1, 1)
  future_t = (np.arange(TRAIN_LEN, TRAIN_LEN+TEST_LEN) / scale).reshape(-1, 1)

  X_input = generate_polynomial(train_t, order)
  X_output = generate_polynomial(future_t, order)
  reg = Regression(order).fit(X_input, series[0:TRAIN_LEN])
  return reg.predict(X_output)

def train_death(TRAIN_LEN, TEST_LEN, death_order, st):
  """
  Forecast deaths for state st: fit a polynomial of order death_order to the
  first TRAIN_LEN entries of death_raw[st] and predict the next TEST_LEN.
  """
  return _fit_and_extrapolate(death_raw[st], TRAIN_LEN, TEST_LEN, death_order)

def train_confirm(TRAIN_LEN, TEST_LEN, confirm_order, st):
  """
  Forecast confirmed cases for state st: fit a polynomial of order
  confirm_order to the first TRAIN_LEN entries of confirm[st] and predict the
  next TEST_LEN.
  """
  return _fit_and_extrapolate(confirm[st], TRAIN_LEN, TEST_LEN, confirm_order)


In [None]:
# Hold out the last TEST_LEN entries of each series to choose a polynomial order.
TEST_LEN = 12
# NOTE(review): the series length is taken from "Alabama" only — this assumes
# every state has at least as many rows; confirm against the data.
TRAIN_LEN = death_raw["Alabama"].size - TEST_LEN

# Candidate polynomial orders, tried in this sequence.
test_orders = [0,1,2,3,4,5,6]


def _select_orders(train_fn, series_by_state):
  """
  Pick, for every state in sts, the order from test_orders whose forecast has
  the lowest MAPE on the held-out window.

  Parameters
  --------------------
      train_fn        -- callable (TRAIN_LEN, TEST_LEN, order, st) -> predictions
      series_by_state -- dict mapping state -> 1-D numpy array of true values

  Returns
  --------------------
      dict mapping state -> selected order; 0 when no order gives an error
      below infinity, and the earliest order wins ties (strict '<')
  """
  chosen = {}
  for st in sts:
    truth = series_by_state[st][TRAIN_LEN:TRAIN_LEN+TEST_LEN]
    best_order, best_err = 0, float('inf')
    for order in test_orders:
      err = MAPE(train_fn(TRAIN_LEN, TEST_LEN, order, st), truth)
      if err < best_err:
        best_order, best_err = order, err
    chosen[st] = best_order
  return chosen


death_orders = _select_orders(train_death, death_raw)
confirm_orders = _select_orders(train_confirm, confirm)

In [None]:
# Refit on the full history with the selected orders and forecast the next
# FINAL_TEST_LEN steps for every state.
FINAL_TEST_LEN = 30
FINAL_TRAIN_LEN = death_raw["Alabama"].size

death_results, confirm_results = {}, {}
for st in sts:
  confirm_results[st] = train_confirm(FINAL_TRAIN_LEN, FINAL_TEST_LEN, confirm_orders[st], st)
  death_results[st] = train_death(FINAL_TRAIN_LEN, FINAL_TEST_LEN, death_orders[st], st)


In [None]:
# Write the forecasts as rows of (ID, Confirmed, Deaths): one row per state for
# the first forecast step, then one per state for the second, and so on, with
# ID counting rows from 0.
# newline='' is what the csv module asks for on files handed to csv.writer, so
# the writer's own line terminator is not translated a second time.
with open("/content/drive/MyDrive/COVID/Team8.csv", 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)

    # writing the fields
    csvwriter.writerow(["ID", "Confirmed", "Deaths"])

    # row_id rather than id, so the builtin id() is not shadowed in the kernel.
    row_id = 0
    # Iterate over the configured forecast horizon instead of a literal 30 so
    # the export stays in step with the length of the forecasts.
    for day in range(FINAL_TEST_LEN):
        for st in sts:
            csvwriter.writerow([row_id, confirm_results[st][day], death_results[st][day]])
            row_id += 1