In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
# Read the training workbook and the unlabelled test workbook, in that order.
train_data, test_data = [
    pd.read_excel(workbook)
    for workbook in (
        "data/cases_2021_train_processed.xlsx",
        "data/cases_2021_test_processed_unlabelled.xlsx",
    )
]

In [None]:
# Columns that are coerced to numeric dtypes before modelling.
cols = [
    'age',
    'Confirmed',
    'Deaths',
    'Recovered',
    'Active',
]

In [None]:
# Coerce the selected columns to numeric, one *column* at a time (apply's
# default axis). The previous row-wise call (axis=1) invoked pd.to_numeric once
# per row and chose the downcast per row rather than per column; going
# column-wise is vectorised and lets each column be downcast to the smallest
# integer dtype that can hold its values. Columns that cannot be represented as
# integers (e.g. because they contain NaN) are left as floats by to_numeric.
train_data[cols] = train_data[cols].apply(pd.to_numeric, downcast='integer')
test_data[cols] = test_data[cols].apply(pd.to_numeric, downcast='integer')

In [None]:
# Integer codes for the categorical columns; the code is the position of the
# label in the sequence below.
outcome_groups = {
    label: code
    for code, label in enumerate(('deceased', 'hospitalized', 'nonhospitalized'))
}
sex = dict(male=0, female=1)

In [None]:
# Encode the training frame:
#   * outcome_group / sex -> integer codes via the lookup dicts (labels that
#     are not keys of the dict become NaN),
#   * missing province    -> the literal 'Philippines',
#   * chronic_disease_binary -> int.
train_data = train_data.assign(
    outcome_group=train_data['outcome_group'].map(outcome_groups),
    sex=train_data['sex'].map(sex),
    province=train_data['province'].fillna('Philippines'),
    chronic_disease_binary=train_data['chronic_disease_binary'].astype(int),
)


In [None]:
# Apply the same feature encoding to the test frame. No outcome_group column
# is touched here.
test_data = test_data.assign(
    sex=test_data['sex'].map(sex),
    province=test_data['province'].fillna('Philippines'),
    chronic_disease_binary=test_data['chronic_disease_binary'].astype(int),
)

In [None]:
# Class balance of the encoded target. dropna=False keeps a NaN row in the
# tally, so any label that the integer mapping failed to encode is visible
# here instead of being silently excluded from the counts.
train_data['outcome_group'].value_counts(dropna=False)

In [None]:
# Remove the columns that are not fed to the model. The frame is rebound
# rather than mutated in place, and errors='ignore' makes the cell safe to
# re-run: a second execution is a no-op instead of raising KeyError because
# the columns are already gone.
train_data = train_data.drop(
    columns=['province', 'country', 'date_confirmation'],
    errors='ignore',
)

In [None]:
# 5-fold cross-validation of a random forest on the training data.
#
# Every row is predicted exactly once, by a model that was fitted on the other
# four folds; the out-of-fold predictions are pooled and summarised in a single
# classification report.

# Fixed seed shared by the fold shuffling and the forest so that the printed
# report is reproducible across kernel restarts.
RANDOM_STATE = 42

X = train_data.drop('outcome_group', axis=1)
y = train_data['outcome_group']
kfold = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
all_labels = []
all_predictions = []
for train_index, test_index in kfold.split(X):
    # split() yields positional indices, hence .iloc rather than .loc.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A fresh model per fold, so nothing learned on one fold leaks into another.
    model = RandomForestClassifier(random_state=RANDOM_STATE)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # extend() appends in place instead of rebuilding the lists on every fold.
    all_labels.extend(y_test)
    all_predictions.extend(predictions)

report = classification_report(all_labels, all_predictions)
print(report)