In [None]:
### Libraries required

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
from matplotlib import style
# Apply matplotlib's "fivethirtyeight" style sheet to the figures drawn below.
style.use("fivethirtyeight")


In [None]:
# Setting working directory.
# The data directory used to be a single hard-coded absolute path, which makes
# the notebook crash on any machine that lacks that exact directory. It can now
# be overridden with the COVID_DATA_DIR environment variable; the original path
# stays the default so an existing setup behaves exactly as before.
import os

DATA_DIR = os.environ.get("COVID_DATA_DIR", "/home/gabe/gatech/ml")
if os.path.isdir(DATA_DIR):
    os.chdir(DATA_DIR)
else:
    # Keep the current directory instead of raising, so relative paths in later
    # cells are resolved next to wherever the notebook was started.
    print("Data directory not found:", DATA_DIR, "- staying in", os.getcwd())

In [None]:
# Load the case-survey CSV; the file name is relative to the working directory.
csv_name = 'covid_case_survey_US.csv'
data = pd.read_csv(csv_name)

# Print the per-column summary, then let the cell display (rows, columns).
data.info()
data.shape

In [None]:
# Drop dates and probable cases. Probable instances are noise at this point.
# Drop unknowns for sex and age_group since they do not provide any insight on severity.

# Columns are removed by position.
# NOTE(review): positions depend on the CSV column order, and re-running this
# cell removes three further columns -- confirm against the file's schema.
cols = [1,4,9]
data = data.drop(columns=data.columns[cols])

# One combined mask: laboratory-confirmed cases only, sex restricted to
# Female/Male, and age_group anything other than the literal "Unknown".
keep_rows = (
    (data.current_status == "Laboratory-confirmed case")
    & data.sex.isin(["Female", "Male"])
    & (data.age_group != "Unknown")
)
data = data[keep_rows]
data.head()

In [None]:
# Give the medical-condition flag a numeric weight, since it matters even
# though there is not much data on it: Yes -> 1, No -> -1, Unknown -> 0.
# Any other value is left exactly as it was.
medcond_codes = {'Yes': 1, 'No': -1, 'Unknown': 0}
for answer, code in medcond_codes.items():
    data['medcond_yn'] = np.where(data['medcond_yn'] == answer, code, data['medcond_yn'])

data.shape  # bare expression in the middle of the cell: evaluated, not displayed
item_counts = data["medcond_yn"].value_counts()
print(item_counts)

In [None]:
# Collapse the three outcome flags into one label per case. The first matching
# condition wins, so the order below encodes the priority:
# death > intensive care > hospitalised; everything else is care at home.
outcome_flags = [
    data['death_yn'] == 'Yes',
    data['icu_yn'] == 'Yes',
    data['hosp_yn'] == 'Yes',
]
outcome_labels = ['Death', 'Intensive Care', 'Hospitalized']
data['severity'] = np.select(outcome_flags, outcome_labels, default='Care at Home')

# Remove three columns by position.
# NOTE(review): presumably these are the raw hosp/icu/death flags now folded
# into `severity` -- confirm against the actual column order.
cols = [3,4,5]
data.drop(columns=data.columns[cols], inplace=True)
data.head()

In [None]:
# Ordinal code for the severity label: the position in this list (starting
# at 1) is the code, so "Care at Home" -> 1 ... "Death" -> 4.
# Labels not in the list fall back to 1.
severity_levels = ["Care at Home", "Hospitalized", "Intensive Care", "Death"]
data['severity_type'] = np.select(
    [data['severity'] == level for level in severity_levels],
    list(range(1, len(severity_levels) + 1)),
    default=1,
)

In [None]:
# Encode sex numerically: 'Male' -> 1, every other value -> 2.
# Not idempotent: once the column holds 1/2, re-running this cell maps every
# row to 2 because nothing equals 'Male' any more.
is_male = data['sex'] == 'Male'
data['sex'] = np.where(is_male, 1, 2)

# Class balance of the target.
item_counts = data["severity_type"].value_counts()
print(item_counts)

In [None]:
# Ordinal-encode the age bands ("0 - 9 Years" -> 0 ... "80+ Years" -> 8).
# Values that are not in the table are left untouched.
# A one-hot alternative is left disabled:
#   pd.get_dummies(data.age_group, prefix='age_group')
age_group_codes = {
    "0 - 9 Years": 0,
    "10 - 19 Years": 1,
    "20 - 29 Years": 2,
    "30 - 39 Years": 3,
    "40 - 49 Years": 4,
    "50 - 59 Years": 5,
    "60 - 69 Years": 6,
    "70 - 79 Years": 7,
    "80+ Years": 8,
}
for band, code in age_group_codes.items():
    data['age_group'] = np.where(data['age_group'] == band, code, data['age_group'])

item_counts = data['age_group'].value_counts()
print(item_counts)

# Down-sample to keep SVC training tractable. Sampling without replacement
# raises ValueError when n exceeds the number of rows, so cap n at the rows
# that survived filtering. With 200000 or more rows the result is unchanged.
SAMPLE_SIZE = 200000
data = data.sample(n=min(SAMPLE_SIZE, len(data)), random_state=1)

In [None]:
# Target vector: the numeric severity code.
y = data.loc[:, "severity_type"]
y.head()


In [None]:
# Feature matrix: a new frame holding every column of `data` except the ones
# at these positions (`data` itself is not modified).
# NOTE(review): confirm the target columns are among the dropped positions so
# the label cannot leak into the features.
selected_columns = [0,3,4,5]
x = data.drop(columns=data.columns[selected_columns])
x.head()

In [None]:
# Training the model
# Hold out 20% of the rows for evaluation. The split is seeded (same seed as
# the row sample taken earlier) so the scores reported below are reproducible
# after a kernel restart instead of changing on every run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)


In [None]:
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets


def make_meshgrid(x, y, h=.02):
    """Build a rectangular grid of points covering the range of x and y.

    Each axis runs from one unit below its minimum to (just short of) one unit
    above its maximum, in steps of ``h``.

    Parameters
    ----------
    x, y : objects exposing ``.min()`` and ``.max()``
        Values spanning the horizontal and vertical axis respectively.
    h : float, optional
        Spacing between neighbouring grid points.

    Returns
    -------
    tuple
        ``(xx, yy)`` coordinate arrays as produced by ``np.meshgrid``.
    """
    margin = 1
    x_ticks = np.arange(x.min() - margin, x.max() + margin, h)
    y_ticks = np.arange(y.min() - margin, y.max() + margin, h)
    return tuple(np.meshgrid(x_ticks, y_ticks))

def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's predictions over a grid as filled contours.

    Parameters
    ----------
    ax : object with a ``contourf`` method
        Axes to draw on.
    clf : object with a ``predict`` method
        Called once with an ``(n_points, 2)`` array built from the grid.
    xx, yy : ndarray
        Grid coordinate arrays of identical shape.
    **params
        Forwarded unchanged to ``ax.contourf``.

    Returns
    -------
    Whatever ``ax.contourf`` returns.
    """
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    predicted = clf.predict(grid_points).reshape(xx.shape)
    return ax.contourf(xx, yy, predicted, **params)

# The classification SVC model. svm.SVC() is created with scikit-learn's
# default settings, so the kernel is RBF rather than linear.
model = svm.SVC()
clf = model.fit(X_train, y_train)
fig, ax = plt.subplots()

# Title for the plot; it names the kernel that is actually used.
title = 'Decision surface of RBF-kernel SVC'

# Set up the grid for plotting from the first two feature columns.
# plot_contours() asks clf to predict on two-column points, so this cell only
# works when X_train has exactly two feature columns.
X0, X1 = X_train.iloc[:, 0], X_train.iloc[:, 1]
xx, yy = make_meshgrid(X0, X1)
plot_contours(ax, clf, xx, yy, cmap=plt.cm.coolwarm, alpha=0.8)
ax.scatter(X0, X1, c=y_train, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
ax.set_ylabel('Age Group (0 to 100 years old)')
ax.set_xlabel('Gender')
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(title)
plt.show()

In [None]:
# Score the fitted classifier on the held-out split.
from sklearn.metrics import average_precision_score
y_predicted = clf.predict(X_test)
#average_precision = average_precision_score(y_test, y_predicted)
test_accuracy = metrics.accuracy_score(y_test, y_predicted)
print("Accuracy obtained:", test_accuracy)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Text report of the per-class metrics, followed by the confusion matrix.
report_text = classification_report(y_test, y_predicted)
confusion = confusion_matrix(y_test, y_predicted)
print(report_text)
print(confusion)

In [None]:
from sklearn.svm import SVC
# Baseline: SVC with default settings -- the same configuration as the
# classifier fitted for the decision-surface plot. fit() returns the estimator.
model = SVC().fit(X_train, y_train)
model.score(X_test, y_test)

In [None]:
# Regularisation strength C=1 (scikit-learn's documented default for SVC).
model_C = SVC(C=1).fit(X_train, y_train)
model_C.score(X_test, y_test)

In [None]:
# Larger C=10; this rebinds model_C, replacing the C=1 model.
model_C = SVC(C=10).fit(X_train, y_train)
model_C.score(X_test, y_test)

In [None]:
# Kernel coefficient gamma=10, all other settings left at their defaults.
model_g = SVC(gamma=10).fit(X_train, y_train)
model_g.score(X_test, y_test)

In [None]:
# Sigmoid-kernel SVC. The variable used to be called `model_linear_kernal`,
# which named the wrong kernel; the model is now bound to an accurate name.
model_sigmoid_kernel = SVC(kernel='sigmoid')
model_sigmoid_kernel.fit(X_train, y_train)
# Legacy alias so any cell that still refers to the old name keeps working.
model_linear_kernal = model_sigmoid_kernel
model_sigmoid_kernel.score(X_test, y_test)

In [None]:
# Polynomial-kernel SVC. The variable used to be called `model_linear_kernal`,
# which named the wrong kernel; the model is now bound to an accurate name.
model_poly_kernel = SVC(kernel='poly')
model_poly_kernel.fit(X_train, y_train)
# Legacy alias so any cell that still refers to the old name keeps working.
model_linear_kernal = model_poly_kernel
model_poly_kernel.score(X_test, y_test)