In [66]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [173]:
covid19_data = pd.read_csv('covid-dataset/COVID19_open_line_list.csv')

In [174]:
covid19_data = covid19_data[['age', 'sex', 'country', 'symptoms', 'travel_history_dates', 
                             'chronic_disease', 'outcome']]

In [175]:
covid19_data['chronic'] = np.where(covid19_data.chronic_disease.isna(), -1, 1)
covid19_data = covid19_data[covid19_data['outcome'].notnull()]
covid19_data['symptoms'] = covid19_data['symptoms'].str.split().str.len().fillna(0)
covid19_data.sex = np.where(covid19_data.sex=='male', -1, 1)
covid19_data['travel'] = np.where(covid19_data['travel_history_dates'].isna(), -1, 1)

In [176]:
covid19_data.outcome.unique()

array(['discharged', 'discharge', 'died', 'Discharged', '05.02.2020',
       'death',
       'Symptoms only improved with cough. Currently hospitalized for follow-up.',
       'recovered', 'stable',
       'critical condition, intubated as of 14.02.2020',
       'treated in an intensive care unit (14.02.2020)', 'severe'],
      dtype=object)

In [177]:
covid19_data.outcome = np.where(covid19_data.outcome.isin(['death', 'died']), 3, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['severe','treated in an intensive care unit (14.02.2020)'])
         , 3, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['critical condition, intubated as of 14.02.2020']), 3, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['stable']), 2, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['recovered']), 1, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['Symptoms only improved with cough. Currently hospitalized for follow-up.'])
         , 3, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['discharged', 'discharge','Discharged']), 1, covid19_data.outcome)
covid19_data.outcome = np.where(covid19_data.outcome.isin(['05.02.2020']), 1, covid19_data.outcome)



In [178]:
covid19_data.age = covid19_data.age.fillna(40)
covid19_data.age.unique()
covid19_data.age = np.where(covid19_data.age=='80-89', '85', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='20-29', '25', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='21-72', '40', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='40-49', '45', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='38-68', '50', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='22-80', '50', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='19-77', '50', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='0.25', '25', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='50-59', '55', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='70-79', '75', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='60-69', '65', covid19_data.age)
covid19_data.age = np.where(covid19_data.age=='0.5', '50', covid19_data.age)

covid19_data.age = covid19_data.age.astype(int)

In [179]:
hospital_beds = pd.read_csv('WBH/hospital_beds.csv')
hospital_beds['beds'] = hospital_beds.iloc[:,4:].max(axis=1)
hospital_beds = hospital_beds[['Country Name', 'beds']]

In [180]:
training_set = pd.merge(hospital_beds, covid19_data, left_on='Country Name', right_on='country')


In [186]:
classes = training_set['outcome']
data = training_set[['age','symptoms','chronic']]


In [187]:
X = np.array(data)
Y = np.array(list(classes))
clf = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
clf.fit(X, Y)

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

In [188]:
print(clf.predict(data))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 2 2 2 2 2 2 2 1 1 1 2 2 1 1
 1 1 1 1 2 1 2 2 2 1 2 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 2 2 1 1 1 2 2 2 1 1 1 1 1 1 2 2 2 1 1 2 1 1 1 2 1 2 1 1 1 2 1 2 2 1
 1 1 2 1 2 1 1 2 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [189]:
classes

0      1
1      1
2      1
3      1
4      1
      ..
173    1
174    1
175    1
176    1
177    1
Name: outcome, Length: 178, dtype: object

In [195]:
train = training_set[['age','symptoms', 'travel', 'chronic', 'outcome']]
train

Unnamed: 0,age,symptoms,travel,chronic,outcome
0,36,0.0,-1,-1,1
1,56,0.0,1,1,1
2,25,0.0,1,-1,1
3,40,0.0,-1,-1,1
4,40,3.0,1,-1,1
5,44,2.0,1,-1,1
6,29,1.0,1,-1,1
7,40,0.0,-1,-1,3
8,39,2.0,-1,-1,3
9,39,5.0,1,-1,1
