In [1]:
from path import Path
import pandas as pd

In [2]:
data = 'clean_data.csv'
df = pd.read_csv(data)
df.head(10)

Unnamed: 0,id_registro,uci,neumonia,diabetes,epoc,asma,cardiovascular,inmusupr,hipertension,renal_cronica,tabaquismo
0,z2eace,1,1,1,1,1,1,1,1,1,1
1,z3c500,1,0,1,1,1,1,1,0,1,1
2,z39e04,1,0,1,1,1,1,1,1,1,0
3,z50698,1,0,0,1,1,1,1,1,1,1
4,1c4536,1,1,1,1,1,1,1,1,1,1
5,8-Feb-00,1,1,1,1,1,1,1,1,1,1
6,060aa5,0,0,1,1,1,1,1,1,1,1
7,1c250e,1,0,0,1,1,1,1,0,1,1
8,01094d,1,0,1,1,1,1,1,1,1,1
9,46071,1,0,1,1,1,1,1,1,1,1


In [3]:
df.count()

id_registro       96118
uci               96118
neumonia          96118
diabetes          96118
epoc              96118
asma              96118
cardiovascular    96118
inmusupr          96118
hipertension      96118
renal_cronica     96118
tabaquismo        96118
dtype: int64

 ## Separate the Features (X) from the Target (y)

In [8]:
y = df["uci"]
y=y.astype('int')
X = df.drop(columns=["uci", "id_registro"])
X.dtypes

neumonia          int64
diabetes          int64
epoc              int64
asma              int64
cardiovascular    int64
inmusupr          int64
hipertension      int64
renal_cronica     int64
tabaquismo        int64
dtype: object

 ## Split our data into training and testing

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)
X_train.head(5)

Unnamed: 0,neumonia,diabetes,epoc,asma,cardiovascular,inmusupr,hipertension,renal_cronica,tabaquismo
25300,0,1,1,1,1,1,1,1,0
18732,1,1,1,1,1,1,1,1,0
39153,0,1,1,1,1,1,1,1,1
42593,1,1,1,1,1,1,1,1,1
18763,1,1,1,1,1,1,1,1,1


 ## Create a Logistic Regression Model

In [10]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

 ## Fit (train) or model using the training data

In [11]:
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=200, random_state=1)

 ## Make predictions

In [12]:
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
5,1,1
6,1,1
7,1,0
8,1,1
9,1,1


In [13]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.9215147732001665


In [14]:
from sklearn.metrics import confusion_matrix, classification_report
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

[[    0  1886]
 [    0 22144]]


In [15]:
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1886
           1       0.92      1.00      0.96     22144

    accuracy                           0.92     24030
   macro avg       0.46      0.50      0.48     24030
weighted avg       0.85      0.92      0.88     24030



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
from sklearn.svm import SVC
model = SVC(kernel='linear')

In [17]:
model.fit(X_train, y_train)

SVC(kernel='linear')

In [18]:
y_pred = model.predict(X_test)
results = pd.DataFrame({
   "Prediction": y_pred,
   "Actual": y_test
}).reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1


In [19]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.9215147732001665

In [20]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1886
           1       0.92      1.00      0.96     22144

    accuracy                           0.92     24030
   macro avg       0.46      0.50      0.48     24030
weighted avg       0.85      0.92      0.88     24030



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
