In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.utils import resample
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
# Read in data
mod_data = pd.read_csv('appointments_clean.csv')

In [3]:
mod_data.columns

Index(['PatientId', 'AppointmentID', 'Gender', 'ScheduledDay',
       'AppointmentDay', 'Age', 'Neighbourhood', 'Scholarship', 'Hipertension',
       'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'No-show',
       'DaysBetween', 'NoShow', 'PreviousMiss'],
      dtype='object')

In [4]:
# Split into X and y
X = mod_data.drop(
    ['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay', 
     'Neighbourhood', 'No-show', 'NoShow'], 
    axis = 1
)
y = mod_data.NoShow

# Test/train split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 314
)

In [5]:
# Logistic regression
clf_lr = LogisticRegression(
    solver = 'warn',
    penalty = 'none'
).fit(X_train, y_train)
pred_lr = clf_lr.predict(X_test)
prob_lr = clf_lr.predict_proba(X_test)



ValueError: Logistic Regression supports only penalties in ['l1', 'l2'], got none.

In [None]:
# Regularized regression
clf_rr = LogisticRegression(
    penalty = 'elasticnet'
).fit(X_train, y_train)

pred_rr = clf_rr.predict(X_test)
pred_rr = clf_rr.predict_proba(X_test)

In [None]:
# Random forest
clf_rf = RandomForestClassifier(
    n_estimators = 100,
    random_state = 314
).fit(X_train, y_train)

pred_rf = clf_rf.predict(X_test)
prob_rf = clf_rf.predict_proba(X_test)

In [None]:
# Gradient boosting
clf_gb = GradientBoostingClassifier(
    random_state = 314
).fit(X_train, y_train)

pred_gb = clf_gb.predict(X_test)
prob_gb = clf_gb.predict_proba(X_test)

In [None]:
# Check model performance
print('Logistic Regression')
print(classification_report(y_test, pred_lr))

print('Random Forest')
print(classification_report(y_test, pred_rf))

print('Gradient Boosting')
print(classification_report(y_test, pred_gb))
