Import modules:

In [160]:
import sklearn
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd

Create the data set:

In [161]:
# Applicant records, stored column-wise (one list per attribute, 40 applicants).
gmat_scores = [780,750,690,710,680,730,690,720,740,690,610,690,710,680,770,610,580,650,540,590,620,600,550,550,570,670,660,580,650,660,640,620,660,660,680,650,670,580,590,690]
gpa_values = [4,3.9,3.3,3.7,3.9,3.7,2.3,3.3,3.3,1.7,2.7,3.7,3.7,3.3,3.3,3,2.7,3.7,2.7,2.3,3.3,2,2.3,2.7,3,3.3,3.7,2.3,3.7,3.3,3,2.7,4,3.3,3.3,2.3,2.7,3.3,1.7,3.7]
experience_years = [3,4,3,5,4,6,1,4,5,1,3,5,6,4,3,1,4,6,2,3,2,1,4,1,2,6,4,2,6,5,1,2,4,6,5,1,2,1,4,5]
admission_flags = [1,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,1]

candidates = {
    'gmat': gmat_scores,
    'gpa': gpa_values,
    'work_experience': experience_years,
    'admitted': admission_flags,
}

# The dict's insertion order already yields the desired column order.
df = pd.DataFrame(candidates)
df.head(3)

Unnamed: 0,gmat,gpa,work_experience,admitted
0,780,4.0,3,1
1,750,3.9,4,1
2,690,3.3,3,0


In [162]:
# Separate the three predictor columns from the binary outcome column.
feature_columns = ['gmat', 'gpa', 'work_experience']
data = df[feature_columns]
target = df['admitted']

In [163]:
# Sanity check: (rows, columns) of the feature frame.
data.shape

(40, 3)

In [164]:
# Sanity check: the target is one-dimensional, one label per row of `data`.
target.shape

(40,)

Split the data into training and test sets:

In [165]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    random_state=42,
    test_size=0.2,
)

In [166]:
def namestr(obj, namespace):
    """Return every name in ``namespace`` that is bound to the object ``obj``.

    Matching is by identity (``is``), not equality, so an equal but distinct
    object does not match. Names are returned in the mapping's iteration
    order; the list is empty when nothing matches.
    """
    matches = []
    for name, value in namespace.items():
        if value is obj:
            matches.append(name)
    return matches

# Report the shape of each split. Labels are paired with their objects
# explicitly: recovering a name by identity lookup in globals() returns the
# first global bound to the object, which is the wrong label whenever an
# earlier global (e.g. a leftover loop variable) aliases one of the splits.
splits = {
    'X_train': X_train,
    'X_test': X_test,
    'y_train': y_train,
    'y_test': y_test,
}
for split_name, split in splits.items():
    print(f'Shape of {split_name} is: {split.shape}')

Shape of X_train is: (32, 3)
Shape of X_test is: (8, 3)
Shape of y_train is: (32,)
Shape of y_test is: (8,)


Create Model:

In [167]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier with scikit-learn's default settings.
model = LogisticRegression()

Train the model:

In [168]:
# Fit on the training split only; the test split stays unseen until evaluation.
model.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

Predict on the test set:

In [169]:
# Hard class labels (0/1) predicted for the held-out test rows.
y_pred = model.predict(X_test)
y_pred

array([0, 0, 0, 1, 1, 1, 0, 0])

# Model evaluation:

Confusion matrix:

In [170]:
from sklearn.metrics import confusion_matrix
# scikit-learn convention: rows are the true classes, columns the predicted ones.
conf_m = confusion_matrix(y_test,y_pred)
conf_m

array([[5, 1],
       [0, 2]])

In [171]:
import plotly.figure_factory as ff

# Draw the confusion matrix as an annotated heatmap.
#
# Plotly places the first row of a heatmap at the bottom of the plot, so the
# matrix is flipped vertically to show the true_0 row on top. The values (z),
# the cell annotations and the y labels must all be flipped together:
# flipping only the labels and the text leaves every cell coloured with the
# count that belongs to the other row.
n_classes = conf_m.shape[0]
x = ['pred_' + str(n) for n in range(n_classes)]
y = ['true_' + str(n) for n in range(n_classes - 1, -1, -1)]

# Flipped counts, row order matching `y`.
z = conf_m[::-1]

# Annotations must be strings and are derived from the same flipped matrix so
# that the text and the colour of a cell always describe the same count.
z_text = [[str(count) for count in row] for row in z]

# set up figure
fig = ff.create_annotated_heatmap(z,
                                  x=x,
                                  y=y,
                                  annotation_text=z_text,
                                  colorscale='Viridis')

# add title
fig.update_layout(title_text='<i><b>Confusion matrix</b></i>')

# add custom xaxis title (paper coordinates place it below the plot area)
fig.add_annotation(dict(font=dict(color="black",size=14),
                        x=0.5,
                        y=-0.15,
                        showarrow=False,
                        text="Predicted value",
                        xref="paper",
                        yref="paper"))

# add custom yaxis title (rotated, placed left of the plot area)
fig.add_annotation(dict(font=dict(color="black",size=14),
                        x=-0.35,
                        y=0.5,
                        showarrow=False,
                        text="Real value",
                        textangle=-90,
                        xref="paper",
                        yref="paper"))

# adjust margins to make room for yaxis title
fig.update_layout(margin=dict(t=50, l=200))

# add colorbar
fig['data'][0]['showscale'] = True
fig.show()

Classification report:

In [172]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 on the test split; the report is a plain
# string, hence print() rather than rich display.
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.83      0.91         6
           1       0.67      1.00      0.80         2

    accuracy                           0.88         8
   macro avg       0.83      0.92      0.85         8
weighted avg       0.92      0.88      0.88         8



In [173]:
from sklearn.metrics import roc_curve, auc
import plotly.express as px

# A ROC curve is traced by sweeping the decision threshold over continuous
# scores. Hard 0/1 predictions collapse it to a single operating point and
# give a misleading AUC, so use the predicted probability of the positive
# class instead. predict_proba columns follow model.classes_ ([0, 1] here),
# hence column 1 is the probability of class 1.
y_score = model.predict_proba(X_test)[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_score, pos_label=1)

fig = px.area(
    x=fpr, y=tpr,
    title=f'ROC Curve (AUC={auc(fpr, tpr):.4f})',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700, height=500,)

# Diagonal reference line: the expected curve of a random classifier.
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1 )

# Keep the plot square so the diagonal really sits at 45 degrees.
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')
fig.show()