### Model Diagnostics in Python

In this notebook, you will try out some of the model diagnostics you saw from Sebastian. Here, however, the outcome has only two classes: admitted or not admitted.

First, let's import the necessary libraries and read in the dataset.

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable.
np.random.seed(seed=42)

# Load the admissions data and preview the first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column that looks like a
# saved row index rather than a real predictor - confirm against the CSV.
df = pd.read_csv(filepath_or_buffer='admissions.csv')
df.head(n=5)

Unnamed: 0,admit,gre,gpa,prestige
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [8]:
# One-hot encode the categorical `prestige` column.
# - `prefix='prest'` derives each column name from the level it encodes
#   (prest_1 ... prest_4 for integer levels 1-4), instead of relying on a
#   hard-coded list being in the same order as the dummy columns.
# - `dtype=int` keeps the indicators as 0/1; newer pandas versions default
#   to boolean dummies otherwise.
prestige_dummies = pd.get_dummies(df['prestige'], prefix='prest', dtype=int)

# Assigning by column label keeps this cell safe to re-run (existing dummy
# columns are overwritten rather than duplicated).
df[list(prestige_dummies.columns)] = prestige_dummies
df.head()

Unnamed: 0,admit,gre,gpa,prestige,prest_1,prest_2,prest_3,prest_4
0,0,380,3.61,3,0,0,1,0
1,1,660,3.67,3,0,0,1,0
2,1,800,4.0,1,1,0,0,0
3,1,640,3.19,4,0,0,0,1
4,0,520,2.93,4,0,0,0,1


In [13]:
# Select the predictors explicitly. Dropping only 'admit', 'prestige' and
# 'prest_4' would leave the 'Unnamed: 0' row-number column in X, so the model
# would be trained on an arbitrary row id. `prest_4` is left out on purpose as
# the baseline level for the prestige dummies.
feature_cols = ['gre', 'gpa', 'prest_1', 'prest_2', 'prest_3']
X = df[feature_cols]
y = df['admit']  # binary target: 1 = admitted, 0 = not admitted

# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=42)

In [14]:
# Fit a logistic regression with default settings on the training split
# (`fit` returns the estimator itself, so construction and fitting can be chained).
log_mod = LogisticRegression().fit(X_train, y_train)

# Predict the held-out labels and tabulate them against the truth:
# rows are actual classes, columns are predicted classes.
preds = log_mod.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=preds)



array([[23,  1],
       [14,  2]])

In [15]:
# Precision: of the applicants predicted as admitted, the fraction actually admitted.
precision_score(y_true=y_test, y_pred=preds)

0.6666666666666666

In [16]:
# Recall: of the applicants actually admitted, the fraction the model predicted as admitted.
recall_score(y_true=y_test, y_pred=preds)

0.125

In [17]:
# Accuracy: fraction of all held-out applicants whose label was predicted correctly.
accuracy_score(y_true=y_test, y_pred=preds)

0.625