In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
dataframe = pd.read_csv("binary.csv")

In [4]:
X = dataframe[['gre', 'gpa', 'rank']]
y = dataframe['admit']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, test_size=0.30, random_state=42)

Obtenha e compare os coeficientes (coef_ e intercept_) usando modelos com diferentes penalidades / regularizações:

In [6]:
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

In [7]:
# sem penalidade
model = linear_model.SGDClassifier(penalty='none', random_state=0)
model.fit(X_train_std, y_train)
y_pred = model.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model.intercept_, model.coef_))

accuracy_score:0.5083333333333333
intercept_:
[3.40628323]
coef_:
[[ 4.07354881  1.70586347 -3.23681358]]


In [8]:
# com penalidade l1
model_l1 = linear_model.SGDClassifier(penalty='l1', random_state=0)
model_l1.fit(X_train_std, y_train)
y_pred = model_l1.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_l1.intercept_, model_l1.coef_))

accuracy_score:0.5083333333333333
intercept_:
[3.62631271]
coef_:
[[ 4.92656849  1.03196634 -2.5324446 ]]


In [9]:
# com penalidade l2
model_l2 = linear_model.SGDClassifier(penalty='l2', random_state=0)
model_l2.fit(X_train_std, y_train)
y_pred = model_l2.predict(X_test_std)
print('X.shape:{}'.format(X.shape))
print('np.unique(y):{}'.format(np.unique(y)))
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_l2.intercept_, model_l2.coef_))

X.shape:(400, 3)
np.unique(y):[0 1]
accuracy_score:0.6416666666666667
intercept_:
[-0.22167468]
coef_:
[[ 3.12541954 -0.2568051  -2.16727294]]


In [10]:
model_en = linear_model.SGDClassifier(penalty='elasticnet', random_state=0)
model_en.fit(X_train_std, y_train)
y_pred = model_en.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_en.intercept_, model_en.coef_))

accuracy_score:0.44166666666666665
intercept_:
[7.89819919]
coef_:
[[ 2.50065045  4.94795678 -2.45861742]]


Para os experimentos com Regularização, crie também modelos variando a força da regularização (alpha). Use alpha com os seguintes valores: 0.0001, 0.01, 1, 10, 100.

In [11]:
regularizations_strength = [0.0001, 0.01, 1, 10, 100]
for elem in regularizations_strength:
    print("******   alpha = {}  ******\n".format(elem))

    # modelo com l1
    model_l1 = linear_model.SGDClassifier(penalty='l1', random_state=0, alpha=elem)
    model_l1.fit(X_train_std, y_train)
    y_pred = model_l1.predict(X_test_std)
    print("modelo com l1")
    print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))
    print('intercept: {}'.format(model_l1.intercept_))
    print('coef_: {}\n'.format(model_l1.coef_))
    
    # modelo com l2
    model_l2 = linear_model.SGDClassifier(penalty='l2', random_state=0, alpha=elem)
    model_l2.fit(X_train_std, y_train)
    y_pred = model_l1.predict(X_test_std)
    print("modelo com l2")
    print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))
    print('intercept: {}'.format(model_l2.intercept_))
    print('coef_: {}\n'.format(model_l2.coef_))
    
    # modelo usando elastic net
    model_elastic_net = linear_model.SGDClassifier(penalty='elasticnet')
    model_elastic_net.fit(X_train_std, y_train)
    y_pred = model_elastic_net.predict(X_test_std)
    print("modelo com elastic net")
    print("accuracy_score: {}".format(accuracy_score(y_test, y_pred)))
    print("intercept: {}".format(model_elastic_net.intercept_))
    print("coef_: {}\n".format(model_elastic_net.coef_))

******   alpha = 0.0001  ******

modelo com l1
accuracy_score: 0.5083333333333333
intercept: [3.62631271]
coef_: [[ 4.92656849  1.03196634 -2.5324446 ]]

modelo com l2
accuracy_score: 0.5083333333333333
intercept: [-0.22167468]
coef_: [[ 3.12541954 -0.2568051  -2.16727294]]

modelo com elastic net
accuracy_score: 0.65
intercept: [-3.62304034]
coef_: [[ 0.25605601  5.1596418  -0.32971012]]

******   alpha = 0.01  ******

modelo com l1
accuracy_score: 0.6833333333333333
intercept: [-0.9252897]
coef_: [[ 0.          0.         -0.03984293]]

modelo com l2
accuracy_score: 0.6833333333333333
intercept: [-0.99591171]
coef_: [[-0.04731102  0.10569639 -0.22124028]]

modelo com elastic net
accuracy_score: 0.675
intercept: [-16.87667373]
coef_: [[ 0.50072383 -4.64805586  5.91427037]]

******   alpha = 1  ******

modelo com l1
accuracy_score: 0.6833333333333333
intercept: [-0.99925628]
coef_: [[0. 0. 0.]]

modelo com l2
accuracy_score: 0.6833333333333333
intercept: [-0.99883472]
coef_: [[-0.00151