In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
dataframe = pd.read_csv("binary.csv")

In [4]:
X = dataframe[['gre', 'gpa', 'rank']]
y = dataframe['admit']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, test_size=0.30, random_state=42)

Obtenha e compare os coeficientes (coef_ e intercept_) usando modelos com diferentes penalidades / regularizações:

In [6]:
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

In [19]:
# sem penalidade
model = linear_model.SGDClassifier(penalty='none', random_state=2, alpha=1, max_iter=100000)
model.fit(X_train_std, y_train)
y_pred = model.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model.intercept_, model.coef_))

accuracy_score:0.6833333333333333
intercept_:
[-0.99689864]
coef_:
[[ 0.05644248  0.07000314 -0.13338665]]


In [8]:
# com penalidade l1
model_l1 = linear_model.SGDClassifier(penalty='l1', random_state=0)
model_l1.fit(X_train_std, y_train)
y_pred = model_l1.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_l1.intercept_, model_l1.coef_))

accuracy_score:0.5083333333333333
intercept_:
[3.62631271]
coef_:
[[ 4.92656849  1.03196634 -2.5324446 ]]


In [9]:
# com penalidade l2
model_l2 = linear_model.SGDClassifier(penalty='l2', random_state=0)
model_l2.fit(X_train_std, y_train)
y_pred = model_l2.predict(X_test_std)
print('X.shape:{}'.format(X.shape))
print('np.unique(y):{}'.format(np.unique(y)))
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_l2.intercept_, model_l2.coef_))

X.shape:(400, 3)
np.unique(y):[0 1]
accuracy_score:0.6416666666666667
intercept_:
[-0.22167468]
coef_:
[[ 3.12541954 -0.2568051  -2.16727294]]


In [10]:
model_en = linear_model.SGDClassifier(penalty='elasticnet', random_state=0)
model_en.fit(X_train_std, y_train)
y_pred = model_en.predict(X_test_std)
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_en.intercept_, model_en.coef_))

accuracy_score:0.44166666666666665
intercept_:
[7.89819919]
coef_:
[[ 2.50065045  4.94795678 -2.45861742]]


Para os experimentos com Regularização, crie também modelos variando a força da regularização (alpha). Use alpha com os seguintes valores: 0.0001, 0.01, 1, 10, 100.

In [14]:
regularizations_strength = [0.0001, 0.01, 1, 10, 100]
for elem in regularizations_strength:
    print("******   alpha = {}  ******\n".format(elem))

    # modelo com l1
    model_l1 = linear_model.SGDClassifier(penalty='l1', random_state=0, alpha=elem, max_iter=100000)
    model_l1.fit(X_train_std, y_train)
    y_pred = model_l1.predict(X_test_std)
    print("modelo com l1")
    print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))
    print('intercept: {}'.format(model_l1.intercept_))
    print('coef_: {}\n'.format(model_l1.coef_))
    
    # modelo com l2
    model_l2 = linear_model.SGDClassifier(penalty='l2', random_state=0, alpha=elem, max_iter=100000)
    model_l2.fit(X_train_std, y_train)
    y_pred = model_l2.predict(X_test_std)
    print("modelo com l2")
    print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))
    print('intercept: {}'.format(model_l2.intercept_))
    print('coef_: {}\n'.format(model_l2.coef_))
    
    # modelo usando elastic net
    model_elastic_net = linear_model.SGDClassifier(penalty='elasticnet',random_state=0, alpha=elem,max_iter=100000)
    model_elastic_net.fit(X_train_std, y_train)
    y_pred = model_elastic_net.predict(X_test_std)
    print("modelo com elastic net")
    print("accuracy_score: {}".format(accuracy_score(y_test, y_pred)))
    print("intercept: {}".format(model_elastic_net.intercept_))
    print("coef_: {}\n".format(model_elastic_net.coef_))

******   alpha = 0.0001  ******

modelo com l1
accuracy_score: 0.6833333333333333
intercept: [-0.99977638]
coef_: [[ 0.00432048  0.00096965 -0.00171951]]

modelo com l2
accuracy_score: 0.6833333333333333
intercept: [-0.99984943]
coef_: [[ 0.00439838  0.00095146 -0.00134044]]

modelo com elastic net
accuracy_score: 0.6833333333333333
intercept: [-0.99992601]
coef_: [[ 0.00371531  0.00064656 -0.00203015]]

******   alpha = 0.01  ******

modelo com l1
accuracy_score: 0.6833333333333333
intercept: [-0.99999545]
coef_: [[ 3.11438707e-05  8.32092484e-07 -1.38136852e-05]]

modelo com l2
accuracy_score: 0.6833333333333333
intercept: [-1.00000064]
coef_: [[ 4.28663444e-05  8.29170962e-06 -1.36487932e-05]]

modelo com elastic net
accuracy_score: 0.6833333333333333
intercept: [-0.99999633]
coef_: [[ 3.24743127e-05  7.26501113e-06 -1.97364363e-05]]

******   alpha = 1  ******

modelo com l1
accuracy_score: 0.6833333333333333
intercept: [-1.00000004]
coef_: [[0. 0. 0.]]

modelo com l2
accuracy_scor

In [20]:
from sklearn.ensemble import GradientBoostingClassifier

In [23]:
modelo = GradientBoostingClassifier()

modelo.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [24]:
modelo.feature_importances_

array([0.24852061, 0.6214773 , 0.13000209])

In [None]:
sa