# Regularizações / Coeficientes
- l1
- l2
- elasticnet

In [2]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

### Carregando os dados e separando em treino e teste (train/test split)

In [5]:
# Read the wine data file; header=None because the file is loaded without
# a header row, so the column names are assigned explicitly below.
df = pd.read_csv(
    'wine.data',
    header=None,
)

# First column is the class label, followed by the thirteen feature columns.
df.columns = [
    'Class label',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# Show the distinct class labels, then preview the first rows as the cell output.
print('Class labels', np.unique(df['Class label']))
df.head()

Class labels [1 2 3]


Unnamed: 0,Class label,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [6]:
# Column 0 holds the class label; every remaining column is a feature.
y = df.iloc[:, 0].values
X = df.iloc[:, 1:].values

# Hold out 30% of the rows as the test set. The split is stratified on the
# labels (stratify=y) and made repeatable with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
    stratify=y,
)

### Standardization

In [7]:
# Fit the scaler on the training split only, then apply the same fitted
# scaler to both splits so the test data plays no part in the fit.
std = StandardScaler().fit(X_train)
X_train_std = std.transform(X_train)
X_test_std = std.transform(X_test)

### Regularizações

In [10]:
# Logistic regression with an L1 penalty.
# The solver is given explicitly because not every solver supports L1:
# scikit-learn >= 0.22 defaults to 'lbfgs', which raises a ValueError for
# penalty='l1'. 'liblinear' was the default before that release.
# NOTE(review): recent scikit-learn releases appear to deprecate 'liblinear'
# for multiclass targets -- confirm against the installed version;
# solver='saga' is the other L1-capable option.
model_l1 = linear_model.LogisticRegression(
    penalty='l1',
    solver='liblinear',
    random_state=0,
)
model_l1.fit(X_train_std, y_train)
y_pred = model_l1.predict(X_test_std)

# Report the dataset shape, the labels present, the test accuracy and the
# fitted parameters.
print('X.shape: {}'.format(X.shape))
print('np.unique(y):{}'.format(np.unique(y)))
print('accuracy_score:{}\nintercept_: {}\ncoef_: {}'.format(accuracy_score(y_test, y_pred), model_l1.intercept_, model_l1.coef_))

X.shape: (178, 13)
np.unique(y):[1 2 3]
accuracy_score:1.0
intercept_: [-1.26393733 -1.21594878 -2.36997195]
coef_: [[ 1.24655602  0.17985511  0.7465467  -1.16451486  0.          0.
   1.15762945  0.          0.          0.          0.          0.56062
   2.50835351]
 [-1.53750917 -0.3869622  -0.99531308  0.36477996 -0.05931227  0.
   0.66812941  0.          0.         -1.93437381  1.23359092  0.
  -2.2314704 ]
 [ 0.13531688  0.16916055  0.35755735  0.          0.          0.
  -2.43499952  0.          0.          1.56327034 -0.81802688 -0.49540936
   0.        ]]


In [11]:
# Logistic regression with an L2 penalty; the solver is left at the
# library default.
model_l2 = linear_model.LogisticRegression(
    penalty='l2',
    random_state=0,
)
model_l2.fit(X_train_std, y_train)
y_pred = model_l2.predict(X_test_std)

# Report the dataset shape, the labels present, the test accuracy and the
# fitted parameters.
print('X.shape: {}'.format(X.shape))
print('np.unique(y):{}'.format(np.unique(y)))
print(
    'accuracy_score:{}\nintercept_: {}\ncoef_: {}'.format(
        accuracy_score(y_test, y_pred),
        model_l2.intercept_,
        model_l2.coef_,
    )
)

X.shape: (178, 13)
np.unique(y):[1 2 3]
accuracy_score:1.0
intercept_: [-1.33509136 -0.96973542 -2.04898785]
coef_: [[ 1.27713853  0.38210274  0.8015599  -1.30842842  0.22782837  0.23101419
   0.90234371 -0.08423823  0.01462196 -0.0312838   0.02796323  0.71703048
   1.79262118]
 [-1.45395571 -0.620303   -1.05445248  0.67148394 -0.29048951  0.18277571
   0.51163918  0.10789643  0.08199321 -1.61228834  0.88800662  0.1659356
  -1.73246957]
 [ 0.38965148  0.4083047   0.40211468  0.26242969  0.15288658 -0.20064653
  -1.38792256 -0.06305419 -0.28440345  1.2553389  -0.93849662 -0.83821807
   0.13754706]]


In [12]:
# L2-penalised logistic regression fitted with the multinomial formulation
# using the newton-cg solver.
model_multinomial = linear_model.LogisticRegression(
    multi_class='multinomial',
    solver='newton-cg',
    penalty='l2',
    random_state=0,
)
model_multinomial.fit(X_train_std, y_train)
y_pred = model_multinomial.predict(X_test_std)

# Report the dataset shape, the labels present, the test accuracy and the
# fitted parameters.
print('X.shape: {}'.format(X.shape))
print('np.unique(y):{}'.format(np.unique(y)))
print(
    'accuracy_score:{}\nintercept_: {}\ncoef_: {}'.format(
        accuracy_score(y_test, y_pred),
        model_multinomial.intercept_,
        model_multinomial.coef_,
    )
)

X.shape: (178, 13)
np.unique(y):[1 2 3]
accuracy_score:1.0
intercept_: [ 0.36862533  0.78036204 -1.14898737]
coef_: [[ 0.78792639  0.24089338  0.4473384  -0.73489829  0.1113343   0.22607273
   0.60652205 -0.14847572  0.24131476  0.14140959  0.10670518  0.58061102
   0.98240089]
 [-0.95393153 -0.43668273 -0.76795841  0.50646632 -0.17622299  0.07110642
   0.38809799  0.09343743  0.11353057 -0.91231356  0.61090399  0.16780023
  -1.10002567]
 [ 0.16600514  0.19578935  0.32062001  0.22843197  0.06488869 -0.29717916
  -0.99462004  0.05503829 -0.35484533  0.77090397 -0.71760916 -0.74841125
   0.11762478]]


In [13]:
# SGD classifier with the modified Huber loss and no regularisation penalty.
# The penalty is disabled with the None object rather than the string 'none':
# recent scikit-learn releases validate this parameter and reject the string,
# while None is accepted by old and new releases alike.
model_mh = linear_model.SGDClassifier(
    loss='modified_huber',
    penalty=None,
    random_state=0,
)
model_mh.fit(X_train_std, y_train)
y_pred = model_mh.predict(X_test_std)

# Report the test accuracy and the fitted parameters.
print('accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(accuracy_score(y_test, y_pred), model_mh.intercept_, model_mh.coef_))

accuracy_score:1.0
intercept_:
[-142.68148912 -125.44735328 -276.67748864]
coef_:
[[  90.2812871    36.64948258   97.04085794 -125.41752433  -28.19064734
    76.28288223   97.37681826  -41.40195898  -56.92549775   17.58870599
    27.37182208   73.34084621  211.19599252]
 [-263.95993746  -81.35192407 -134.50534288   85.42530184   40.65883345
     6.9551471   154.53036184  100.2277951   -28.48638801 -310.80926881
   114.21362832    7.64180817 -230.50397619]
 [ 135.67707929    4.84753801   85.28769102  100.2663828    -3.63503296
   -22.82533178 -171.25578945  -35.53445471  -51.06959705  152.05944999
  -176.79823346 -170.17637065   93.74735741]]




In [14]:
# SGD classifier with the modified Huber loss and an L1 penalty.
model_mhl1 = linear_model.SGDClassifier(
    loss='modified_huber',
    penalty='l1',
    random_state=0,
)
model_mhl1.fit(X_train_std, y_train)
y_pred = model_mhl1.predict(X_test_std)

# Report the test accuracy and the fitted parameters.
print(
    'accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(
        accuracy_score(y_test, y_pred),
        model_mhl1.intercept_,
        model_mhl1.coef_,
    )
)

accuracy_score:1.0
intercept_:
[-142.68148912 -125.44735328 -306.35815856]
coef_:
[[  89.79866954   36.16686502   96.55824038 -124.93490677  -27.70802979
    75.80026467   96.8942007   -40.91934142  -56.44288019   17.10608844
    26.88920452   72.85822865  210.71337496]
 [-263.4773199   -80.86930651 -134.02272532   84.94268428   40.17621589
     6.47252954  154.04774428   99.74517754  -28.00377045 -310.32665125
   113.73101076    7.15919062 -230.02135863]
 [ 154.81672112   16.21945299   36.93842907   36.94965086  -34.96832968
    18.63499913 -146.8604452   -30.20730346  -51.9645466   169.96158763
  -128.81638452 -135.2615544    97.21165791]]




In [15]:
# SGD classifier with the modified Huber loss and an L2 penalty.
model_mhl2 = linear_model.SGDClassifier(
    loss='modified_huber',
    penalty='l2',
    random_state=0,
)
model_mhl2.fit(X_train_std, y_train)
y_pred = model_mhl2.predict(X_test_std)

# Report the test accuracy and the fitted parameters.
print(
    'accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(
        accuracy_score(y_test, y_pred),
        model_mhl2.intercept_,
        model_mhl2.coef_,
    )
)

accuracy_score:0.9814814814814815
intercept_:
[-133.55229223 -108.84105874 -283.84845145]
coef_:
[[  64.24492318   27.99158523   68.97230425  -78.44154333   -9.43156164
    53.00579554   68.95697172  -28.65065856  -31.27729694    9.10269409
    18.97960851   52.32850534  142.62880064]
 [-173.82810269  -32.58102496 -120.3782499    28.25905445  -23.1259601
     1.53120601   96.80522153   47.68519683  -44.39339951 -182.48718723
    99.25512486   10.89635644 -163.40514175]
 [ 102.82149297   13.06803523   40.26527851   55.04226335    0.29059667
   -56.00300297 -104.79731422  -25.38647002  -29.23814142  115.59206866
  -134.68444213 -102.6787633    62.19846117]]




In [16]:
# SGD classifier with the modified Huber loss and an elastic-net penalty.
model = linear_model.SGDClassifier(
    loss='modified_huber',
    penalty='elasticnet',
    random_state=0,
)
model.fit(X_train_std, y_train)
y_pred = model.predict(X_test_std)

# Report the test accuracy and the fitted parameters.
print(
    'accuracy_score:{}\nintercept_:\n{}\ncoef_:\n{}'.format(
        accuracy_score(y_test, y_pred),
        model.intercept_,
        model.coef_,
    )
)

accuracy_score:1.0
intercept_:
[-115.76971554 -160.71704492 -285.30505012]
coef_:
[[ 123.91275499   27.25838834  122.67343871 -146.16408275  -33.48610385
    17.85290524   26.40894794    0.          -29.98815536  -12.68503339
    28.04845641   54.09734213  176.34210642]
 [-156.14636532  -12.2965563  -123.68993198   26.10839284    7.55016295
    -6.98576902   81.06592956   53.72891307  -45.28760889 -242.95726581
    78.15250959    7.51084648 -186.29067841]
 [ 112.36872187   17.4671006    43.78675923   40.4863028     3.30011746
     3.5951165  -152.39547824   -4.13187172 -109.01228301  135.25016979
  -102.56934514  -91.93306103   59.12231722]]




In [17]:
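# Options for the `loss` parameter --
# classification losses: 'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron';
# regression losses: 'squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'.
# NOTE: newer scikit-learn releases rename 'log' to 'log_loss' and 'squared_loss' to 'squared_error'.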