# Feature selection - L1 Regularization

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_wine

# Load the wine dataset and separate the feature matrix from the class labels
dataObj = load_wine()
X, y = dataObj.data, dataObj.target

# Hold out 30% of the samples for testing (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Standardization: the scaler is fitted on the training set only and the same
# fitted transform is then applied to both the training and the test features
stdsc = StandardScaler()
stdsc.fit(X_train)
X_train_std = stdsc.transform(X_train)
X_test_std = stdsc.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# L1-penalised logistic regression, one-vs-rest. C is the inverse of the
# regularization strength, so a small value such as 0.1 is a strong penalty.
# random_state is pinned because the liblinear solver shuffles the data
# internally; this keeps the fit reproducible and consistent with the
# random_state=0 used for the models in the C sweep further down.
lr = LogisticRegression(solver='liblinear', penalty='l1', C=0.1, random_state=0, multi_class='ovr')
lr.fit(X_train_std, y_train)
print('Training accuracy:', lr.score(X_train_std, y_train))
print('Test accuracy:', lr.score(X_test_std, y_test))

In [None]:
# Intercept (bias) terms of the fitted model. The model was created with
# multi_class='ovr', so expect one intercept per class-vs-rest classifier.
print(lr.intercept_)

In [None]:
# One row of lr.coef_ per one-vs-rest model: print each row's weights,
# formatted to two decimals, on a single line.
for model_idx, weights in enumerate(lr.coef_):
    formatted = ", ".join(format(w, "5.2f") for w in weights)
    print(f"(Model:{model_idx}) Coefs: {formatted}")

In [None]:
# Calculate weight values for different values of C.
# C is the inverse regularization strength: the smaller C is, the stronger
# the L1 penalty applied to the weights.
cols = dataObj.feature_names
class_idx = 1  # lr.coef_ has one row per one-vs-rest model; only this row is tracked

# Collect one row of weights per C and build the frame once at the end,
# instead of enlarging an empty (object-dtype) DataFrame row by row with .loc.
c_values = []
weight_rows = []
for c in np.arange(-4., 6.):  # exponents -4, -3, ..., 5
    C = 10.**c
    # NOTE: this rebinds the notebook-level `lr`; once the loop has finished it
    # refers to the model fitted with the largest C, not the C=0.1 model above.
    lr = LogisticRegression(solver='liblinear', penalty='l1', C=C, random_state=0, multi_class='ovr')
    lr.fit(X_train_std, y_train)
    c_values.append(C)
    weight_rows.append(lr.coef_[class_idx])

# Rows are indexed by C (float index named 'C'); columns are the feature names.
df2 = pd.DataFrame(np.vstack(weight_rows), index=pd.Index(c_values, name='C'), columns=cols)

display(df2)

In [None]:
# Plot the results: one line per feature, showing its weight as a function of C
import seaborn as sns

cols = df2.columns.values
colors = sns.color_palette(n_colors=len(cols))
# Give every feature its own marker as well, since palette colours may repeat
markers = list('.,ov^<>12348spP*hH+xXDd|_')[:len(cols)]

fig, ax = plt.subplots(figsize=(10,5))
for feature, line_color, line_marker in zip(cols, colors, markers):
    ax.plot(
        df2[feature],
        label=feature,
        color=line_color,
        marker=line_marker,
        markersize=8,
    )

# C spans several orders of magnitude, hence the logarithmic x axis
ax.set(
    xscale='log',
    xlim=[10.**-2, 10.**4],
    title='L1 Regularization Effect',
    xlabel='C',
    ylabel='Weight',
)
# Place the legend outside the axes, to the right of the plot area
ax.legend(bbox_to_anchor=(1.04,1), loc="upper left")
plt.show()

- Installing seaborn package

```python conda install -c anaconda seaborn ```

- Alternatively
```python 
colors = [(0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313), (0.8392156862745098, 0.15294117647058825, 0.1568627450980392), (0.5803921568627451, 0.403921568627451, 0.7411764705882353), (0.5490196078431373, 0.33725490196078434, 0.29411764705882354), (0.8901960784313725, 0.4666666666666667, 0.7607843137254902), (0.4980392156862745, 0.4980392156862745, 0.4980392156862745), (0.7372549019607844, 0.7411764705882353, 0.13333333333333333), (0.09019607843137255, 0.7450980392156863, 0.8117647058823529), (0.12156862745098039, 0.4666666666666667, 0.7058823529411765), (1.0, 0.4980392156862745, 0.054901960784313725), (0.17254901960784313, 0.6274509803921569, 0.17254901960784313)] ```