# Fetching & Loading Data

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Load the bundled Iris dataset and merge the feature matrix with the class
# labels into a single DataFrame; the label ends up in the last column, "target".
iris = load_iris()
iris_df = pd.DataFrame(
    data=np.column_stack((iris['data'], iris['target'])),
    columns=[*iris['feature_names'], 'target'],
)

In [None]:
# Preview the first rows to sanity-check the assembled frame.
iris_df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
iris_df.info()

In [None]:
# Swap the verbose "<feature> (cm)" headers for short snake_case names so
# later cells can select columns without spaces or parentheses.
iris_df = iris_df.rename(
    columns={
        "sepal length (cm)": "sepal_length",
        "sepal width (cm)": "sepal_width",
        "petal length (cm)": "petal_length",
        "petal width (cm)": "petal_width",
    }
)
iris_df.head()

In [None]:
# Count how many rows carry each value of the "target" column.
iris_df["target"].value_counts()

In [None]:
# Summary statistics for the columns of the frame.
iris_df.describe()

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Histogram of the frame's columns: 50 bins each on a 20x15 figure.
iris_df.hist(bins=50, figsize=(20,15))
plt.show()

In [None]:
import seaborn as sns
# Pairwise correlation between all columns (features and target).
corr = iris_df.corr()

# Draw the matrix as a heatmap, labelling both axes with the column names.
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns)

In [None]:
# Rank every column by its correlation with "target", highest first.
corr["target"].sort_values(ascending=False)

# Perceptron

In [None]:
def generate_data_for_perceptron(cols_to_select, n_samples=100):
    """Build a binary training set from the leading rows of ``iris_df``.

    Parameters
    ----------
    cols_to_select : list of str
        Names of the ``iris_df`` feature columns to use as inputs.
    n_samples : int, optional
        Number of leading rows to take. The default of 100 keeps the
        original behaviour (the rows the notebook plots as setosa and
        versicolor).

    Returns
    -------
    X : numpy.ndarray
        One row per selected sample; column 0 is a constant 1 (bias term),
        followed by the requested feature columns.
    y : numpy.ndarray
        -1 where the ``target`` column equals 0, +1 for every other value.
    """
    subset = iris_df.iloc[:n_samples]
    # Look the label up by name instead of by positional index 4, so the
    # function keeps working if columns are added or reordered.
    y = np.where(subset["target"].values == 0, -1, 1)
    features = subset[cols_to_select].values
    # Size the bias column from the rows actually selected rather than a
    # hard-coded 100, so the concatenation cannot go out of step with the slice.
    bias = np.ones((len(subset), 1), dtype=int)
    X = np.concatenate((bias, features), axis=1)
    return X, y

In [None]:
# Two-feature training set; the helper prepends a column of ones to X for the bias.
X, y = generate_data_for_perceptron(['sepal_length', 'petal_length'])

In [None]:
# Column 0 of X is the bias column, so the two features sit in columns 1 and 2.
# Rows 0-49 are drawn as one class and rows 50-99 as the other.
plt.scatter(X[0:50,1], X[0:50, 2], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 1], X[50:100, 2], color='blue', marker='x', label='versicolor')
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

## Question 1 : Perceptron Weight Update Rule

Fill out the weight update rule for the perceptron algorithm; try not to look at the code snippet in the slides.

In [None]:
# Fixed seed so the initial weights are the same on every run.
rgen= np.random.RandomState(42)
# One weight per column of X, drawn from a normal distribution (mean 0, std 0.01).
w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
# Filled in by fit() during training.
errors_ = []
# learning rate
eta = 0.1
def fit(X, y):
    for _ in range(10):
        errors = 0
        for xi, target in zip(X, y):
            update = # TODO :: expect one line of code 
            w_[1:] += # TODO :: expect one line of code 
            w_[0] += # TODO :: expect one line of code
            errors += int(update != 0.0)
            errors_.append(errors_)
    return w_, errors_
    
def net_input(X):
    """Return the dot product of ``X`` with the module-level weights ``w_``."""
    weighted_sum = np.dot(X, w_)
    return weighted_sum

def predict(X):
    """Map the net input to a class label: 1 where it is >= 0.0, otherwise -1."""
    scores = net_input(X)
    labels = np.where(scores >= 0.0, 1, -1)
    return labels

In [None]:
# Train the perceptron; fit() returns the module-level w_ and errors_ it updates.
w_, errors_ = fit(X, y)

In [None]:
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier = None, resolution=0.02):
    """Plot 2-D decision regions together with the labelled samples.

    Parameters
    ----------
    X : numpy.ndarray
        Sample matrix. Columns 1 and 2 are used as the two plotted features
        (column 0 is not plotted; in this notebook it is the bias column).
    y : numpy.ndarray
        Class label per row of ``X``; one scatter series is drawn per
        unique value (at most five, limited by the marker/colour tuples).
    classifier : object, optional
        Anything exposing ``predict(array)``. When omitted, the module-level
        ``predict`` function is used instead.
    resolution : float, optional
        Step size of the evaluation grid along both feature axes.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x2_min, x2_max = X[:, 2].min() - 1, X[:, 2].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Build the grid once, in the same layout as X (ones column first), and
    # size it from the mesh itself. A hard-coded point count only matches one
    # specific combination of data range and resolution.
    grid = np.array([xx1.ravel(), xx2.ravel()]).T
    grid = np.concatenate((np.ones((grid.shape[0], 1), dtype=int), grid), axis=1)

    if classifier is not None:
        Z = classifier.predict(grid)
    else:
        Z = predict(grid)
    Z = Z.reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 1],
                    y=X[y == cl, 2],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

In [None]:
# Plot the classification result. No classifier is passed, so
# plot_decision_regions falls back to the module-level predict().
plot_decision_regions(X, y)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Adaline

In [None]:
# Re-create the seeded generator and draw fresh small random weights,
# replacing the module-level w_ used by the earlier cells.
rgen= np.random.RandomState(42)
w_ = rgen.normal(loc=0.0, scale=0.01, size=X.shape[1])
# Average-cost values appended by fit().
cost_ = []
# Number of passes fit() makes over the training data.
n_iter = 100
# Learning rate.
eta = 0.01

In [None]:
def shuffle(X, y):
    """Reorder ``X`` and ``y`` with one shared random permutation.

    The permutation is drawn from the module-level generator ``rgen``, and
    the same index order is applied to both arrays so rows and labels stay
    paired.
    """
    order = rgen.permutation(len(y))
    shuffled_X = X[order]
    shuffled_y = y[order]
    return shuffled_X, shuffled_y

In [None]:
def net_input(X):
    """Net input: dot product of ``X`` with the module-level weights ``w_``."""
    weighted_sum = np.dot(X, w_)
    return weighted_sum

## Question 2 : Adaline Weight Update Rule

Fill out the weight update rule for the Adaline algorithm; try not to look at the code snippet in the slides.

In [None]:
def update_weights(xi, target,w_):
    """Apply the Adaline learning rule to update the weights.

    Exercise skeleton: the four right-hand sides below are intentionally
    left for the student to fill in.

    Parameters
    ----------
    xi : one training sample (a single row of X, as passed by the caller).
    target : the label belonging to ``xi``.
    w_ : the weights to update. They are modified with ``+=``; assuming
        ``w_`` is a NumPy array (TODO confirm), that changes the caller's
        array in place rather than rebinding a local copy.

    Returns
    -------
    cost : the cost value computed for this sample.
    """
    output = # TODO :: expect one line of code 
    error = # TODO :: expect one line of code
    w_ += # TODO :: expect one line of code
    cost = # TODO :: expect one line of code
    return cost

In [None]:
def activation(X):
    """Linear (identity) activation: hand the input back unchanged."""
    linear_output = X
    return linear_output

In [None]:
def predict(X):
    """Threshold the activated net input at zero: 1 where >= 0.0, else -1."""
    scores = activation(net_input(X))
    labels = np.where(scores >= 0.0, 1, -1)
    return labels

In [None]:
def fit(X, y, w_):
    """Train Adaline with stochastic gradient descent for ``n_iter`` epochs.

    Each epoch reshuffles the training data, calls ``update_weights`` once
    per sample, and appends that epoch's average cost to the module-level
    list ``cost_``.

    Parameters
    ----------
    X : numpy.ndarray
        Training samples, one per row.
    y : numpy.ndarray
        Target label for each row of ``X``.
    w_ : weights handed to ``update_weights`` for every sample.

    Returns
    -------
    None
        Results are read from ``w_`` and ``cost_`` afterwards.
    """
    for _ in range(n_iter):
        # `shuffle` is the helper function itself, so testing its truthiness
        # would always pass; reshuffle unconditionally at the start of each epoch.
        X, y = shuffle(X, y)
        cost = [update_weights(xi, target, w_) for xi, target in zip(X, y)]
        # One averaged entry per epoch, computed after all samples were seen,
        # so len(cost_) equals the number of epochs.
        cost_.append(sum(cost) / len(y))

In [None]:
# Train Adaline. fit() has no return statement; results are read from w_ and cost_.
fit(X,y,w_)

In [None]:
# Decision regions of the trained model. The features are the raw
# centimetre measurements (no standardization step is applied in this
# notebook), so the axes are labelled in cm.
plot_decision_regions(X, y)
plt.title('Adaline-Stochastic Gradient Descent')
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

# Learning curve: the average-cost values recorded in cost_ during training.
plt.plot(range(1, len(cost_) + 1), cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average Cost')
plt.show()

# Logistic Regression

## Question 3 : Implement Logistic Regression 

In this exercise, you need to implement a Logistic Regression model using sklearn's LogisticRegression class. Look at the sklearn documentation for LogisticRegression (https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) and fill out the following code cell.

In [None]:
from sklearn.linear_model import LogisticRegression
# TODO :: expect 1 line of code
# (this line must bind the name `lr`, which the call below uses and which
# is not defined anywhere earlier in the notebook)

lr.fit(X, y)

In [None]:
# Passing lr makes plot_decision_regions call lr.predict on the grid
# instead of the module-level predict().
plot_decision_regions(X, y, lr)

## Question 4 : Play with Regularization Strength

Parameter C in LogisticRegression is the inverse of the regularization strength (smaller values mean stronger regularization), which makes it a critical way to control overfitting. Try out different values (1, 10, 100) for C and plot the corresponding decision regions. What do you observe when comparing the different decision regions? Which value of C do you think is the best?
