In [79]:
import pandas as pd
import numpy as np
import sklearn 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report
from sklearn.datasets import load_iris
import warnings
%matplotlib inline
warnings.filterwarnings('ignore')


In [80]:
data = load_iris()
X, y = data.data, data.target
data


{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [81]:
X_df = pd.DataFrame(X)
X_df['catagory'] = y

In [82]:
data_df = X_df.rename(columns={0:'sepal_length',1:'sepal_width',2:'petal_length',3:'petal_width'})
data_df.to_csv('iris.csv')

In [83]:
def train_test_split_iris():
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123)

    return X_train, X_test, y_train, y_test


def training(iters, batch_size, alpha, W):
    X_train, X_test, y_train, y_test = train_test_split_iris()

    
    # Convert target to one-hot encoding
    y_train_onehot = np.eye(3)[y_train]

    # Initialize history array to record loss and accuracy
    history = np.zeros((iters//100, 3))

    # Gradient descent
    for i in range(iters):
        # Randomly select a mini-batch of samples
        idx = np.random.choice(X_train.shape[0], batch_size)
        X_batch = X_train[idx]
        y_batch = y_train_onehot[idx]

        # Calculate softmax probabilities and cross-entropy loss
        scores = X_batch.dot(W)
        exp_scores = np.exp(scores)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        loss = -np.sum(y_batch * np.log(probs)) / batch_size

        # Calculate the gradient of the loss w.r.t. the weights
        grad = X_batch.T.dot(probs - y_batch) / batch_size

        # Update the weights
        W -= alpha * grad

        # Record history every 100 iterations
        if i % 100 == 0:
            y_test_onehot = np.eye(3)[y_test]  #target one-hot encoding
            scores = X_test.dot(W)
            exp_scores = np.exp(scores)
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            test_loss = -np.sum(y_test_onehot * np.log(probs)) / len(y_test)
            test_acc = np.mean(np.argmax(probs, axis=1) == y_test)
            history[i//100] = [i, test_loss, test_acc]

            print(f"Iteration {i}, Test loss: {test_loss:.10f}, Test accuracy: {test_acc:.4f}")

    return W, history


In [85]:

# Set initial parameters
iters = 1000
alpha = 0.01
W = np.ones((4, 3))*np.mean([0.5, 1.5]) 

# Train three models with different batch sizes
W1, history1 = training(iters, 75, alpha, W)
W2, history2 = training(iters, 35, alpha, W)
W3, history3 = training(iters, 1, alpha, W)


Iteration 0, Test loss: 1.1011809654, Test accuracy: 0.2667
Iteration 100, Test loss: 0.7191183166, Test accuracy: 0.9600
Iteration 200, Test loss: 0.6009353240, Test accuracy: 0.7467
Iteration 300, Test loss: 0.5323568361, Test accuracy: 0.9067
Iteration 400, Test loss: 0.5052256582, Test accuracy: 0.7867
Iteration 500, Test loss: 0.4428982422, Test accuracy: 0.9867
Iteration 600, Test loss: 0.4398198037, Test accuracy: 0.9067
Iteration 700, Test loss: 0.4224046114, Test accuracy: 0.9067
Iteration 800, Test loss: 0.3820202817, Test accuracy: 0.9867
Iteration 900, Test loss: 0.3884691465, Test accuracy: 0.9067
Iteration 0, Test loss: 0.3771987244, Test accuracy: 0.9067
Iteration 100, Test loss: 0.3732416302, Test accuracy: 0.9067
Iteration 200, Test loss: 0.3429560240, Test accuracy: 0.9467
Iteration 300, Test loss: 0.3341704080, Test accuracy: 0.9467
Iteration 400, Test loss: 0.3143774507, Test accuracy: 0.9600
Iteration 500, Test loss: 0.3234389715, Test accuracy: 0.9067
Iteration 60