In [61]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [62]:
pip install ucimlrepo



In [63]:
#Importing the dataset into our code
from ucimlrepo import fetch_ucirepo

# fetch dataset
iris = fetch_ucirepo(id=53)

# data (as pandas dataframes)
X = iris.data.features
y = iris.data.targets

# metadata
print(iris.metadata)

# variable information
print(iris.variables)


{'uci_id': 53, 'name': 'Iris', 'repository_url': 'https://archive.ics.uci.edu/dataset/53/iris', 'data_url': 'https://archive.ics.uci.edu/static/public/53/data.csv', 'abstract': 'A small classic dataset from Fisher, 1936. One of the earliest known datasets used for evaluating classification methods.\n', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Tabular'], 'num_instances': 150, 'num_features': 4, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1936, 'last_updated': 'Tue Sep 12 2023', 'dataset_doi': '10.24432/C56C76', 'creators': ['R. A. Fisher'], 'intro_paper': {'ID': 191, 'type': 'NATIVE', 'title': 'The Iris data set: In search of the source of virginica', 'authors': 'A. Unwin, K. Kleinman', 'venue': 'Significance, 2021', 'year': 2021, 'journal': 'Significance, 2021', 'DOI': '1740-9713.01589', 'URL': 'https://www.semanticscholar.org

In [64]:
#Loading iris dataset
iris = load_iris()
X = iris['data']
y = iris['target']
target_names = iris['target_names']

In [65]:
# Filter only versicolor (1) and virginica (2)
mask = (y == 1) | (y == 2)
X = X[mask]
y = y[mask]
y = y - 1  # Change labels: versicolor=0, virginica=1

# Normalize features
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [66]:
# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


In [67]:
# Loss function (binary cross-entropy)
def compute_loss(y, y_hat):
    m = len(y)
    return -np.mean(y * np.log(y_hat + 1e-8) + (1 - y) * np.log(1 - y_hat + 1e-8))


In [73]:
# Training with gradient descent
def train_logistic_regression(X, y, lr=0.1, epochs=1000):
    m, n = X.shape
    w = np.zeros(n)
    b = 0

    for epoch in range(epochs):
        z = np.dot(X, w) + b
        y_hat = sigmoid(z)
        loss = compute_loss(y, y_hat)

        # Gradients
        dw = np.dot(X.T, (y_hat - y)) / m
        db = np.sum(y_hat - y) / m

        # Update weights
        w -= lr * dw
        b -= lr * db

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")

    return w, b


In [74]:
# Predict function
def predict(X, w, b):
    probs = sigmoid(np.dot(X, w) + b)
    return (probs >= 0.5).astype(int)


In [75]:
# Train model
w, b = train_logistic_regression(X_train, y_train)

Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.1803
Epoch 200, Loss: 0.1230
Epoch 300, Loss: 0.0972
Epoch 400, Loss: 0.0822
Epoch 500, Loss: 0.0723
Epoch 600, Loss: 0.0653
Epoch 700, Loss: 0.0599
Epoch 800, Loss: 0.0557
Epoch 900, Loss: 0.0523


In [76]:
# Predict and evaluate
y_pred = predict(X_test, w, b)
accuracy = np.mean(y_pred == y_test)

print(f"\nTest Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 80.00%
