Name: Harshita <br>
Roll Number: 102317003<br>
SubGroup: 3Q11

Q1: Generate a dataset with at least seven highly correlated columns and a target variable.
Implement Ridge Regression using Gradient Descent Optimization. Take different
values of learning rate (such as 0.0001, 0.001, 0.01, 0.1, 1, 10) and regularization
parameter ($10^{-15}$, $10^{-10}$, $10^{-5}$, $10^{-3}$, 0, 1, 10, 20). Choose the best parameters for which the ridge
regression cost function is minimum and R2_score is maximum.  

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Synthetic regression data: eight predictors that all derive from the same
# latent signal `z_val`, so the columns are strongly correlated with each other.
np.random.seed(0)  # fixed seed so every draw below is reproducible
N = 500
z_val = np.random.randn(N)

# Columns 0-6 are the latent signal plus small (scale 0.01) noise. The noise
# vectors are drawn one per column, in column order.
features = np.empty((N, 8))
for j in range(7):
    features[:, j] = z_val + 0.01 * np.random.randn(N)
# Column 7 is a half-strength copy of the signal with larger (scale 0.2) noise.
features[:, 7] = 0.5 * z_val + 0.2 * np.random.randn(N)

# Linear target with additive Gaussian noise (scale 0.5).
true_weights = np.array([3.0, -2.0, 1.0, 0.0, 0.5, -1.0, 2.0, 4.0])
target = np.dot(features, true_weights) + 0.5 * np.random.randn(N)

# Hold out 25% of the rows for evaluation.
X_tr, X_te, y_tr, y_te = train_test_split(
    features, target, test_size=0.25, random_state=1
)

# Standardise both splits with statistics taken from the training rows only.
mean_vals = np.mean(X_tr, axis=0)
std_vals = np.std(X_tr, axis=0)
# A zero or non-finite spread would break the division, so fall back to 1.0.
std_vals[~np.isfinite(std_vals) | (std_vals <= 0)] = 1.0
X_tr, X_te = [(part - mean_vals) / std_vals for part in (X_tr, X_te)]

def ridge_reg(X, y, alpha, lam, iters=2000):
    """Fit ridge regression by batch gradient descent.

    Each step follows the gradient of
    ``(1/m) * sum((X @ w + b - y) ** 2) + lam * sum(w ** 2)``;
    the intercept ``b`` is not penalised.

    Args:
        X: Array-like of shape (m, n) holding the predictors.
        y: Array-like with m target values. A column vector of shape (m, 1)
            is flattened so the residual stays one-dimensional.
        alpha: Learning rate (step size) of each update.
        lam: L2 regularisation strength applied to the weights.
        iters: Number of gradient steps to run.

    Returns:
        A ``(wts, bias)`` tuple with the fitted weight vector of shape (n,)
        and the intercept, or ``None`` when an update produced a non-finite
        parameter (the step size made the iteration diverge).

    Raises:
        ValueError: If ``X`` is not two-dimensional, has no rows, or ``y``
            does not contain exactly one value per row of ``X``.
    """
    # asarray accepts lists / DataFrames as well as ndarrays; ravel prevents an
    # (m, 1) target from broadcasting the residual into an (m, m) matrix.
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64).ravel()
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (m, n)")
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one row")
    if y.shape[0] != m:
        raise ValueError("y must contain exactly one target per row of X")

    wts = np.zeros(n, dtype=np.float64)
    bias = 0.0
    scale = 2.0 / m  # loop-invariant factor of the data-term gradient

    # Divergence is reported through the None return value, so the overflow /
    # invalid-value warnings raised on the way there are only noise.
    with np.errstate(over="ignore", invalid="ignore"):
        for _ in range(iters):
            diff = X.dot(wts) + bias - y
            grad_wts = scale * X.T.dot(diff) + 2 * lam * wts
            grad_bias = scale * diff.sum()
            wts -= alpha * grad_wts
            bias -= alpha * grad_bias
            if not (np.isfinite(wts).all() and np.isfinite(bias)):
                return None
    return wts, bias

# Grid search over learning rate and regularisation strength.
alphas = [0.0001, 0.001, 0.01, 0.1]
lambdas = [0, 0.001, 0.01, 0.1, 1, 10]
# R2 has no lower bound (a model can be arbitrarily worse than predicting the
# mean), so start from -inf: any finite score then registers as a candidate.
top_r2 = float("-inf")
opt_params = None

# NOTE(review): the winner is picked by its score on the held-out split, so the
# reported R2 is an optimistic estimate; a separate validation split would
# avoid that.
for a in alphas:
    for lam in lambdas:
        outcome = ridge_reg(X_tr, y_tr, a, lam)
        if outcome is None:
            # Gradient descent diverged for this learning rate; skip it.
            continue
        wts, bias = outcome
        preds = X_te.dot(wts) + bias
        if not np.isfinite(preds).all():
            continue
        r2_val = r2_score(y_te, preds)
        # Strict '>' keeps the first combination encountered on ties.
        if r2_val > top_r2:
            top_r2 = r2_val
            opt_params = (a, lam, r2_val)

print("Optimal LR, Lambda, R2 =", opt_params)

Optimal LR, Lambda, R2 = (0.1, 0, 0.9920703853817209)


Q2. Load the Hitters dataset from the following link
https://drive.google.com/file/d/1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG/view?usp=sharing   <br>
(a) Pre-process the data (null values, noise, categorical to numerical encoding)<br>
(b) Separate input and output features and perform scaling <br>
(c) Fit a Linear, Ridge (use regularization parameter as 0.5748), and LASSO (use
regularization parameter as 0.5748) regression function on the dataset. <br>
(d) Evaluate the performance of each trained model on the test set. Which model
performs the best, and why?

In [5]:
import pandas as pd
# Load the data and drop rows without a target value.
data = pd.read_csv("Hitters.csv").dropna(subset=["Salary"])
# Fill any remaining numeric gaps with the column median.
data = data.fillna(data.median(numeric_only=True))
# Replace each categorical column by its integer category code.
data = data.assign(
    **{
        col: data[col].astype("category").cat.codes
        for col in ("League", "Division", "NewLeague")
    }
)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Target is the Salary column; every other column is a predictor.
y = data["Salary"]
X = data.drop(columns="Salary")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)
# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

# The three estimators to compare; Ridge and Lasso use the prescribed penalty.
model_lr = LinearRegression()
model_ridge = Ridge(alpha=0.5748)
model_lasso = Lasso(alpha=0.5748, max_iter=5000)

# Fit each model on the training split and score it on the test split.
results = []
for name, mdl in zip(("Linear", "Ridge", "Lasso"), (model_lr, model_ridge, model_lasso)):
    preds = mdl.fit(X_train, y_train).predict(X_test)
    mse, r2 = mean_squared_error(y_test, preds), r2_score(y_test, preds)
    results.append((name, mse, r2))
    print(f"{name}: MSE={mse:.2f}, R2={r2:.4f}")

# The best model is the one with the highest test R2 (third tuple field).
best = max(results, key=lambda row: row[2])
print(f"\nBest Model: {best[0]} (R2={best[2]:.4f})")


Linear: MSE=131898.53, R2=0.5532
Ridge: MSE=128967.77, R2=0.5631
Lasso: MSE=128572.34, R2=0.5644

Best Model: Lasso (R2=0.5644)


Q3. Explore the Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV)
functions of Python. Implement both on the Boston House Prediction Dataset (load_boston
dataset from sklearn.datasets).  

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score, mean_squared_error

# Read the Boston housing table from a hosted CSV; 'medv' is the target column.
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
data = pd.read_csv(url)

y = data['medv']
X = data.drop(columns=['medv'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Scale with statistics learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate penalties; both CV estimators choose among them with 10 folds.
alphas = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

ridge = RidgeCV(alphas=alphas, cv=10).fit(X_train_scaled, y_train)
lasso = LassoCV(alphas=alphas, cv=10, max_iter=10000, random_state=42).fit(
    X_train_scaled, y_train
)

pred_ridge = ridge.predict(X_test_scaled)
pred_lasso = lasso.predict(X_test_scaled)

# Test-set metrics, computed up front so the report below is only printing.
r2_ridge = round(r2_score(y_test, pred_ridge), 4)
r2_lasso = round(r2_score(y_test, pred_lasso), 4)
rmse_ridge = round(np.sqrt(mean_squared_error(y_test, pred_ridge)), 2)
rmse_lasso = round(np.sqrt(mean_squared_error(y_test, pred_lasso)), 2)

print("Ridge Alpha:", ridge.alpha_, "| R2:", r2_ridge)
print("Lasso Alpha:", lasso.alpha_, "| R2:", r2_lasso)
print("\nRidge RMSE:", rmse_ridge)
print("Lasso RMSE:", rmse_lasso)


Ridge Alpha: 10.0 | R2: 0.7073
Lasso Alpha: 0.001 | R2: 0.7112

Ridge RMSE: 4.67
Lasso RMSE: 4.64


Q4. Multiclass Logistic Regression: Implement Multiclass Logistic Regression (step-by-step)
on the Iris dataset using the one-vs-rest strategy.

In [7]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the Iris measurements and their class labels.
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardise using statistics from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# One-vs-rest: one binary logistic model is fitted per class.
lr = LogisticRegression(max_iter=1000, random_state=42)
ovr_model = OneVsRestClassifier(lr).fit(X_train, y_train)

y_pred = ovr_model.predict(X_test)

print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))

# Score every binary sub-model against its own "class i vs everything else" labels.
for i, clf in enumerate(ovr_model.estimators_):
    is_class_i = (y_test == i).astype(int)
    binary_acc = clf.score(X_test, is_class_i)
    print(f"Class {i} vs Rest - Accuracy:", round(binary_acc, 4))


Accuracy: 0.9737

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.91      0.95        11
   virginica       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38

Class 0 vs Rest - Accuracy: 1.0
Class 1 vs Rest - Accuracy: 0.7632
Class 2 vs Rest - Accuracy: 1.0
