<a href="https://colab.research.google.com/github/asmit-ayank/UML501/blob/main/Assignment_4_updated_UML501.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 4 Updated - Asmit Ayank (102497002)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global RNG so the random draws made by later cells are repeatable.
np.random.seed(42)


In [None]:
def generate_correlated_data(n_samples=500):
  """Build a synthetic regression dataset with seven correlated features.

  Two latent standard-normal factors are drawn first. Every feature is a
  fixed linear mix of those factors plus independent Gaussian noise, and the
  target is a linear combination of the features plus Gaussian noise scaled
  by 2.0. All draws come from NumPy's global RNG, in a fixed order.

  Args:
    n_samples: Number of rows to generate.

  Returns:
    Tuple ``(X, y, df, true_coefficients)``: the ``(n_samples, 7)`` feature
    matrix, the target vector, a DataFrame with columns ``Feature_1`` ..
    ``Feature_7`` plus ``Target``, and the coefficients used for the target.
  """
  def gaussian(scale):
    # One fresh noise vector per call; the call order fixes the RNG stream.
    return np.random.randn(n_samples) * scale

  latent_a = np.random.randn(n_samples)
  latent_b = np.random.randn(n_samples)

  # Features are appended strictly in order so the noise draws stay aligned.
  columns = []
  columns.append(latent_a + gaussian(0.3))
  columns.append(latent_a + gaussian(0.4))
  columns.append(0.8 * latent_a + 0.2 * latent_b + gaussian(0.3))
  columns.append(latent_b + gaussian(0.3))
  columns.append(latent_b + gaussian(0.35))
  columns.append(0.6 * latent_a + 0.4 * latent_b + gaussian(0.4))
  columns.append(0.3 * latent_a + 0.7 * latent_b + gaussian(0.35))
  X = np.column_stack(columns)

  true_coefficients = np.array([3.5, -2.1, 1.8, 4.2, -1.5, 2.7, -3.0])
  y = X @ true_coefficients + gaussian(2.0)

  feature_names = [f'Feature_{i+1}' for i in range(7)]
  df = pd.DataFrame(X, columns=feature_names)
  df['Target'] = y
  return X, y, df, true_coefficients

In [None]:
# Smoke-test the generator on 10 rows.
# NOTE(review): this call draws from the global RNG seeded above, so the
# 500-row dataset generated in a later cell depends on whether this cell ran.
generate_correlated_data(10)

(array([[ 0.93640878,  0.25603151,  0.52622776, -0.3661925 , -0.63112868,
          0.25721966, -0.2522633 ],
        [-0.20599719,  0.60264697, -0.15234691, -0.58125444, -0.5307104 ,
          0.34596414, -0.24250072],
        [ 0.667947  ,  0.64228965,  0.5318488 ,  0.03888567, -0.14525497,
          0.47106762,  0.88094307],
        [ 1.0956054 ,  1.09994548,  0.74543673, -1.72977736, -2.33195256,
          0.77436328, -1.06378179],
        [-0.39746819,  0.09486459, -0.97586286, -1.41561798, -1.44053379,
         -1.8783572 , -1.56066126],
        [-0.20086018, -0.72247442, -0.51572033, -0.28290349, -0.08760352,
         -0.03663618, -0.63945732],
        [ 1.23391474,  1.66275825,  0.9226124 , -1.26459638, -1.03803466,
          0.57721407,  0.0851728 ],
        [ 0.88014413, -0.01643332,  0.99393392,  0.22148362,  0.66548385,
          0.46655683,  0.56526644],
        [-0.64966599, -1.00074881, -0.45409884, -0.80864505, -0.78145147,
         -0.60818995, -0.96187524],
        [ 

In [None]:
def ridge(X, y, alpha, lambda_param, n_iterations):
  """Fit a linear model with an L2 weight penalty by batch gradient descent.

  Args:
    X: Feature matrix of shape (n_samples, n_features).
    y: Target vector of length n_samples.
    alpha: Gradient-descent step size.
    lambda_param: L2 penalty strength, applied to the weights but not the bias.
    n_iterations: Maximum number of gradient steps.

  Returns:
    Tuple ``(weights, bias)``. If a NaN shows up in either value during
    training, a warning is printed and the NaN-containing values are
    returned straight away.
  """
  n_samples, n_features = X.shape
  inv_n = 1 / n_samples
  weights = np.zeros(n_features)
  bias = 0

  for i in range(n_iterations):
    # Residual of the current fit; shared by both gradients.
    error = X.dot(weights) + bias - y

    grad_w = inv_n * (X.T.dot(error) + lambda_param * weights)
    grad_b = inv_n * np.sum(error)

    weights -= alpha * grad_w
    bias -= alpha * grad_b

    diverged = np.isnan(weights).any() or np.isnan(bias)
    if not diverged:
      continue

    # Training has blown up; hand the NaNs back so the caller can detect it.
    print(f"Warning: NaN values encountered in weights or bias at iteration {i} for alpha={alpha}, lambda_param={lambda_param}. Stopping training for this parameter combination.")
    break

  return weights, bias

In [None]:
def ridge_boosting(X, y, alpha, lambda_param, n_estimators, learning_rate, verbose=False):
    """Fit a boosted sequence of gradient-descent ridge models on residuals.

    Each stage fits ``ridge`` (1000 iterations) to the current residuals, then
    subtracts ``learning_rate`` times that stage's predictions from them.

    Args:
        X: Feature matrix of shape (n_samples, n_features).
        y: Target vector of length n_samples. It is not modified.
        alpha: Step size forwarded to ``ridge``.
        lambda_param: L2 penalty strength forwarded to ``ridge``.
        n_estimators: Maximum number of boosting stages.
        learning_rate: Shrinkage applied to every stage's predictions.
        verbose: When true, print each stage's fitted ``(weights, bias)``.
            Off by default because the per-stage dump floods notebook output.

    Returns:
        List of ``(weights, bias)`` tuples, one per completed stage. Boosting
        stops early (with a printed warning) if a stage returns NaN values.
    """
    # Work on a float copy: the in-place residual update below would raise a
    # casting error for an integer-typed y, and the caller's y must stay intact.
    residuals = np.array(y, dtype=float)
    models = []

    for i in range(n_estimators):
        ridge_output = ridge(X, residuals, alpha, lambda_param, n_iterations=1000)
        if verbose:
            print(f"Iteration {i}: Output from ridge function: {ridge_output}")
        weights, bias = ridge_output

        if np.isnan(weights).any() or np.isnan(bias):
            print(f"Warning: NaN values received from ridge function at boosting iteration {i}. Stopping boosting for this parameter combination.")
            break

        predictions = X.dot(weights) + bias
        residuals -= learning_rate * predictions
        models.append((weights, bias))

    return models

In [None]:
def predict_ridge_boosting(models, X, learning_rate):
    """Combine the boosted stages into a single prediction vector.

    Args:
        models: Iterable of ``(weights, bias)`` pairs, one per boosting stage.
        X: Feature matrix of shape (n_samples, n_features).
        learning_rate: Shrinkage applied to every stage's output; it should
            match the value used when the stages were fitted.

    Returns:
        Array of length n_samples holding the shrunken sum of stage outputs
        (all zeros when ``models`` is empty).
    """
    y_pred = np.zeros(X.shape[0])

    for stage_weights, stage_bias in models:
        stage_output = X.dot(stage_weights) + stage_bias
        y_pred += learning_rate * stage_output

    return y_pred

In [None]:
# Hyperparameter grid for the boosted ridge search: gradient-descent step
# sizes and L2 penalty strengths.
alpha_values = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambda_values = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]
boosting_learning_rate = 0.1
n_estimators = 100

X, y, df, true_coefficients = generate_correlated_data(n_samples=500)

# Evaluate on a held-out 20% split rather than on the training rows, so the
# "test" metrics measure generalisation instead of repeating the train metrics.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

results = []

# Flatten the two-level grid; alpha varies slowest, matching a nested loop.
param_grid = [(a, lam) for a in alpha_values for lam in lambda_values]

for alpha, lambda_param in param_grid:
    print(f"Training with alpha={alpha}, lambda_param={lambda_param}")
    models = ridge_boosting(X_train_scaled, y_train, alpha, lambda_param, n_estimators, boosting_learning_rate)
    y_test_pred = predict_ridge_boosting(models, X_test_scaled, boosting_learning_rate)
    y_train_pred = predict_ridge_boosting(models, X_train_scaled, boosting_learning_rate)

    # A diverged fit yields NaN predictions; record nothing for that combination.
    has_nan = np.isnan(y_test_pred).any() or np.isnan(y_train_pred).any()
    if has_nan:
        print(f"Skipping evaluation for alpha={alpha}, lambda_param={lambda_param} due to NaN predictions.")
        continue

    train_mse = mean_squared_error(y_train, y_train_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)
    results.append({
        'alpha': alpha,
        'lambda_param': lambda_param,
        'train_mse': train_mse,
        'test_mse': test_mse,
        'train_r2': train_r2,
        'test_r2': test_r2,
    })

# One row per parameter combination that produced finite predictions.
results_df = pd.DataFrame(results)
display(results_df)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
       0.11675464, 0.0171985 ]), np.float64(0.014313759540498246))
Iteration 39: Output from ridge function: (array([0.1055422 , 0.04018807, 0.0946398 , 0.07716632, 0.01341698,
       0.1138877 , 0.01507983]), np.float64(0.01417753963272148))
Iteration 40: Output from ridge function: (array([0.10301163, 0.03777365, 0.09186993, 0.07576747, 0.01214768,
       0.11111196, 0.01303585]), np.float64(0.014042616090390935))
Iteration 41: Output from ridge function: (array([0.10056011, 0.0354368 , 0.08918856, 0.07441733, 0.01092501,
       0.10842443, 0.01106407]), np.float64(0.013908976576371867))
Iteration 42: Output from ridge function: (array([0.09818513, 0.03317502, 0.08659283, 0.07311422, 0.00974735,
       0.10582223, 0.00916205]), np.float64(0.013776608870938343))
Iteration 43: Output from ridge function: (array([0.09588424, 0.03098592, 0.08407998, 0.07185651, 0.00861313,
       0.10330259, 0.00732745]), np.float64(0.01364

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  dw = (1/n_samples) * (X.T.dot(y_predicted - y) + lambda_param * weights)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  dw = (1/n_samples) * (X.T.dot(y_predicted - y) + lambda_param * weights)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(ob

Iteration 0: Output from ridge function: (array([nan, nan, nan, nan, nan, nan, nan]), np.float64(nan))
Training with alpha=10, lambda_param=10
Iteration 0: Output from ridge function: (array([nan, nan, nan, nan, nan, nan, nan]), np.float64(nan))
Training with alpha=10, lambda_param=20
Iteration 0: Output from ridge function: (array([nan, nan, nan, nan, nan, nan, nan]), np.float64(nan))


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Unnamed: 0,alpha,lambda_param,train_mse,test_mse,train_r2,test_r2
0,0.0001,1e-15,8.882533,8.882533,0.668599,0.668599
1,0.0001,1e-10,8.882533,8.882533,0.668599,0.668599
2,0.0001,1e-05,8.882533,8.882533,0.668599,0.668599
3,0.0001,0.001,8.882533,8.882533,0.668599,0.668599
4,0.0001,0.0,8.882533,8.882533,0.668599,0.668599
5,0.0001,1.0,8.882674,8.882674,0.668594,0.668594
6,0.0001,10.0,8.883937,8.883937,0.668547,0.668547
7,0.0001,20.0,8.885339,8.885339,0.668495,0.668495
8,0.001,1e-15,4.701098,4.701098,0.824606,0.824606
9,0.001,1e-10,4.701098,4.701098,0.824606,0.824606


In [None]:
# Configuration with the lowest test MSE.
best_mse_idx = results_df['test_mse'].idxmin()
best_mse_params = results_df.loc[best_mse_idx]
print("Best parameters based on minimum test MSE:")
display(best_mse_params)

# Configuration with the highest test R2.
best_r2_idx = results_df['test_r2'].idxmax()
best_r2_params = results_df.loc[best_r2_idx]
print("\nBest parameters based on maximum test R2 score:")
display(best_r2_params)

Best parameters based on minimum test MSE:


Unnamed: 0,24
alpha,0.1
lambda_param,1e-15
train_mse,4.120372
test_mse,4.120372
train_r2,0.846272
test_r2,0.846272



Best parameters based on maximum test R2 score:


Unnamed: 0,24
alpha,0.1
lambda_param,1e-15
train_mse,4.120372
test_mse,4.120372
train_r2,0.846272
test_r2,0.846272


In [None]:
# Row label of the configuration with the highest test R2.
best_r2_idx = results_df['test_r2'].idxmax()
# `results_df` was built directly from the `results` list, so its default
# integer row labels coincide with list positions and can index the list.
best_r2 = results[best_r2_idx]

In [None]:
# Show the row label of the best-R2 configuration.
best_r2_idx

24

In [None]:
# Show the recorded parameters and metrics for the best-R2 configuration.
best_r2

{'alpha': 0.1,
 'lambda_param': 1e-15,
 'train_mse': 4.120372414216306,
 'test_mse': 4.120372414216306,
 'train_r2': 0.8462719697053458,
 'test_r2': 0.8462719697053458}

In [None]:
#question 3

In [None]:
import pandas as pd
import numpy as np
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string for the regex separator: "\s" is not a valid Python string escape
# and a non-raw literal triggers a SyntaxWarning. The first 22 lines are
# skipped (presumably the file's descriptive header; confirm against the file).
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Each record is rebuilt from a pair of rows: the even-numbered row supplies
# all of its columns, and the following odd-numbered row supplies two more
# feature columns plus the target in its third column.
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

X_boston = data
y_boston = target
print("Features shape:", X_boston.shape)
print("Target shape:", y_boston.shape)

  raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)


Features shape: (506, 13)
Target shape: (506,)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the Boston rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_boston, y_boston, test_size=0.2, random_state=42
)

# Report the shape of every partition, in the order train/test features then targets.
for label, part in (
    ("Training features shape:", X_train),
    ("Testing features shape:", X_test),
    ("Training target shape:", y_train),
    ("Testing target shape:", y_test),
):
    print(label, part.shape)

Training features shape: (404, 13)
Testing features shape: (102, 13)
Training target shape: (404,)
Testing target shape: (102,)


In [None]:
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.model_selection import RepeatedKFold
# 10-fold cross-validation repeated 3 times, with a fixed seed for repeatable folds.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)

# Candidate penalties: 13 log-spaced values from 1e-6 to 1e6 for each model.
ridge_alphas = np.logspace(start=-6, stop=6, num=13)
lasso_alphas = np.logspace(start=-6, stop=6, num=13)

# fit() returns the estimator itself, so construction and fitting can be chained.
ridge_cv_model = RidgeCV(alphas=ridge_alphas, cv=cv).fit(X_train, y_train)
lasso_cv_model = LassoCV(alphas=lasso_alphas, cv=cv).fit(X_train, y_train)

print("Best alpha for RidgeCV:", ridge_cv_model.alpha_)
print("Best alpha for LassoCV:", lasso_cv_model.alpha_)

Best alpha for RidgeCV: 0.1
Best alpha for LassoCV: 1e-06


In [None]:
#question 4

# Task
Implement multiclass logistic regression step by step on the Iris dataset using the one-vs-rest strategy.

## Load the dataset

### Subtask:
Load the Iris dataset using `load_iris` from `sklearn.datasets`.


**Reasoning**:
Load the Iris dataset using load_iris.



In [None]:
from sklearn.datasets import load_iris
# Load the Iris dataset; later cells read its .data, .target and .target_names.
iris = load_iris()

## Prepare data

### Subtask:
Separate features (X) and target (y) from the loaded Iris dataset. Understand the target variable (the different Iris species).


**Reasoning**:
Extract features and target from the iris dataset and print unique target values and names to understand the classes.



In [None]:
# Split the loaded dataset object into the feature matrix and the class labels.
X, y = iris.data, iris.target

# Show which label values occur and the species name behind each of them.
print("Unique target values:", np.unique(y))
print("Target class names:", iris.target_names)

Unique target values: [0 1 2]
Target class names: ['setosa' 'versicolor' 'virginica']


In [None]:
class BinaryLogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    NOTE(review): a class with this same name is defined again in a later
    cell; once that cell runs, it replaces this definition.
    """

    def __init__(self, alpha=0.01, n_iterations=1000):
        """Store the hyperparameters; the model is untrained until ``fit``.

        Args:
            alpha: Gradient-descent step size.
            n_iterations: Number of full-batch gradient updates.
        """
        self.alpha = alpha
        self.n_iterations = n_iterations
        self.weights = None  # set by fit(): one weight per feature
        self.bias = None     # set by fit(): scalar intercept

    def sigmoid(self, z):
        """Return the logistic function of ``z`` without overflowing.

        ``1 / (1 + exp(-z))`` overflows in ``exp`` for large negative ``z``.
        Only ``exp(-|z|)`` is evaluated here, which never exceeds 1, and the
        algebraically equivalent form is selected for each sign of ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays untouched, so scalar input still yields scalar output.
        return result[()]

    def fit(self, X, y):
        """Learn weights and bias from features ``X`` and 0/1 labels ``y``.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of length n_samples containing 0/1 labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            # Gradient of the mean log-loss with respect to weights and bias.
            error = self.predict_proba(X) - y
            dw = (1 / n_samples) * X.T.dot(error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.alpha * dw
            self.bias -= self.alpha * db

    def predict_proba(self, X):
        """Return the predicted probability of the positive class per row."""
        linear_model = np.asarray(X, dtype=float).dot(self.weights) + self.bias
        return self.sigmoid(linear_model)

    def predict(self, X):
        """Return a boolean array: True where the probability exceeds 0.5."""
        return self.predict_proba(X) > 0.5

In [None]:
unique_classes = np.unique(y)

# One untrained binary classifier per class. The models are fitted in the next
# cell, so no per-class binary target is built here.
binary_models = [BinaryLogisticRegression() for _ in unique_classes]

In [None]:
# One-vs-rest training: model i learns to separate class i from all the others.
for i, class_label in enumerate(unique_classes):
    # 1 for rows of the current class, 0 for every other row.
    y_binary = np.equal(y, class_label).astype(int)
    binary_models[i].fit(X, y_binary)

In [None]:
class BinaryLogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    NOTE(review): this redefines the class of the same name from an earlier
    cell; this version adds ``predict_proba`` for one-vs-rest scoring.
    """

    def __init__(self, alpha=0.01, n_iterations=1000):
        """Store the hyperparameters; the model is untrained until ``fit``.

        Args:
            alpha: Gradient-descent step size.
            n_iterations: Number of full-batch gradient updates.
        """
        self.alpha = alpha
        self.n_iterations = n_iterations
        self.weights = None  # set by fit(): one weight per feature
        self.bias = None     # set by fit(): scalar intercept

    def sigmoid(self, z):
        """Return the logistic function of ``z`` without overflowing.

        ``1 / (1 + exp(-z))`` overflows in ``exp`` for large negative ``z``.
        Only ``exp(-|z|)`` is evaluated here, which never exceeds 1, and the
        algebraically equivalent form is selected for each sign of ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays untouched, so scalar input still yields scalar output.
        return result[()]

    def fit(self, X, y):
        """Learn weights and bias from features ``X`` and 0/1 labels ``y``.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of length n_samples containing 0/1 labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            # Gradient of the mean log-loss with respect to weights and bias.
            error = self.predict_proba(X) - y
            dw = (1 / n_samples) * X.T.dot(error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.alpha * dw
            self.bias -= self.alpha * db

    def predict_proba(self, X):
        """Return the predicted probability of the positive class per row."""
        linear_model = np.asarray(X, dtype=float).dot(self.weights) + self.bias
        return self.sigmoid(linear_model)

    def predict(self, X):
        """Return a boolean array: True where the probability exceeds 0.5."""
        return self.predict_proba(X) > 0.5

unique_classes = np.unique(y)
binary_models = []

# One-vs-rest: train a separate binary classifier for each class label.
for class_label in unique_classes:
    model = BinaryLogisticRegression()
    # 1 for rows of the current class, 0 for every other row.
    y_binary = np.equal(y, class_label).astype(int)
    model.fit(X, y_binary)
    binary_models.append(model)


def predict_multiclass(binary_models, X):
    """Pick, for every row of ``X``, the class whose binary model scores highest.

    Args:
        binary_models: Sequence of fitted models exposing ``predict_proba(X)``;
            position ``k`` in the sequence corresponds to class index ``k``.
        X: Feature matrix of shape (n_samples, n_features).

    Returns:
        Integer array of length n_samples with the index of the winning model
        for each row (the first model wins ties).
    """
    # Every column is overwritten below, so the buffer needs no initial values.
    scores = np.empty((X.shape[0], len(binary_models)))

    for column, classifier in enumerate(binary_models):
        scores[:, column] = classifier.predict_proba(X)

    return scores.argmax(axis=1)
# Predict on the same X the binary models were fitted on, so any metric
# computed from these predictions is a training-set metric.
multiclass_predictions = predict_multiclass(binary_models, X)
print("Multiclass predictions shape:", multiclass_predictions.shape)

Multiclass predictions shape: (150,)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Precision, recall and F1 all use the same 'weighted' averaging mode.
averaging = {'average': 'weighted'}

accuracy = accuracy_score(y, multiclass_predictions)
precision = precision_score(y, multiclass_predictions, **averaging)
recall = recall_score(y, multiclass_predictions, **averaging)
f1 = f1_score(y, multiclass_predictions, **averaging)

# Report every metric to four decimal places.
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-score (weighted): {f1:.4f}")

Accuracy: 0.8733
Precision (weighted): 0.9082
Recall (weighted): 0.8733
F1-score (weighted): 0.8686
