In [42]:
#Q1
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler


In [43]:


np.random.seed(42)
# One main feature
x1 = np.random.rand(200)
x2 = x1 + np.random.normal(0, 0.01, 200)
x3 = x1 + np.random.normal(0, 0.02, 200)
x4 = x1 + np.random.normal(0, 0.03, 200)
x5 = x1 + np.random.normal(0, 0.04, 200)
x6 = x1 + np.random.normal(0, 0.05, 200)
x7 = x1 + np.random.normal(0, 0.06, 200)

x = np.column_stack((x1, x2, x3, x4, x5, x6, x7))
y = 5 * x1 + np.random.normal(0, 0.1, 200)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [44]:
def ridge_regression(x, y, lr, lam, it=1000):
    m, n = x.shape
    wgt = np.zeros(n)
    for _ in range(it):
        y_pred = x.dot(wgt)
        gradient = (-2/m) * x.T.dot(y - y_pred) + 2 * lam * wgt
        gradient[0] -= 2 * lam * wgt[0]   # no regularization
        wgt -= lr * gradient
        
        if np.any(np.isnan(wgt)) or np.any(np.isinf(wgt)):
            return np.zeros(n), np.inf
    cost = np.mean((y - x.dot(wgt))**2) + lam * np.sum(wgt**2)
    return wgt, cost

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]
best = (None, float('inf'), -1)
for lr in learning_rates:
    for lam in lambdas:
        wgt, cost = ridge_regression(x, y, lr, lam)
        r2 = r2_score(y, x.dot(wgt))
        if cost < best[1] and r2 > best[2]:
            best = (wgt, cost, r2)

print("Best weights:", best[0])
print("Min Cost:", best[1])
print("Max R2 Score:", best[2])

Best weights: [0.89132991 0.91550612 0.80148265 0.77504513 0.66544283 0.51398531
 0.4389618 ]
Min Cost: 0.0134892198821966
Max R2 Score: 0.9937799376373252


  gradient = (-2/m) * x.T.dot(y - y_pred) + 2 * lam * wgt
  gradient[0] -= 2 * lam * wgt[0]   # no regularization
  gradient[0] -= 2 * lam * wgt[0]   # no regularization


In [45]:
#Q2
#dataset not available

In [46]:
#Q3
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

data = fetch_california_housing()
x, y = data.data, data.target

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

a= np.logspace(-3, 3, 100)
rid= RidgeCV(alphas=a)
les=LassoCV(alphas=a)
ridge =rid.fit(x_train, y_train)
lasso = les.fit(x_train, y_train)

print("ridge best alpha:", ridge.alpha_)
print("ridge R2:", r2_score(y_test, ridge.predict(x_test)))
print("lasso best alpha:", lasso.alpha_)
print("lasso R2:", r2_score(y_test, lasso.predict(x_test)))

ridge best alpha: 2.1544346900318843
ridge R2: 0.5758478813838472
lasso best alpha: 0.001
lasso R2: 0.5768562568705682


In [48]:
#ques 4

import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

iris = load_iris()
x = iris.data
y = iris.target
uni= np.unique(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def train_logreg(x, y, lr=0.1, it=1000):
    m, n = x.shape
    wt = np.zeros(n)
    for _ in range(it):
        z = x.dot(wt)
        h = sigmoid(z)
        grad = (1/m) * x.T.dot(h - y)
        wt -= lr * grad
    return wt

weight= []
for u in uni:
    y_binary = (y_train == u).astype(int)
    wt = train_logreg(x_train, y_binary)
    weight.append(wt)

weight = np.array(weight)


def prediction(x):
    probs = sigmoid(x.dot(weight.T))  
    return np.argmax(probs, axis=1)

y_pred = prediction(x_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy: ",acc)
print("\nConfusion Matrix:\n", cm)


Accuracy:  0.8222222222222222

Confusion Matrix:
 [[19  0  0]
 [ 0  7  6]
 [ 0  2 11]]
