**ASSIGNMENT-5**

Q1


In [3]:


import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


np.random.seed(42)
n = 500

X1 = np.random.randn(n)
X2 = X1 + np.random.randn(n)*0.1
X3 = X1*0.5 + np.random.randn(n)*0.1
X4 = X2 + np.random.randn(n)*0.2
X5 = X3*1.2 + np.random.randn(n)*0.1
X6 = X1 + X3 + np.random.randn(n)*0.1
X7 = X2 - X3 + np.random.randn(n)*0.1

X = np.column_stack([X1,X2,X3,X4,X5,X6,X7])
y = 4*X1 + 2*X2 - 3*X3 + np.random.randn(n)*0.2


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def ridge_gradient_descent(X, y, lr, lambd, iterations=1000):
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(iterations):
        y_pred = np.dot(X, w)
        gradient = (1/m) * np.dot(X.T, (y_pred - y)) + (lambd/m) * w
        w = w - lr * gradient

    return w


learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0.1, 1, 10, 20]

best_r2 = -999
best_set = None

for lr in learning_rates:
    for lam in lambdas:
        w = ridge_gradient_descent(X_train, y_train, lr, lam)
        y_pred = np.dot(X_test, w)

        if np.isnan(y_pred).any() or np.isinf(y_pred).any():
            continue

        r2 = r2_score(y_test, y_pred)

        if r2 > best_r2:
            best_r2 = r2
            best_set = (lr, lam, r2)

print("Best LR:", best_set[0])
print("Best Lambda:", best_set[1])
print("Best R2 Score:", best_set[2])

Best LR: 0.1
Best Lambda: 1e-15
Best R2 Score: 0.9958296100774573


  w = w - lr * gradient


Q2

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score


df = pd.read_csv("/content/Hitters (1).csv")

# (a)
df = df.dropna()


df = pd.get_dummies(df, drop_first=True)

# (b)
X = df.drop("Salary", axis=1)
y = df["Salary"]


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# (c)
lin = LinearRegression().fit(X_train, y_train)
ridge = Ridge(alpha=0.5748).fit(X_train, y_train)
lasso = Lasso(alpha=0.5748, max_iter=5000).fit(X_train, y_train)


models = {
    "Linear Regression": lin,
    "Ridge Regression": ridge,
    "LASSO Regression": lasso
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(name, " → R2 Score:", r2_score(y_test, y_pred))



Linear Regression  → R2 Score: 0.29074518557981444
Ridge Regression  → R2 Score: 0.2997888803309703
LASSO Regression  → R2 Score: 0.29942440974749995


Q3

In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


data = fetch_california_housing()
X = data.data
y = data.target


scaler = StandardScaler()
X = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


ridge_cv = RidgeCV(alphas=[0.1, 1, 10, 50, 100])
ridge_cv.fit(X_train, y_train)


lasso_cv = LassoCV(alphas=[0.1, 1, 10, 50, 100], max_iter=5000)
lasso_cv.fit(X_train, y_train)

print("Best Ridge Alpha:", ridge_cv.alpha_)
print("Best Lasso Alpha:", lasso_cv.alpha_)

print("RidgeCV R2 Score:", r2_score(y_test, ridge_cv.predict(X_test)))
print("LassoCV R2 Score:", r2_score(y_test, lasso_cv.predict(X_test)))



Best Ridge Alpha: 1.0
Best Lasso Alpha: 0.1
RidgeCV R2 Score: 0.5758185345428238
LassoCV R2 Score: 0.4814202815313765


Q4

In [6]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


iris = load_iris()
X = iris.data
y = iris.target


scaler = StandardScaler()
X = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def train_binary_logistic(X, y, lr=0.1, iterations=2000):
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(iterations):
        z = np.dot(X, w)
        h = sigmoid(z)
        gradient = np.dot(X.T, (h - y)) / m
        w -= lr * gradient

    return w


classes = np.unique(y)
weights = []

for c in classes:
    y_binary = (y_train == c).astype(int)
    w = train_binary_logistic(X_train, y_binary)
    weights.append(w)

weights = np.array(weights)



import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


iris = load_iris()
X = iris.data
y = iris.target


scaler = StandardScaler()
X = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def train_binary_logistic(X, y, lr=0.1, iterations=2000):
    m, n = X.shape
    w = np.zeros(n)

    for _ in range(iterations):
        z = np.dot(X, w)
        h = sigmoid(z)
        gradient = np.dot(X.T, (h - y)) / m
        w -= lr * gradient

    return w

classes = np.unique(y)
weights = []

for c in classes:
    y_binary = (y_train == c).astype(int)
    w = train_binary_logistic(X_train, y_binary)
    weights.append(w)

weights = np.array(weights)


preds = []
for x in X_test:
    scores = [np.dot(w, x) for w in weights]
    preds.append(np.argmax(scores))

acc = accuracy_score(y_test, preds)
print("OVR Logistic Regression Accuracy:", acc)

preds = []
for x in X_test:
    scores = [np.dot(w, x) for w in weights]
    preds.append(np.argmax(scores))

acc = accuracy_score(y_test, preds)
print("OVR Logistic Regression Accuracy:", acc)


OVR Logistic Regression Accuracy: 0.8666666666666667
OVR Logistic Regression Accuracy: 0.8666666666666667
