<a href="https://colab.research.google.com/github/arpit-devop/machine-learning-assignment/blob/main/ml5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
from sklearn.metrics import r2_score

# Synthetic data generation
np.random.seed(0)
n_samples, n_features = 100, 7
X = np.random.randn(n_samples, n_features)
X = X + np.random.randn(n_samples, 1)  # induce correlation
y = X @ np.array([2, 3, -1, 0.5, 4, -2, 1]) + np.random.randn(n_samples)

# Ridge Regression (Gradient Descent)
def ridge_grad_desc(X, y, lr, lmbd, n_iter=2000):
    w = np.zeros(X.shape[1])
    for iteration in range(n_iter):
        grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w
        w -= lr * grad
        if np.isnan(w).any():
            print(f"NaN encountered at iteration {iteration} with lr={lr}, lambda={lmbd}")
            # Return the weights from the previous iteration before they became NaN
            return w + lr * grad
    return w


lrs = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

best_score = -np.inf
best_params = None
for lr in lrs:
    for lmbd in lambdas:
        w = ridge_grad_desc(X, y, lr, lmbd)
        if not np.isnan(w).any():  # Check for NaN in weights
            y_pred = X @ w
            score = r2_score(y, y_pred)
            if score > best_score:
                best_score = score
                best_params = (lr, lmbd)

print("Best params (LR, lambda):", best_params)
print("Best R2:", best_score)

NaN encountered at iteration 823 with lr=0.1, lambda=10
NaN encountered at iteration 481 with lr=0.1, lambda=20
NaN encountered at iteration 279 with lr=1, lambda=1e-15
NaN encountered at iteration 279 with lr=1, lambda=1e-10
NaN encountered at iteration 279 with lr=1, lambda=1e-05
NaN encountered at iteration 279 with lr=1, lambda=0.001
NaN encountered at iteration 279 with lr=1, lambda=0
NaN encountered at iteration 264 with lr=1, lambda=1
NaN encountered at iteration 204 with lr=1, lambda=10
NaN encountered at iteration 179 with lr=1, lambda=20
NaN encountered at iteration 145 with lr=10, lambda=1e-15
NaN encountered at iteration 145 with lr=10, lambda=1e-10
NaN encountered at iteration 145 with lr=10, lambda=1e-05
NaN encountered at iteration 145 with lr=10, lambda=0.001
NaN encountered at iteration 145 with lr=10, lambda=0
NaN encountered at iteration 141 with lr=10, lambda=1
NaN encountered at iteration 122 with lr=10, lambda=10
NaN encountered at iteration 113 with lr=10, lambda

  grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w
  grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w
  grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w
  grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w
  return w + lr * grad
  w -= lr * grad
  grad = (2 / X.shape[0]) * X.T @ (X @ w - y) + 2 * lmbd * w


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import r2_score

# Load dataset
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
df = pd.read_csv(url)
df = df.dropna()

# Encode categoricals
for col in df.select_dtypes(include='object').columns:
    df[col] = LabelEncoder().fit_transform(df[col])

# Separate features and target
X = df.drop('Survived', axis=1) # Updated target column to 'Survived'
y = df['Survived'] # Updated target column to 'Survived'

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Models
lr = LinearRegression()
ridge = Ridge(alpha=0.5748)
lasso = Lasso(alpha=0.5748)

# Fit & Score
models = {'Linear': lr, 'Ridge': ridge, 'Lasso': lasso}
for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(f'{name} R2:', r2_score(y_test, pred))

Linear R2: 0.24147786237411006
Ridge R2: 0.24203760726933898
Lasso R2: -0.01130022321428581


In [12]:
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV, LassoCV

# Load dataset from original source
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]


# Ridge and Lasso Cross Validation
ridgecv = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=5)
lassocv = LassoCV(alphas=[0.1, 1.0, 10.0], cv=5)

ridgecv.fit(X, y)
lassocv.fit(X, y)

print("Best Ridge alpha:", ridgecv.alpha_)
print("Best Lasso alpha:", lassocv.alpha_)
print("RidgeCV R2:", ridgecv.score(X, y))
print("LassoCV R2:", lassocv.score(X, y))

Best Ridge alpha: 10.0
Best Lasso alpha: 0.1
RidgeCV R2: 0.7315744764907257
LassoCV R2: 0.7269834862602695


In [9]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
X, y = iris.data, iris.target

# One-vs-rest Logistic Regression
clf = LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf.fit(X_train, y_train)
pred = clf.predict(X_test)
print("Accuracy (One-vs-rest):", accuracy_score(y_test, pred))


Accuracy (One-vs-rest): 0.9555555555555556


