In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Synthetic regression problem with strongly collinear inputs: each of the
# seven feature columns is the same latent signal `base` plus a little noise.
np.random.seed(42)
n = 1000
base = np.random.randn(n, 1)

# Draw order matters for reproducibility: the seven feature-noise columns are
# sampled first, the target noise afterwards.
noise_columns = [np.random.randn(n, 1) * 0.05 for _ in range(7)]
X = np.hstack([base + column for column in noise_columns])
y = 5 * base.flatten() + np.random.randn(n) * 0.5

# 70/30 hold-out split. The scaler learns its statistics from the training
# rows only and is then applied unchanged to the test rows.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.3, random_state=42
)
scaler = StandardScaler()
scaler.fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

def ridge_gd(X, y, lr, lam, steps=1000):
    """Fit ridge regression with full-batch gradient descent.

    Minimises ``mean((X @ w + b - y) ** 2) + lam * sum(w ** 2)``. The bias
    ``b`` is not penalised.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Feature matrix.
    y : array-like of shape (n,)
        Regression targets.
    lr : float
        Gradient-descent step size.
    lam : float
        L2 penalty strength applied to ``w``.
    steps : int, default 1000
        Maximum number of parameter updates.

    Returns
    -------
    w : numpy.ndarray of shape (d,)
        Learned weights.
    b : float
        Learned bias.

    Notes
    -----
    If the step size is too large for the given penalty the iterates blow up.
    In that case the loop stops as soon as a parameter becomes non-finite and
    the non-finite parameters are returned, so callers can detect divergence
    with ``np.isfinite`` on the parameters or on the resulting predictions.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n, d = X.shape
    w = np.zeros(d)
    b = 0.0

    # Divergence is handled explicitly below, so the floating-point overflow
    # warnings it would otherwise trigger are suppressed inside the loop.
    with np.errstate(over="ignore", invalid="ignore"):
        for _ in range(steps):
            err = X.dot(w) + b - y
            dw = (2 / n) * X.T.dot(err) + 2 * lam * w
            db = (2 / n) * np.sum(err)
            w = w - lr * dw
            b = b - lr * db
            # Once a parameter is inf/nan no further update can recover it.
            if not (np.isfinite(b) and np.all(np.isfinite(w))):
                break
    return w, b

# Hyper-parameter grid: learning rates and L2 penalty strengths to try.
lrs = [0.0001, 0.001, 0.01, 0.1]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

# Select hyper-parameters on a validation split carved out of the training
# data. Picking them by their score on (Xtest, ytest) would leak the test set
# into model selection and make the reported test R^2 optimistically biased.
# NOTE: the scaler was fitted on all training rows, including these
# validation rows; that only affects the selection step, not the test score.
Xfit, Xval, yfit, yval = train_test_split(
    Xtrain, ytrain, test_size=0.25, random_state=42
)

bestval = -np.inf  # best validation R^2 seen so far
bestlr = None
bestlambda = None

for lr in lrs:
    for lam in lambdas:
        # Some (lr, lam) pairs make gradient descent diverge; silence the
        # resulting overflow warnings and discard those runs just below.
        with np.errstate(over="ignore", invalid="ignore"):
            w, b = ridge_gd(Xfit, yfit, lr, lam)
            valpred = Xval.dot(w) + b
        if not np.all(np.isfinite(valpred)):
            continue
        valr2 = r2_score(yval, valpred)
        if valr2 > bestval:
            bestval = valr2
            bestlr = lr
            bestlambda = lam

if bestlr is None:
    raise RuntimeError("gradient descent diverged for every (lr, lambda) pair")

# Refit on the full training set with the selected hyper-parameters and
# evaluate exactly once on the untouched test set.
w, b = ridge_gd(Xtrain, ytrain, bestlr, bestlambda)
ypred = Xtest.dot(w) + b
bestcost = np.mean((ypred - ytest) ** 2) + bestlambda * np.sum(w ** 2)
bestr2 = r2_score(ytest, ypred)

# Printed in order: learning rate, lambda, penalised test cost, test R^2.
print(bestlr)
print(bestlambda)
print(bestcost)
print(bestr2)


0.01
1e-15
0.2129031228249526
0.9911651090304562


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score

# Hitters dataset (CSV fetched from a public gist); `Salary` is the target.
url = "https://gist.githubusercontent.com/keeganhines/59974f1ebef97bbaa44fb19143f90bad/raw/Hitters.csv"
data = pd.read_csv(url)

# Drop incomplete rows. The explicit copy guarantees the column assignments
# below write to an independent frame.
data = data.dropna().copy()

# Integer-encode every non-numeric column. Selecting by "not numeric" instead
# of `dtype == 'object'` also covers pandas' dedicated string dtype, which
# would otherwise reach the scaler as raw text.
for col in data.select_dtypes(exclude="number").columns:
    data[col] = LabelEncoder().fit_transform(data[col])

X = data.drop('Salary', axis=1)
y = data['Salary']

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise with statistics learned from the training rows only.
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain)
Xtest = scaler.transform(Xtest)

# Single regularisation strength shared by Ridge and Lasso.
# TODO(review): document where 0.5748 comes from.
alpha = 0.5748

lin = LinearRegression().fit(Xtrain, ytrain)
ridge = Ridge(alpha=alpha).fit(Xtrain, ytrain)
# The recorded output of this cell ends with a coordinate-descent warning:
# the default iteration cap was too small for Lasso here, so raise it.
lasso = Lasso(alpha=alpha, max_iter=10000).fit(Xtrain, ytrain)

r2_lin = r2_score(ytest, lin.predict(Xtest))
r2_ridge = r2_score(ytest, ridge.predict(Xtest))
r2_lasso = r2_score(ytest, lasso.predict(Xtest))

# Pick the (name, R^2) pair with the highest test-set R^2.
best_model = max(
    [('Linear', r2_lin), ('Ridge', r2_ridge), ('Lasso', r2_lasso)],
    key=lambda item: item[1],
)

print("Best model:", best_model[0])



Best model: Ridge


  model = cd_fast.enet_coordinate_descent(


In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score

# California housing regression data, loaded as pandas objects.
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise using statistics learned from the training rows only.
scaler = StandardScaler()
scaler.fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

# Candidate regularisation strengths, shared by both models.
alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 20]

# Ridge: alpha chosen by 5-fold cross-validation on the training set.
ridge = RidgeCV(alphas=alphas, cv=5).fit(Xtrain, ytrain)
ypred_ridge = ridge.predict(Xtest)
r2_ridge = r2_score(ytest, ypred_ridge)

# Lasso: same candidate grid and fold count, with a raised iteration cap.
lasso = LassoCV(alphas=alphas, cv=5, max_iter=10000).fit(Xtrain, ytrain)
ypred_lasso = lasso.predict(Xtest)
r2_lasso = r2_score(ytest, ypred_lasso)

# Printed in order: ridge alpha, ridge test R^2, lasso alpha, lasso test R^2.
for value in (ridge.alpha_, r2_ridge, lasso.alpha_, r2_lasso):
    print(value)


10.0
0.5959440604913042
0.001
0.5963975777208825


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Iris classification data: feature matrix and integer class targets.
data = load_iris()
X, y = data.data, data.target

# 70/30 hold-out split, then standardise with training-row statistics only.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.3, random_state=42
)
scaler = StandardScaler()
scaler.fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xtest = scaler.transform(Xtest)

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``, overflow-safe.

    Parameters
    ----------
    z : float or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in ``[0, 1]``; a scalar input gives a scalar, an array input
        gives an array of the same shape.
    """
    z = np.asarray(z, dtype=float)
    # exp() is only ever evaluated at a non-positive argument, so it cannot
    # overflow no matter how large |z| is.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # higher-dimensional results as arrays.
    return out[()]

def train_logistic(X, y, lr=0.1, steps=1000):
    """Fit a binary logistic-regression model by full-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray of shape (n, d)
        Feature matrix.
    y : numpy.ndarray of shape (n,)
        Binary targets (0 or 1).
    lr : float, default 0.1
        Step size.
    steps : int, default 1000
        Number of gradient updates.

    Returns
    -------
    w : numpy.ndarray of shape (d,)
        Learned weights.
    b : number
        Learned bias (the initial ``0`` when ``steps`` is 0).
    """
    n, d = X.shape
    w, b = np.zeros(d), 0
    scale = 1 / n
    for _ in range(steps):
        # Predicted probability minus label; shared by both gradients.
        residual = sigmoid(X.dot(w) + b) - y
        w -= lr * (scale * np.dot(X.T, residual))
        b -= lr * (scale * np.sum(residual))
    return w, b

# One-vs-rest training: fit one binary logistic model per class label,
# treating that label as the positive class and every other label as negative.
classes = np.unique(ytrain)
weights, biases = [], []

for c in classes:
    y_binary = (ytrain == c).astype(int)
    w, b = train_logistic(Xtrain, y_binary, steps=1000, lr=0.1)
    weights.append(w)
    biases.append(b)

def predict(X):
    """Predict class labels with the one-vs-rest logistic models.

    Reads the module-level ``classes``, ``weights`` and ``biases`` (one weight
    vector and one bias per entry of ``classes``) and, for every row of ``X``,
    returns the label whose model gives the highest sigmoid score.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Rows to classify, scaled the same way as the training data.

    Returns
    -------
    numpy.ndarray of shape (n,)
        Predicted labels taken from ``classes`` (not positions into it).
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(classes)
    if X.size == 0:
        return labels[:0]
    weight_matrix = np.vstack(weights)  # shape (n_classes, d)
    scores = sigmoid(X.dot(weight_matrix.T) + np.asarray(biases, dtype=float))
    # Map the winning column back to its label so the result is also correct
    # when the labels are not simply 0..k-1.
    return labels[np.argmax(scores, axis=1)]

# Score the one-vs-rest classifier on the held-out test rows.
ypred = predict(Xtest)
test_accuracy = accuracy_score(ytest, ypred)
print(test_accuracy)


0.9111111111111111
