In [22]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, RidgeCV, LassoCV, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing, load_iris
import warnings
warnings.filterwarnings("ignore")


In [23]:
np.random.seed(42)
n=200
X=np.random.rand(n,7)

for i in range(1,7):
    X[:,i]=X[:,0]+np.random.normal(0,0.01,n)
y=3*X[:,0]+2*X[:,1]+np.random.normal(0,0.1,n)

sc=StandardScaler()

X=sc.fit_transform(X)
X=np.c_[np.ones(n),X]

def ridge_gd(X,y,lr,lmbd,iters):
    m,n=X.shape
    theta=np.zeros(n)

    for _ in range(iters):
        y_pred=X.dot(theta)
        if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)):
            return theta,np.inf,-np.inf
        grad=(1/m)*X.T.dot(y_pred-y)+2*lmbd*theta
        theta-=lr*grad

    y_pred=X.dot(theta)
    cost=(1/(2*m))*np.sum((y_pred-y)**2)+lmbd*np.sum(theta**2)

    return theta,cost,r2_score(y,y_pred)

best=(None,np.inf,-np.inf)

for lr in [0.0001,0.001,0.01,0.1]:
    for lmbd in [1e-15,1e-10,1e-5,1e-3,0,1,10,20]:
        theta,cost,r2=ridge_gd(X,y,lr,lmbd,1000)
        
        if np.isfinite(cost) and r2>best[2]:
            best=(theta,cost,r2)

best

(array([2.51524691, 0.21972822, 0.24003557, 0.17667498, 0.20696572,
        0.17638631, 0.22801418, 0.19245823]),
 0.004450316454950786,
 0.9957240028129284)

In [24]:
#url='https://drive.google.com/uc?id=1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG'
df=pd.read_csv('/Users/anmol117/Downloads/Hitters.csv')

df=df.dropna(subset=['Salary'])
df=df.fillna(df.median(numeric_only=True))

df=pd.get_dummies(df,drop_first=True)

X=df.drop('Salary',axis=1)
y=df['Salary']

sc=StandardScaler()
X=sc.fit_transform(X)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

lin=LinearRegression().fit(X_train,y_train)
ridge=Ridge(alpha=0.5748).fit(X_train,y_train)
lasso=Lasso(alpha=0.5748).fit(X_train,y_train)

r2_lin=r2_score(y_test,lin.predict(X_test))
r2_ridge=r2_score(y_test,ridge.predict(X_test))
r2_lasso=r2_score(y_test,lasso.predict(X_test))

print("R2 Score: ")
print("Linear:", r2_lin,", Ridge:", r2_ridge,", Lasso:", r2_lasso)

R2 Score: 
Linear: 0.29074518557981477 , Ridge: 0.2997888803309673 , Lasso: 0.2990653179473026


In [25]:
df_house=pd.read_csv("/Users/anmol117/Downloads/housing.csv")

df_house=df_house.dropna()

X=df_house.iloc[:,:-1]
y=df_house.iloc[:,-1]

sc=StandardScaler()
X=sc.fit_transform(X)

ridge_cv=RidgeCV(alphas=[0.1,1,10]).fit(X,y)
lasso_cv=LassoCV(alphas=[0.1,1,10]).fit(X,y)

print("Rigde:",ridge_cv.alpha_,", Lasso:", lasso_cv.alpha_)

Rigde: 10.0 , Lasso: 0.1


In [26]:
iris=load_iris()

X=iris.data
y=iris.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=LogisticRegression(multi_class='ovr',max_iter=1000).fit(X_train,y_train)

r2 = r2_score(y_test,model.predict(X_test))

print("R2 Score: ", r2)

R2 Score:  0.9523052464228935
