In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score, mean_squared_error

In [5]:
# Read the data from 'BreastCancerData.csv' into a pandas DataFrame
df = pd.read_csv('NewData.csv')

In [8]:
# Extract all columns except the first one and store them in X as a NumPy array
X = df.iloc[:, 1:].values

# Extract the first column and store it in y as a NumPy array
y = df.iloc[:, 0].values


In [10]:
# we split the data into training and testing data 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 2)

In [12]:
# Applying ridge regression(using inbuilt class)

# Creating a Ridge Regression model
model1 = Ridge(alpha = 0.02)

# Fitting training data into the model
model1.fit(X_train,y_train)

# Predicting target values for testing data
y_pred1 = model1.predict(X_test)

In [36]:
# r2 score of inbuilt model
r2_score(y_test,y_pred1)*100

72.90662836594721

In [16]:
print(model1.coef_)
print(model1.intercept_)

[-9.95041884e-02  1.01494057e-02 -5.54337905e-03  8.95426086e-04
  2.90983568e-01 -3.41130945e+00  1.86807023e+00  1.88497450e+00
  3.46945903e-01 -6.17286685e-02  2.38911113e-01  5.37410856e-02
 -4.47318889e-04 -9.42185633e-04  1.11733138e+00  4.69806070e-01
 -1.77306569e+00  9.96957641e-01  9.43236016e-01  2.18275340e-01
  2.34246283e-01  2.66802586e-05 -3.29170801e-04 -1.36302155e-03
  2.08385809e+00  3.70867514e-01  1.13895741e-01  7.04920378e-01
  3.85330942e-01  9.25479606e-01]
-1.8478075621915648


In [18]:
# Creating ridge class
class RidgeReg:
    def __init__(self,alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None
    def fit(self, X_train, y_train):
        # Insert a column of ones for the intercept term
        X_train = np.insert(X_train, 0, 1, axis=1)
        
        # Identity matrix with zero for the intercept term
        I = np.identity(X_train.shape[1])
        I[0, 0] = 0  # No regularization for the intercept term
        
        # Ridge regression closed-form solution
        result = np.linalg.inv(X_train.T.dot(X_train) + self.alpha * I).dot(X_train.T).dot(y_train)
        self.intercept_ = result[0]
        self.coef_ = result[1:]

    def predict(self, X_test):
        # Insert a column of ones for the intercept term
        X_test = np.insert(X_test, 0, 1, axis=1)
        return np.dot(X_test, np.insert(self.coef_, 0, self.intercept_))

In [20]:
# Applying ridge regression(using self-made class)

# Creating a Ridge Regression model
model2 = Ridge(alpha = 0.02)

# Fitting training data into the model
model2.fit(X_train,y_train)

# Predicting target values for testing data
y_pred2 = model2.predict(X_test)

In [38]:
# r2 score of self-made model
r2_score(y_test,y_pred2)*100

72.90662836594721

In [24]:
# We can see both models are giving similar answers

In [42]:
# Applying ridge regression(with inbuilt SGDRegressor in Ridge Regression)
model3 = Ridge(alpha = 0.001, max_iter = 1000, solver = 'sparse_cg')

# Training data
model3.fit(X_train,y_train)

# Predicting target values for testing data
y_pred3 = model3.predict(X_test)

#Calculating accuracy score
r2_score(y_test,y_pred3)*100


68.81173009516921

In [44]:
# Coefficient and intercept of (inbuilt model)
print(model3.coef_)
print(model3.intercept_)

[-0.07279504 -0.01408125 -0.00601826  0.00065228  0.0138263   0.02243204
  0.05067797  0.02392671  0.02011316  0.00438869  0.05912694  0.01779145
  0.08176666 -0.00275348  0.00286934  0.00441119  0.01012221  0.00289725
  0.00378675  0.00095812  0.25564416  0.02040786 -0.00311966 -0.0011924
  0.02719217  0.07791756  0.14145984  0.04132657  0.04412699  0.01420217]
-1.762897129597659


In [46]:
# Lasso Regression
model4 = Lasso(alpha=0.0001)

# Training data
model4.fit(X_train,y_train)

# Predicting target values for testing data
y_pred4 = model4.predict(X_test)

#Calculating accuracy score
r2_score(y_test,y_pred4)*100


  model = cd_fast.enet_coordinate_descent(


72.79958615003808