<a href="https://colab.research.google.com/github/hevendra630/LINEAR-REGRESSION-ON-BREAST-CANCER--2/blob/main/Linear_regression_on_breast_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==============================
# Linear Regression from Scratch
# Breast Cancer Dataset
# ==============================

# Step 1: Set Kaggle API credentials
import os
os.environ['KAGGLE_USERNAME'] = 'hevendrabage'
os.environ['KAGGLE_KEY'] = '61a2ae3c4dfb6ea39592d76f1b5a29c3'

# Step 2: Download dataset from Kaggle
!kaggle datasets download -d uciml/breast-cancer-wisconsin-data
!unzip -o breast-cancer-wisconsin-data.zip

# Step 3: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Step 4: Load dataset
data = pd.read_csv("data.csv")

# Drop ID and unnamed column if present.
# errors="ignore" is what makes "if present" true: without it, drop() raises
# KeyError as soon as one of the listed columns is missing from the CSV.
data = data.drop(columns=["id", "Unnamed: 32"], errors="ignore")

# Encode diagnosis column (M=1, B=0)
data["diagnosis"] = data["diagnosis"].map({"M": 1, "B": 0})

# .map() turns every label outside the mapping into NaN; fail loudly here
# instead of letting NaN targets flow silently into training.
if data["diagnosis"].isna().any():
    raise ValueError("diagnosis column contains missing or unexpected labels (expected 'M' or 'B')")

# Features and target
X = data.drop("diagnosis", axis=1).values
y = data["diagnosis"].values

# Train/Test split (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit the scaler on the training split only and reuse
# its statistics for the test split, so no test information leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Implement Linear Regression from scratch (Gradient Descent)
class LinearRegressionScratch:
    """Linear regression trained with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias``; every epoch takes one step of
    size ``lr`` along the negative gradient of the (halved) mean squared error.

    Parameters
    ----------
    lr : float, default 0.01
        Step size of each gradient update.
    epochs : int, default 1000
        Number of gradient steps performed by ``fit``.

    Attributes (set by ``fit``)
    ---------------------------
    weights : numpy.ndarray of shape (n_features,)
    bias : float
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs

    def fit(self, X, y):
        """Fit weights and bias to ``X`` (n_samples, n_features) and ``y``.

        ``y`` may be 1-D or a column vector; it is flattened before training.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold exactly one
            target per row of ``X``.
        """
        X = np.asarray(X)
        # Flatten the target: a (n, 1) column would otherwise broadcast against
        # the 1-D prediction into an (n, n) residual matrix.
        y = np.asarray(y).reshape(-1)

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} vs {y.shape[0]}"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.epochs):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y
            # Gradients of (1 / 2n) * sum(error ** 2) w.r.t. weights and bias.
            dw = (1/n_samples) * np.dot(X.T, error)
            db = (1/n_samples) * np.sum(error)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return ``X @ weights + bias`` using the parameters learned by ``fit``."""
        return np.dot(X, self.weights) + self.bias

# Fit the gradient-descent model on the standardized training split
model = LinearRegressionScratch(epochs=1000, lr=0.01)
model.fit(X_train, y_train)

# Score the held-out test split
y_pred = model.predict(X_test)

# Step 6: Evaluate (MSE, its square root, and R² on the test predictions)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

print("From Scratch Implementation:")
for _label, _value in (("MSE:", mse), ("RMSE:", rmse), ("R² Score:", r2)):
    print(_label, _value)


Dataset URL: https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data
License(s): CC-BY-NC-SA-4.0
Downloading breast-cancer-wisconsin-data.zip to /content
  0% 0.00/48.6k [00:00<?, ?B/s]
100% 48.6k/48.6k [00:00<00:00, 108MB/s]
Archive:  breast-cancer-wisconsin-data.zip
  inflating: data.csv                
From Scratch Implementation:
MSE: 0.058847505572277164
RMSE: 0.2425850481218436
R² Score: 0.7494981387431006


In [2]:
# ==============================
# Linear Regression with Scikit-Learn
# Breast Cancer Dataset
# ==============================

# Step 1: Set Kaggle API credentials
import os
os.environ['KAGGLE_USERNAME'] = 'hevendrabage'
os.environ['KAGGLE_KEY'] = '6de80633bee4d30fd783110c86ea5047'

# Step 2: Download dataset from Kaggle
!kaggle datasets download -d uciml/breast-cancer-wisconsin-data
!unzip -o breast-cancer-wisconsin-data.zip

# Step 3: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Step 4: Load dataset
data = pd.read_csv("data.csv")

# Drop ID and unnamed column if present.
# errors="ignore" is what makes "if present" true: without it, drop() raises
# KeyError as soon as one of the listed columns is missing from the CSV.
data = data.drop(columns=["id", "Unnamed: 32"], errors="ignore")

# Encode diagnosis column (M=1, B=0)
data["diagnosis"] = data["diagnosis"].map({"M": 1, "B": 0})

# .map() turns every label outside the mapping into NaN; fail loudly here
# instead of letting NaN targets flow silently into training.
if data["diagnosis"].isna().any():
    raise ValueError("diagnosis column contains missing or unexpected labels (expected 'M' or 'B')")

# Features and target
X = data.drop("diagnosis", axis=1).values
y = data["diagnosis"].values

# Train/Test split (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit the scaler on the training split only and reuse
# its statistics for the test split, so no test information leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Fit scikit-learn's LinearRegression on the standardized training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
lr = LinearRegression().fit(X_train, y_train)

# Score the held-out test split
y_pred = lr.predict(X_test)

# Step 6: Evaluate (MSE, its square root, and R² on the test predictions)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

print("Scikit-Learn Implementation:")
for _label, _value in (("MSE:", mse), ("RMSE:", rmse), ("R² Score:", r2)):
    print(_label, _value)


Dataset URL: https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data
License(s): CC-BY-NC-SA-4.0
breast-cancer-wisconsin-data.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  breast-cancer-wisconsin-data.zip
  inflating: data.csv                
Scikit-Learn Implementation:
MSE: 0.0641088624702946
RMSE: 0.25319727974505296
R² Score: 0.7271016126223555
