**Simple Linear Regression - Scratch**

**Mount Drive**

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive;
# the dataset loaded later in this notebook is read from under that path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Import Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import math
import time
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, mean_absolute_error, explained_variance_score

**Load Dataset**

In [None]:
# Read the placement CSV from the mounted Drive folder into a DataFrame,
# then preview its first five rows as the cell output.
data = pd.read_csv('/content/drive/MyDrive/dataset/Placement_Data.csv')
data.head(5)

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [None]:
# Feature matrix = first column only (CGPA); target = every remaining column (package).
# Range slices (rather than a bare integer index) keep both arrays two-dimensional.
x = data.iloc[:, 0:1].values
y = data.iloc[:, 1:].values

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

# Display the shape of each split as the cell output.
tuple(split.shape for split in (x_train, x_test, y_train, y_test))

((160, 1), (40, 1), (160, 1), (40, 1))

In [None]:
class LinearRegressionScratch():
  """Simple (single-feature) linear regression fitted by closed-form least squares.

  The fitted line is ``y = slope * x + intercept`` with

      slope     = sum((x_i - mean(x)) * (y_i - mean(y))) / sum((x_i - mean(x)) ** 2)
      intercept = mean(y) - slope * mean(x)

  Attributes:
    slope: Fitted slope, or None until ``fit`` has succeeded.
    intercept: Fitted intercept, or None until ``fit`` has succeeded.
  """

  def __init__(self):
    self.slope = None
    self.intercept = None

  def fit(self, x, y):
    """Estimate ``slope`` and ``intercept`` from paired samples.

    Args:
      x: Feature values, one entry per sample (list, 1-D array, or an
        ``(n, 1)`` column array). Converted with ``np.asarray`` so samples
        are always taken by position.
      y: Target values, one entry per sample, same length as ``x``.

    Raises:
      ValueError: If ``x`` and ``y`` have different lengths, if they are
        empty, or if every ``x`` value equals the mean (zero variance), in
        which case the slope is undefined.

    The fitted parameters keep the shape of one sample: ``(n, 1)`` inputs
    give arrays of shape ``(1,)``, 1-D inputs give scalars. On failure the
    previously fitted parameters are left untouched.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    n = len(x)
    if n != len(y):
      raise ValueError(
          "x and y must contain the same number of samples "
          "(got {} and {}).".format(n, len(y)))
    if n == 0:
      raise ValueError("Cannot fit a regression line on empty data.")

    x_mean = np.mean(x)
    y_mean = np.mean(y)
    x_dev = x - x_mean
    y_dev = y - y_mean

    # Accumulate sample by sample so each sum keeps the shape of one sample.
    numerator = sum(dx * dy for dx, dy in zip(x_dev, y_dev))
    denominator = sum(dx ** 2 for dx in x_dev)

    # A zero denominator would otherwise produce a silent nan/inf slope.
    if np.any(denominator == 0):
      raise ValueError(
          "x has zero variance; the slope of the regression line is undefined.")

    self.slope = numerator / denominator
    self.intercept = y_mean - self.slope * x_mean

  # Prediction
  def predict(self, x):
    """Predict the target for every element of ``x``.

    Args:
      x: Iterable of samples; each element is passed through
        ``slope * element + intercept``.

    Returns:
      A Python list with one prediction per element of ``x``.

    Raises:
      RuntimeError: If the model has not been fitted yet.
    """
    if self.slope is None or self.intercept is None:
      raise RuntimeError("Model is not fitted yet; call fit() before predict().")
    return [self.slope * sample + self.intercept for sample in x]

In [None]:
# # Creating own dataset (x and y must contain the same number of samples)
# x = np.array([1, 2, 3, 4, 5])
# y = np.array([10, 11, 12, 13, 14])

**Train**

In [None]:
# Fit on the training split only: the test rows must stay unseen so that the
# predictions on x_test measure generalization instead of memorized data.
lr = LinearRegressionScratch()
lr.fit(x_train, y_train)

In [None]:
# Fitted parameters, one per line: slope first, then intercept
print(lr.slope, lr.intercept, sep="\n")

[0.56959129]
[-0.98567795]


**Prediction**

In [None]:
# Prediction for whole test data
# predict() returns a Python list with one entry per row of x_test.
y_pred = lr.predict(x_test)
print(y_pred)

[array([3.90141536]), array([3.08689981]), array([2.36351887]), array([2.55717991]), array([1.61735427]), array([1.74266436]), array([2.04454774]), array([2.92171834]), array([3.77040937]), array([2.92741425]), array([4.10646823]), array([3.51409328]), array([2.96158973]), array([2.38060661]), array([3.18373033]), array([3.46852598]), array([1.91354174]), array([3.24068946]), array([2.96728564]), array([3.41726276]), array([2.54009217]), array([3.16094668]), array([2.84767147]), array([3.11537938]), array([3.69066658]), array([2.85906329]), array([3.49700554]), array([3.34321589]), array([3.92989493]), array([1.93062948]), array([3.65649111]), array([3.20651398]), array([3.74762571]), array([2.7736246]), array([2.76792869]), array([3.26916903]), array([3.53118102]), array([2.5970513]), array([2.6426186]), array([2.69957773])]


In [None]:
# First test sample: actual input (CGPA) and actual target (package) — ground truth, not a prediction
x_test[0], y_test[0]

(array([8.58]), array([4.1]))

In [None]:
# Model Prediction (Package) for single data point
# x_test[0] is a one-element array, so predict() iterates over that single
# CGPA value and returns a one-element list.
print(lr.predict(x_test[0]))

[array([3.90141536])]
