In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Simple Linear Regression

In [None]:
class MyLR:
  """Simple (single-feature) linear regression fitted by ordinary least squares.

  After ``fit`` the slope is stored in ``self.m`` and the intercept in
  ``self.c``; both are ``None`` until ``fit`` has been called.
  """

  def __init__(self):
    self.m=None  # slope, set by fit()
    self.c=None  # intercept, set by fit()

  def fit(self,X_train,y_train):
    """Estimate slope and intercept from the training data and print them.

    Uses the closed-form estimates
    ``m = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)`` and
    ``c = y_mean - m * x_mean``.

    Args:
      X_train: array-like of feature values (NumPy array, list, Series, ...);
        it is flattened to one dimension.
      y_train: array-like of target values with the same number of elements.

    Raises:
      ValueError: if the inputs are empty, differ in length, or ``X_train``
        is constant (the slope is undefined because the denominator is 0).
    """
    x = np.asarray(X_train, dtype=float).ravel()
    y = np.asarray(y_train, dtype=float).ravel()

    if x.size == 0:
      raise ValueError("X_train and y_train must not be empty")
    if x.size != y.size:
      raise ValueError("X_train and y_train must have the same number of elements")

    # The means are computed once here instead of once per sample.
    x_mean = x.mean()
    y_mean = y.mean()
    x_dev = x - x_mean
    y_dev = y - y_mean

    den = np.dot(x_dev, x_dev)
    if den == 0:
      raise ValueError("X_train is constant; the slope is undefined")

    self.m = np.dot(x_dev, y_dev) / den
    self.c = y_mean - (self.m * x_mean)

    print(self.m)
    print(self.c)

  def predict(self,X_test):
    """Return ``m * X_test + c`` using the parameters learned by ``fit``."""
    return self.m * X_test + self.c

In [None]:
# Load the placement dataset; the path is relative to the notebook's working directory.
df=pd.read_csv('placement (1).csv')
# Display (rows, columns) as a quick sanity check of the load.
df.shape

(200, 2)

In [None]:
# Preview the first rows to inspect the available columns.
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


# Model Training

In [None]:
# Feature = first column, target = last column (presumably cgpa and package, per the df.head() output).
X=df.iloc[:,0].values # .values converts the pandas Series into a plain NumPy array.
y=df.iloc[:,-1].values # Plain arrays avoid index-alignment surprises when mixing Series and arrays in arithmetic.


In [None]:
# NOTE(review): imports are conventionally collected in the first cell; consider moving this one there.
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; random_state fixes the shuffle so the split is reproducible.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [None]:
# Size of the training split.
X_train.shape

(160,)

In [None]:
# Size of the test split.
X_test.shape

(40,)

In [None]:
# Instantiate the from-scratch simple linear regression model.
lr=MyLR()

In [None]:
# Fit on the training split; fit() prints the learned slope and then the intercept.
lr.fit(X_train,y_train)

0.5579519734250721
-0.8961119222429152


In [None]:
# Display the actual targets of the test split for comparison with the predictions.
y_test

array([4.1 , 3.49, 2.08, 2.33, 1.94, 1.48, 1.86, 3.09, 4.21, 2.87, 3.65,
       4.  , 2.89, 2.6 , 2.99, 3.25, 1.86, 3.67, 2.37, 3.42, 2.48, 3.65,
       2.6 , 2.83, 4.08, 2.56, 3.58, 3.81, 4.09, 2.01, 3.63, 2.92, 3.51,
       1.94, 2.21, 3.34, 3.34, 3.23, 2.01, 2.61])

In [None]:
# Predict targets for the held-out features with the fitted model, then display them.
y_pred =lr.predict(X_test)
y_pred

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

# Regression Metrics from Scratch

In [None]:
class RegMetrics:
  """Regression metrics implemented from scratch.

  Each method stores its result on the matching attribute (``r2``, ``mae``,
  ``mse``, ``rmse``), prints it with a label and returns ``None``.
  """

  def __init__(self):
    self.r2=None
    self.mae=None
    self.rmse=None
    self.mse=None

  @staticmethod
  def _residuals(y_test,y_pred):
    """Validate the inputs and return ``(y_true, y_true - y_hat)`` as float arrays.

    Raises:
      ValueError: if the inputs are empty or differ in length, so that extra
        samples are never silently ignored.
    """
    y_true = np.asarray(y_test, dtype=float).ravel()
    y_hat = np.asarray(y_pred, dtype=float).ravel()
    if y_true.size == 0:
      raise ValueError("y_test and y_pred must not be empty")
    if y_true.size != y_hat.size:
      raise ValueError("y_test and y_pred must have the same number of elements")
    return y_true, y_true - y_hat

  def r2_score(self,y_test,y_pred):
    """Compute R² = 1 - SS_res / SS_tot, store it in ``self.r2`` and print it.

    SS_tot is 0 when ``y_test`` is constant; the division then follows NumPy
    floating-point semantics (nan or -inf with a RuntimeWarning).
    """
    y_true, residuals = self._residuals(y_test, y_pred)

    # The mean is computed once rather than once per sample.
    deviations = y_true - y_true.mean()
    num = np.dot(residuals, residuals)
    den = np.dot(deviations, deviations)

    self.r2=1-(num/den)

    print("R2_Score:",self.r2)

  def compute_mae(self,y_test,y_pred):
    """Compute the mean absolute error, store it in ``self.mae`` and print it."""
    _, residuals = self._residuals(y_test, y_pred)

    self.mae=np.mean(np.abs(residuals))

    print("MAE:",self.mae)

  def compute_mse(self,y_test,y_pred):
    """Compute the mean squared error, store it in ``self.mse`` and print it."""
    _, residuals = self._residuals(y_test, y_pred)

    self.mse=np.mean(residuals**2)

    print("MSE:",self.mse)

  def compute_rmse(self,y_test,y_pred):
    """Compute the root mean squared error, store it in ``self.rmse`` and print it."""
    _, residuals = self._residuals(y_test, y_pred)

    self.rmse=np.sqrt(np.mean(residuals**2))

    print("RMSE:",self.rmse)

In [None]:
# Create the from-scratch metrics helper.
metrics=RegMetrics()

In [None]:
# Evaluate the simple-regression predictions; each call stores its result on `metrics` and prints it.
metrics.r2_score(y_test,y_pred)
metrics.compute_mae(y_test,y_pred)
metrics.compute_mse(y_test,y_pred)
metrics.compute_rmse(y_test,y_pred)

R2_Score: 0.7807301475103842
MAE: 0.28847109318781733
MSE: 0.1212923531349552
RMSE: 0.34827051717731605


# Multiple Linear Regression

In [None]:
class MeraLR:
  """Multiple linear regression fitted by ordinary least squares.

  After ``fit`` the per-feature weights are stored in ``self.coef_`` and the
  bias term in ``self.intercept_``; both are ``None`` until then.
  """

  def __init__(self):
    self.coef_=None
    self.intercept_=None

  def fit(self,X_train,y_train):
    """Estimate the intercept and coefficients from the training data.

    Args:
      X_train: array-like of shape (n_samples,) or (n_samples, n_features);
        1-D input is treated as a single feature column.
      y_train: array-like of targets with n_samples entries.
    """
    X = np.asarray(X_train, dtype=float)
    # Reshape X to be 2-dimensional if it is 1D (a single feature).
    if X.ndim == 1:
      X = X.reshape(-1, 1)
    y = np.asarray(y_train, dtype=float)

    # Prepend a column of ones so that the first coefficient is the intercept.
    design = np.insert(X,0,1,axis=1)

    # Solve the least-squares problem directly instead of inverting X^T X:
    # this is numerically more stable and also returns a (minimum-norm)
    # solution when features are collinear, where the explicit inverse fails.
    betas, *_ = np.linalg.lstsq(design, y, rcond=None)
    self.intercept_ = betas[0]
    self.coef_ = betas[1:]

  def predict(self,X_test):
    """Return predictions ``X_test @ coef_ + intercept_``; call ``fit`` first.

    Args:
      X_test: array-like of shape (n_samples,) or (n_samples, n_features);
        1-D input is treated as a single feature column.
    """
    X = np.asarray(X_test, dtype=float)
    # Reshape X to be 2-dimensional if it is 1D (a single feature).
    if X.ndim == 1:
      X = X.reshape(-1, 1)
    y_pred = np.dot(X,self.coef_) + self.intercept_
    return y_pred

In [None]:
# Re-read the same CSV that was loaded earlier in the notebook, so this section starts from a fresh DataFrame.
df=pd.read_csv('placement (1).csv')

In [None]:
# Feature = first column, target = last column, both converted to NumPy arrays via .values.
X=df.iloc[:,0].values
y=df.iloc[:,-1].values

In [None]:
# NOTE(review): train_test_split is already imported earlier in the notebook; this repeat is harmless but redundant.
from sklearn.model_selection import train_test_split
# Same test_size and random_state as the earlier split, so the partition is reproducible.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [None]:
# Instantiate the matrix-based least-squares regression model.
LinearReg=MeraLR()

In [None]:
# Fit on the training split; the result is stored in intercept_ and coef_ (nothing is printed).
LinearReg.fit(X_train,y_train)

In [None]:
# Display the actual targets of the test split for comparison with the predictions.
y_test

array([4.1 , 3.49, 2.08, 2.33, 1.94, 1.48, 1.86, 3.09, 4.21, 2.87, 3.65,
       4.  , 2.89, 2.6 , 2.99, 3.25, 1.86, 3.67, 2.37, 3.42, 2.48, 3.65,
       2.6 , 2.83, 4.08, 2.56, 3.58, 3.81, 4.09, 2.01, 3.63, 2.92, 3.51,
       1.94, 2.21, 3.34, 3.34, 3.23, 2.01, 2.61])

In [None]:
# Predict targets for the held-out features with the fitted model, then display them.
y_pred=LinearReg.predict(X_test)
y_pred

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

# Regression Metrics

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score

In [None]:
# Report the scikit-learn reference metrics for the current y_test / y_pred.
print('MAE',mean_absolute_error(y_test,y_pred))
print('MSE',mean_squared_error(y_test,y_pred))
# RMSE is the square root of the MSE.
print('RMSE',np.sqrt(mean_squared_error(y_test,y_pred)))
print('R2_score',r2_score(y_test,y_pred))

MAE 0.2884710931878158
MSE 0.12129235313495437
RMSE 0.3482705171773149
R2_score 0.7807301475103857


#### Summary of Model Performance:

*   The model performed quite well, with low prediction errors and a strong R² score; both implementations produce the same predictions and metrics.

*   An R² of about 0.78 indicates that the model explains roughly 78% of the variability in the target—most, but not all, of it.

*   It may benefit from feature engineering, adding more predictive variables, or trying a more complex model (like Random Forest or Gradient Boosting) for improved performance.