# Loading Libraries

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
import seaborn as sns

# Loading Dataset

In [5]:
# Load the exam-marks CSV into a DataFrame.
# The original absolute Windows path is kept as the default so existing runs
# behave exactly as before; set the EXAM_MARKS_CSV environment variable to
# point at the file when running on another machine.
import os

DATA_CSV = os.environ.get(
    "EXAM_MARKS_CSV",
    "D:/Pantech Solutions/12_ExamMarkPrediction_LINEARREGRESSION_MULTIPLEVARIABLES/data.csv",
)
dataset = pd.read_csv(DATA_CSV)

In [6]:
# Preview the first five rows to sanity-check the load.
dataset.head(n=5)

Unnamed: 0,hours,age,internet,marks
0,6.83,15,1,78.5
1,6.56,16,0,76.74
2,,17,1,78.68
3,5.67,18,0,71.82
4,8.67,19,1,84.19


In [7]:
# Summary statistics, transposed so each feature is a row and each statistic a column.
dataset.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
hours,196.0,6.981429,1.266266,4.15,5.7575,7.11,8.0825,8.99
age,201.0,17.467662,1.720523,15.0,16.0,17.0,19.0,20.0
internet,201.0,0.552239,0.498505,0.0,0.0,1.0,1.0,1.0
marks,201.0,77.951244,4.919626,68.57,73.4,77.77,82.3,86.99


In [8]:
# List the column labels of the loaded frame.
dataset.columns

Index(['hours', 'age', 'internet', 'marks'], dtype='object')

In [9]:
# Total number of cells in the frame (rows x columns).
dataset.size

804

In [10]:
# Number of axes of the frame.
dataset.ndim

2

# Finding and Removing Null Values from our Features

In [11]:
# Per column: does it contain at least one missing value?
dataset.isna().any(axis=0)

hours        True
age         False
internet    False
marks       False
dtype: bool

In [12]:
# Labels of only those columns that contain missing values.
dataset.loc[:, dataset.isna().any(axis=0)].columns

Index(['hours'], dtype='object')

In [13]:
# Number of missing values in each column.
dataset.isna().sum(axis=0)

hours       5
age         0
internet    0
marks       0
dtype: int64

In [14]:
# Look at the first five rows again before handling the missing `hours` values.
dataset.head(n=5)

Unnamed: 0,hours,age,internet,marks
0,6.83,15,1,78.5
1,6.56,16,0,76.74
2,,17,1,78.68
3,5.67,18,0,71.82
4,8.67,19,1,84.19


In [15]:
# Replace missing `hours` values with the column mean (NaNs are skipped when
# the mean is computed).
# NOTE(review): the mean is taken over every row, including rows that later
# end up in the test split; consider imputing from the training rows only.
dataset["hours"] = dataset["hours"].fillna(value=dataset["hours"].mean())

In [16]:
# Show the first five rows to confirm the missing `hours` entry was filled.
dataset.head(n=5)

Unnamed: 0,hours,age,internet,marks
0,6.83,15,1,78.5
1,6.56,16,0,76.74
2,6.981429,17,1,78.68
3,5.67,18,0,71.82
4,8.67,19,1,84.19


# Segregate Dataset into 2 Parts - Independent and Dependent Variables

In [17]:
# Feature matrix: the first three columns (hours, age, internet) as a NumPy array.
x = dataset.iloc[:, :3].to_numpy()

In [18]:
# Display the feature matrix.
# NOTE(review): this echoes the whole array; `x[:5]` would keep the output short.
x

array([[ 6.83      , 15.        ,  1.        ],
       [ 6.56      , 16.        ,  0.        ],
       [ 6.98142857, 17.        ,  1.        ],
       [ 5.67      , 18.        ,  0.        ],
       [ 8.67      , 19.        ,  1.        ],
       [ 7.55      , 20.        ,  0.        ],
       [ 6.67      , 15.        ,  0.        ],
       [ 8.99      , 16.        ,  0.        ],
       [ 5.19      , 17.        ,  1.        ],
       [ 6.75      , 18.        ,  0.        ],
       [ 6.59      , 19.        ,  0.        ],
       [ 8.56      , 20.        ,  1.        ],
       [ 7.75      , 15.        ,  0.        ],
       [ 7.9       , 16.        ,  1.        ],
       [ 8.19      , 17.        ,  0.        ],
       [ 6.55      , 18.        ,  1.        ],
       [ 6.36      , 19.        ,  0.        ],
       [ 8.44      , 20.        ,  1.        ],
       [ 8.41      , 15.        ,  0.        ],
       [ 7.67      , 16.        ,  1.        ],
       [ 7.42      , 17.        ,  1.   

In [19]:
# Target vector: the fourth column (marks) as a 1-D NumPy array.
y = dataset.iloc[:, 3].to_numpy()

In [20]:
# Display the target vector.
# NOTE(review): this echoes the whole array; `y[:5]` would keep the output short.
y

array([78.5 , 76.74, 78.68, 71.82, 84.19, 81.18, 76.99, 85.46, 70.66,
       77.82, 75.37, 83.88, 79.5 , 80.76, 83.08, 76.03, 76.04, 85.11,
       82.5 , 80.58, 82.18, 83.36, 70.67, 75.02, 70.96, 83.33, 74.75,
       75.65, 74.15, 80.17, 82.27, 76.14, 71.1 , 84.35, 83.08, 76.76,
       81.24, 78.21, 73.08, 83.23, 70.27, 86.41, 71.1 , 82.84, 82.38,
       72.96, 77.46, 70.11, 72.38, 71.41, 72.22, 77.77, 84.44, 71.45,
       82.21, 85.48, 75.03, 86.65, 70.9 , 71.7 , 73.61, 79.41, 76.19,
       80.43, 85.78, 70.06, 81.25, 81.7 , 69.27, 82.79, 71.8 , 71.79,
       74.97, 78.61, 77.59, 72.33, 72.08, 77.33, 70.05, 73.34, 84.  ,
       82.93, 76.63, 75.36, 77.29, 72.87, 73.4 , 81.74, 71.85, 84.6 ,
       79.56, 82.1 , 72.08, 79.1 , 81.01, 76.48, 75.39, 68.57, 83.64,
       82.3 , 75.18, 82.03, 82.99, 79.26, 77.55, 77.07, 72.1 , 73.25,
       74.25, 70.58, 81.08, 75.04, 76.38, 80.86, 78.42, 74.44, 70.34,
       85.04, 73.61, 75.55, 76.2 , 82.69, 76.83, 79.53, 83.57, 85.95,
       76.02, 77.65,

# Splitting Dataset into Training and Testing

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [23]:
# Report how many samples landed in each split (features and targets should match).
# The second line previously labelled len(y_test) as "y_train"; the label now
# names the array that is actually being counted.
print(f"Count of x_train : {len(x_train)}\t\tCount of y_train : {len(y_train)}")
print(f"Count of x_test  : {len(x_test)}\t\tCount of y_test  : {len(y_test)}")

Count of x_train : 160		Count of y_train : 160
Count of x_test  : 41		Count of y_train : 41


In [24]:
# (rows, columns) of the full frame, for comparison with the split sizes.
dataset.shape

(201, 4)

# Training 

In [25]:
from sklearn.linear_model import LinearRegression

In [26]:
# Create a linear regression estimator with scikit-learn's default settings.
model = LinearRegression()

In [27]:
# Fit the regression on the training split only.
model.fit(X=x_train, y=y_train)

LinearRegression()

# Predicting Marks for any 1 Student

In [28]:
# Read one student's features interactively; the prompts are asked in the same
# order as the feature columns used for training (hours, age, internet).
# NOTE(review): the result of this cell depends on what is typed at the
# prompts, so it is not reproducible from code alone.
hours = float(input("Enter Hours    : "))
age = int(input("Enter Age      : "))
internet = int(input("Enter Internet : "))

# A single-row 2-D list, because predict() expects one row per sample.
# Despite its name, this holds the student's input features, not marks.
newStudentmarks = [[hours, age, internet]]
result = model.predict(newStudentmarks)
# Printed as a one-element array holding the predicted marks.
print(result)

Enter Hours    : 7.1
Enter Age      : 20
Enter Internet : 0
[78.41439698]


# Predicting for all Test Data

In [29]:
# Predicted marks for every row of the held-out test split.
y_prediction = model.predict(X=x_test)

In [30]:
# Side-by-side view: column 0 is the actual marks, column 1 the predicted marks.
np.column_stack((y_test, y_prediction))

array([[82.5       , 83.53281468],
       [71.18      , 71.30591001],
       [73.25      , 73.03365287],
       [83.64      , 85.23858572],
       [73.64      , 73.53695993],
       [86.99      , 84.40791128],
       [81.18      , 80.11350377],
       [82.75      , 81.70798011],
       [79.5       , 81.04079138],
       [81.7       , 82.04780147],
       [79.41      , 78.98607712],
       [85.95      , 84.64445522],
       [77.19      , 77.92552299],
       [78.45      , 77.63678805],
       [84.        , 83.02950762],
       [85.46      , 85.68834874],
       [84.35      , 84.82658062],
       [73.19      , 72.93616519],
       [78.21      , 78.89192157],
       [77.59      , 79.3105909 ],
       [83.87      , 84.01454589],
       [85.15      , 85.53731702],
       [72.96      , 74.8393692 ],
       [80.72      , 81.35285118],
       [73.61      , 72.33734954],
       [79.53      , 80.44666089],
       [78.17      , 79.01852384],
       [79.63      , 82.00140021],
       [76.83      ,

# Evaluating Model - Mean Squared Error

In [31]:
from sklearn.metrics import mean_squared_error

In [32]:
# Report the mean squared error between the actual and predicted test marks.
print(
    "Mean Squared Error of the Model is : "
    f"{mean_squared_error(y_test, y_prediction)}"
)

Mean Squared Error of the Model is : 1.153124175157672
