**Importing dependecies**

In [24]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

**----- DATA ANALYSIS & PRE PROCESSING-----**

**Loading the dataset and initial data analysis**

In [None]:
dataset = pd.read_csv('dataset.csv')  #reading the dataset

In [None]:
dataset.head()  #gives 1st 5 rows

Unnamed: 0,hours,age,internet,marks
0,6.83,15,1,78.5
1,6.56,16,0,76.74
2,,17,1,78.68
3,5.67,18,0,71.82
4,8.67,19,1,84.19


In [None]:
dataset.tail()  #gives last 5 rows

Unnamed: 0,hours,age,internet,marks
196,8.56,19,1,84.68
197,8.94,20,1,86.75
198,6.6,15,1,78.05
199,8.35,16,1,83.5
200,4.15,15,0,81.45


In [None]:
dataset.info()  #information about the dataset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   hours     196 non-null    float64
 1   age       201 non-null    int64  
 2   internet  201 non-null    int64  
 3   marks     201 non-null    float64
dtypes: float64(2), int64(2)
memory usage: 6.4 KB


In [None]:
dataset.describe() #statistical info about the dataset

Unnamed: 0,hours,age,internet,marks
count,196.0,201.0,201.0,201.0
mean,6.981429,17.467662,0.552239,77.951244
std,1.266266,1.720523,0.498505,4.919626
min,4.15,15.0,0.0,68.57
25%,5.7575,16.0,0.0,73.4
50%,7.11,17.0,1.0,77.77
75%,8.0825,19.0,1.0,82.3
max,8.99,20.0,1.0,86.99


In [None]:
dataset.isnull().sum() #Checking for null values

hours       5
age         0
internet    0
marks       0
dtype: int64

In [9]:
dataset.shape

(201, 4)

**Filling the missing values with Mean**

In [10]:
dataset['hours'] = dataset['hours'].fillna(dataset.hours.mean())

**Analysis of above opearation**

In [None]:
dataset.isnull().sum() #null values cleared

hours       0
age         0
internet    0
marks       0
dtype: int64

**Dividing the data into X and Y**

In [None]:
X = dataset.iloc[:, :-1] #taking independent columns to X
Y = dataset['marks'] #taking dependent column to Y

In [13]:
print(X)

        hours  age  internet
0    6.830000   15         1
1    6.560000   16         0
2    6.981429   17         1
3    5.670000   18         0
4    8.670000   19         1
..        ...  ...       ...
196  8.560000   19         1
197  8.940000   20         1
198  6.600000   15         1
199  8.350000   16         1
200  4.150000   15         0

[201 rows x 3 columns]


In [14]:
print(Y)

0      78.50
1      76.74
2      78.68
3      71.82
4      84.19
       ...  
196    84.68
197    86.75
198    78.05
199    83.50
200    81.45
Name: marks, Length: 201, dtype: float64


**Initializing a model and training it**

In [15]:
model = LinearRegression()
model.fit(X,Y)

**Evaluating the model**

In [19]:
a = [[5.05, 18, 1]]
result = model.predict(a)
print(result)

[70.65194936]




In [36]:
a2 = [[6.36, 18, 0]]
result_a2 = model.predict(a2)
print(result_a2)

[75.62481916]




**---- ANOTHER MODEL WITH SPLITTING AND STANDARDIZING THE DATA ----**

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=34) #Splitting the data(80% for train, 20% for test)

In [None]:
print(X_train.shape)  #dimensions of the splitted data
print(X_test.shape)

(160, 3)
(41, 3)


In [None]:
print(Y_train.shape)  #dimensions of the splitted data
print(Y_test.shape)

(160,)
(41,)


**Scaling the data**

In [30]:
SC = StandardScaler()
X_train_transformed = SC.fit_transform(X_train)
X_test_transformed = SC.transform(X_test)

In [31]:
print(X_train_transformed)

[[ 0.53801616  0.92525692  0.85972695]
 [-1.33735322 -0.80284939  0.85972695]
 [-0.04535477 -0.22681395  0.85972695]
 [ 1.3417459   0.92525692  0.85972695]
 [-1.23181295  1.50129236  0.85972695]
 [ 0.41623893 -0.80284939  0.85972695]
 [ 0.47306831  0.34922148 -1.16316   ]
 [-0.93954759 -0.22681395 -1.16316   ]
 [ 1.58530037 -0.80284939 -1.16316   ]
 [ 0.73285974  0.34922148 -1.16316   ]
 [-1.49160438 -0.80284939  0.85972695]
 [-1.28864233  1.50129236  0.85972695]
 [-1.49972287 -0.22681395  0.85972695]
 [-0.42808322 -0.22681395  0.85972695]
 [ 1.06571751 -0.80284939 -1.16316   ]
 [-2.34404501 -1.37888483 -1.16316   ]
 [ 0.1564475  -0.22681395  0.85972695]
 [ 1.26867956 -0.22681395 -1.16316   ]
 [-1.47536742  1.50129236  0.85972695]
 [ 1.49599706  0.92525692 -1.16316   ]
 [ 1.23620563  0.92525692  0.85972695]
 [ 0.55425313  0.34922148  0.85972695]
 [-1.32923474 -0.22681395  0.85972695]
 [ 1.53658947 -0.22681395  0.85972695]
 [ 0.0671442  -1.37888483 -1.16316   ]
 [-1.01261393  0.92525692

In [32]:
print(X_test_transformed)

[[-0.02215911  0.92525692  0.85972695]
 [-0.93954759 -0.80284939 -1.16316   ]
 [ 0.87087393 -1.37888483  0.85972695]
 [-0.90707367  0.34922148 -1.16316   ]
 [-0.42808322  1.50129236 -1.16316   ]
 [ 0.40000197 -0.22681395  0.85972695]
 [-1.2561684  -1.37888483  0.85972695]
 [-1.59714465  1.50129236 -1.16316   ]
 [-0.42808322 -0.22681395 -1.16316   ]
 [ 1.35798286  0.92525692 -1.16316   ]
 [-0.36313536  0.92525692 -1.16316   ]
 [-1.49972287 -0.22681395 -1.16316   ]
 [ 0.82216304  1.50129236 -1.16316   ]
 [-0.80965188  0.34922148  0.85972695]
 [ 0.82216304  0.34922148  0.85972695]
 [-1.64585554  1.50129236 -1.16316   ]
 [ 1.3417459  -0.80284939  0.85972695]
 [ 0.77345215 -0.22681395  0.85972695]
 [-1.29676081 -0.22681395  0.85972695]
 [-1.3454717   0.34922148 -1.16316   ]
 [-1.56467072  0.92525692  0.85972695]
 [ 1.5690634   1.50129236 -1.16316   ]
 [-1.11003572  0.34922148 -1.16316   ]
 [-0.30630598  0.92525692  0.85972695]
 [ 0.66791188  0.34922148 -1.16316   ]
 [ 0.18080295  0.34922148

**Initializing a model and training it**

In [33]:
model1 = LinearRegression()
model1.fit(X_train_transformed, Y_train)

In [34]:
y_pred = model1.predict(X_test)
print(y_pred)

[109.97633198 104.94809993 115.13631875 105.08876683 107.76564247
 112.41798671 103.05474257 101.12538686 107.83131943 117.93236975
 108.15643788 101.74441846 114.86702694 105.52527434 114.79396447
 100.84870954 117.78897383 114.53917947 102.78039349 102.59867097
 101.21488361 119.10941247 103.93594467 108.36238097 114.03466675
 111.15104646 115.57828431 101.99880267 104.39474529 115.22854452
 105.34315104 104.41623683 102.98713814 118.09492897 115.83306931
 109.23386932 114.6099137  101.19299129 114.95419545 101.2832896
 114.21911829]




In [35]:
b = [[5.05, 18, 1]]
result1 = model.predict(b)
print(result1)

[70.65194936]




In [37]:
b2 = [[6.36, 18, 0]]
result_b2 = model.predict(b2)
print(result_b2)

[75.62481916]


