In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the separate train and test CSV files; both paths are relative to the
# directory the notebook is executed from.
train_data, test_data = (
    pd.read_csv(csv_name)
    for csv_name in ("train_dataset_linear_reg.csv", "test_dataset_linear_reg.csv")
)

# Report the (rows, columns) shape of each frame as a quick sanity check.
print("Shape of training dataset: ", train_data.shape)
print("Shape of testing dataset: ", test_data.shape)

# Separate each dataset into input features (X) and the output target (Y).
# The target (dependent variable) is Item_Outlet_Sales; its name is kept in a
# single constant so the four uses below cannot drift apart.
TARGET_COLUMN = 'Item_Outlet_Sales'

# Training datasets.
# `columns=` already identifies the axis, so no separate `axis` argument is needed.
x_train = train_data.drop(columns=[TARGET_COLUMN])
y_train = train_data[TARGET_COLUMN]

# Testing datasets
x_test = test_data.drop(columns=[TARGET_COLUMN])
y_test = test_data[TARGET_COLUMN]

# Build a linear regression model with default settings and fit it on the
# training features/target in one step (`fit` returns the fitted estimator).
model = LinearRegression().fit(x_train, y_train)

# Learned parameters: one coefficient per input feature column, then the intercept.
print("\nCoefficients of trained model: ", model.coef_)
print("\nIntercept of trained model: ", model.intercept_)

def _rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    float
        Square root of ``mean_squared_error(y_true, y_pred)``.
    """
    return mean_squared_error(y_true, y_pred) ** 0.5


# Predict the target on the training dataset.
# NOTE(review): the recorded output contains a negative prediction; a plain
# linear model does not constrain its output range, so consider whether
# negative sales values need handling downstream.
train_predictions = model.predict(x_train)
print("\nItem_Outlet_Sales prediction on training dataset: ", train_predictions)

# Root mean squared error on the training dataset
rmse_train = _rmse(y_train, train_predictions)
print("\nRMSE on train dataset: ", rmse_train)

# Predict the target on the testing dataset
test_predictions = model.predict(x_test)
print("\nItem_Outlet_Sales prediction on testing dataset: ", test_predictions)

# Root mean squared error on the testing dataset
rmse_test = _rmse(y_test, test_predictions)
print("\nRMSE on test dataset: ", rmse_test)


Shape of training dataset:  (6818, 36)
Shape of testing dataset:  (1705, 36)

Coefficients of trained model:  [ 6.22922159e-01 -3.05543741e+02  1.54753664e+01  3.28472830e+01
 -9.74873370e+00 -2.41795130e+01  4.56644413e+00  1.12508091e+02
 -8.31462886e+01 -4.07559798e+00  3.42962529e+01 -3.52111189e+01
  3.01171812e+01 -7.78837887e+01 -5.81991352e+01  3.35999951e+01
 -3.45385698e+01 -1.59912918e+01 -8.37658470e+01  9.42301806e+00
 -6.41750596e+01  3.09361889e+02 -4.29785631e+01 -7.42604600e+01
  7.42810959e+01  7.90650333e+02  1.49967901e+02  1.68544650e+02
  1.23282081e+02  3.94826434e+01 -1.62764724e+02 -1.58933750e+03
 -6.28256479e+01 -4.02668500e+02  2.05483165e+03]

Intercept of trained model:  -65678.96259572908

Item_Outlet_Sales prediction on training dataset:  [1027.24684892  942.24471157 1873.35514454 ... 2885.32100831 -413.6349489
 1745.5523439 ]

RMSE on train dataset:  1128.3872104647512

Item_Outlet_Sales prediction on testing dataset:  [4120.04827319 1709.13115544 2950.