In [None]:
import pandas as pd                                             # import for dataframes
import numpy as np                                              # import for arrays
import matplotlib.pyplot as plt                                 # import for plotting
from sklearn import linear_model                                # importing the Linear Regression Algorithm from sklearn
from sklearn.model_selection import train_test_split            # splitting data into training set and testing set

# Link to dataset: https://archive.ics.uci.edu/ml/datasets/student+performance

#######################################################################################
# Step 1: Load the data; store the features in X and the target (G3) in Y.            #
#######################################################################################

# The csv file uses ';' as its field delimiter.
student_data = pd.read_csv("student-mat.csv", sep=";")

# Keep only the columns used by this model (G3 is the value being predicted).
student_data = student_data[["G1", "G2", "G3", "studytime", "failures", "absences"]]

# Select the axis by keyword: the positional form drop(labels, 1) was deprecated
# in pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
X = np.array(student_data.drop(columns=["G3"]))
Y = np.array(student_data["G3"])

#######################################################################################
# Step 2: Split the dataset into a training set and test set.                         #
#         70% of the data should be for the training set.                             #
#         30% of the data should be for the test set.                                 #
#######################################################################################

# No random_state is given, so the split (and every number below) changes per run.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

##################################
# Step 3: Create the model.      #
##################################

linear_regression_model = linear_model.LinearRegression()

##################################
# Step 4: Train the model.       #
##################################

linear_regression_model.fit(x_train, y_train)

##################################
# Step 5: Test the model.        #
##################################

y_predict = linear_regression_model.predict(x_test)

##################################
# Step 6: Evaluate the model.    #
##################################

# For a regressor, score() returns the coefficient of determination (R^2), not a
# classification accuracy; the variable name and message are kept for continuity.
# The built-in round() is used instead of the .round() method so this works
# whether score() hands back a NumPy scalar or a plain Python float.
accuracy = round(linear_regression_model.score(x_test, y_test) * 100, 2)

print(f"\nAccuracy of the model is: {accuracy}%\n")

##############################################################################
# Step 7: Print out the result, input parameters, and expected result        #
##############################################################################

print("Predicted Value, [Input Data], Actual Value")
for predicted, features, actual in zip(y_predict, x_test, y_test):
    # Round to the nearest whole grade; plain int() would truncate toward zero
    # and systematically under-report the prediction (e.g. 12.9 -> 12).
    print(int(round(predicted)), features, actual)


Accuracy of the model is: 77.39%

Predicted Value, [Input Data], Actual Value
12 [16 12  1  0  8] 13
11 [10 12  3  0  4] 12
8 [8 9 2 0 8] 10
13 [13 13  2  0  2] 13
15 [14 15  2  0  0] 16
12 [15 12  3  0  0] 14
10 [13 11  2  1  3] 11
6 [7 7 1 0 3] 8
15 [15 15  2  0  2] 16
19 [18 19  1  0 10] 19
11 [11 12  1  0  0] 10
10 [ 9 11  3  0  2] 11
8 [10  9  2  0  4] 11
15 [15 15  2  0  2] 16
5 [7 6 1 0 5] 7
9 [ 9 10  3  0  0] 0
15 [15 15  2  0  0] 15
12 [13 12  1  0 20] 12
6 [ 7  8  2  3 10] 10
7 [11  8  2  0  2] 8
11 [12 11  2  0 11] 11
4 [6 5 1 0 7] 6
11 [13 11  2  0  2] 11
9 [ 8 10  1  0  0] 11
15 [16 15  3  0  9] 16
9 [10 10  2  0  4] 10
11 [12 12  2  1 12] 13
8 [ 7 10  3  1  0] 10
11 [10 12  2  1  4] 12
8 [10  9  4  0  0] 0
3 [7 5 3 1 0] 0
9 [12 10  2  0  2] 11
15 [16 15  2  0 10] 15
15 [13 15  2  0  2] 16
8 [10  9  2  0  0] 0
6 [7 7 3 0 0] 8
11 [14 12  2  1  0] 12
9 [ 6 10  2  0  4] 10
12 [13 12  3  0  6] 12
8 [9 9 2 0 6] 10
7 [9 9 1 2 8] 9
11 [11 12  2  0 10] 13
10 [10 11  2  0  2] 12
1

