# AMS 325 Project: Part 1: Multiple Linear Regression

##  Members: Hangting Lu, Chisom Uwakwe, Kevin Coughlin

### This project assesses students based on the number of hours studied, previous test scores, number of extracurricular activities, number of hours slept, and number of sample tests practiced.

### Using these criteria, each student is given a performance index score from 10 to 100, which represents the student's overall academic performance.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.graphics.api as smg

In [None]:
# Reads the downloaded dataset into a DataFrame and renames its columns to
# underscore-separated identifiers so they are easier to reference in code.
# NOTE(review): the path below is machine-specific; point it at wherever the
# CSV lives before running this cell elsewhere.
df_stud = pd.read_csv("C:\\Users\\chiso\\OneDrive\\Documents\\540256\\Student_Performance.csv")
df_stud.rename(columns={
    'Hours Studied': 'Hours_Studied',
    'Previous Scores': 'Previous_Scores',
    'Extracurricular Activities': 'Extracurriculars',
    'Sleep Hours': 'Sleep_Hours',
    'Sample Question Papers Practiced': 'Samples_Practiced',
    'Performance Index': 'Performance_Index',
}, inplace=True)

# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called on its own; wrapping it in print() would emit a stray "None".
df_stud.info()
# Per-column minimum and maximum, to see the range each variable covers.
print(df_stud.min())
print(df_stud.max())


In [None]:
# Builds the modelling subset by leaving out the "Extracurriculars" column,
# then previews its first five rows.
df_sub = df_stud.drop("Extracurriculars", axis="columns")
df_sub.head()

In [None]:
# Dependent variable (y): the performance index.
y = df_sub["Performance_Index"]

# Independent variables (X): every remaining column, plus the constant term
# that statsmodels needs in order to estimate an intercept.
X = sm.add_constant(df_sub.drop(columns=["Performance_Index"]))

# Fits the ordinary-least-squares regression of y on X.
model = sm.OLS(endog=y, exog=X).fit()

# Prints the regression summary table.
print(model.summary())

In [None]:
# Q-Q plot of the OLS residuals as a normality check: the points should fall
# roughly along a straight line.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 4))
smg.qqplot(data=model.resid, ax=ax)
fig.tight_layout()

In [None]:
# Fitted parameters of the OLS model (the "const" intercept and the slopes).
coefficients = model.params

# Renders every parameter as one term, rounded to two decimals: the intercept
# ("const") as a bare number, each slope as "<value>*<column name>".
model_equation = "Performance Index = " + " + ".join(
    f"{value:.2f}" if name == "const" else f"{value:.2f}*{name}"
    for name, value in coefficients.items()
)

# Reports the equation and the model's R-squared.
print("Model Equation:", model_equation, sep="\n")
print(f"R squared value is {model.rsquared}.")

In [None]:
# Removes the constant column that was added for statsmodels, then splits the
# data into training (80%) and testing (20%) sets with a fixed seed.
# errors="ignore" keeps this cell re-runnable: on a second run "const" is
# already gone from X and a plain drop would raise KeyError.
X = X.drop(columns="const", errors="ignore")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Builds a scikit-learn linear regression (with an intercept term) and trains
# it on the training split.
lm = LinearRegression(fit_intercept=True)
lm.fit(X=X_train, y=y_train)

In [None]:
# Predicts the performance index for the held-out test rows using the
# trained model.
y_pred = lm.predict(X=X_test)

In [None]:
# Scatter plot of actual test-set scores (x-axis) against the model's
# predictions (y-axis).
plt.scatter(x=y_test, y=y_pred)
plt.title(label="True vs Predicted performance index scores")
plt.xlabel(xlabel="True values")
plt.ylabel(ylabel="Predicted values")
plt.show()

In [None]:
# Reports the intercept and the per-feature slopes learned by the
# scikit-learn model.
print(f"Intercept: {lm.intercept_!s}")
print(f"Coefficients: {lm.coef_!s}")

# What's Your PI (Performance Index)?

In [None]:
# Displays the fitted OLS parameters (the "const" intercept and one slope per
# predictor) that the prediction form below reads its coefficients from.
coefficients

In [None]:
import tkinter as tk
from tkinter import messagebox

In [None]:
# Model coefficients: unpacks the intercept and the four slopes from the
# fitted OLS parameters, in the order listed in the lookup tuple.
(
    intercept,
    coef_weekly_study,
    coef_previous_scores,
    coef_sleep_hours,
    coef_samples_practiced,
) = (
    coefficients[param_name]
    for param_name in (
        'const',
        'Hours_Studied',
        'Previous_Scores',
        'Sleep_Hours',
        'Samples_Practiced',
    )
)

In [None]:
# Function to calculate Performance Index
def calculate_performance_index():
    """Read the form inputs, predict the Performance Index and display it.

    Reads the four entry widgets (weekly study hours, previous scores,
    nightly sleep hours, sample tests practiced), applies the fitted
    regression coefficients and writes the prediction into ``label_result``.
    The prediction is capped at 100 *before* it is displayed; when the cap is
    hit, a "Max Rating" information dialog is shown as well.

    Invalid input never raises: a non-numeric field, or a day whose study
    plus sleep hours exceed 24, produces an error dialog and leaves the
    result label unchanged.
    """
    # Only the float() conversions can raise ValueError, so the try block is
    # limited to them.
    try:
        weekly_study = float(entry_hours_studied.get())
        previous_scores = float(entry_previous_scores.get())
        sleep_hours = float(entry_sleep_hours.get())
        samples_practiced = float(entry_samples_practiced.get())
    except ValueError:
        messagebox.showerror("Invalid Input", "Please enter valid numeric values.")
        return

    # Study time is entered per week while sleep is entered per night, so the
    # study figure is converted to a daily value before checking against 24.
    hours_studied = weekly_study / 7
    if hours_studied + sleep_hours > 24:
        messagebox.showerror("Invalid Input", "The sum of Hours Studied and Sleep Hours cannot exceed 24 hours.")
        return

    # Calculating Performance Index based on the model equation
    performance_index = (
        intercept
        + coef_weekly_study * weekly_study
        + coef_previous_scores * previous_scores
        + coef_sleep_hours * sleep_hours
        + coef_samples_practiced * samples_practiced
    )

    # Apply the upper limit before displaying, so the label never shows a
    # value above 100.
    # NOTE(review): no lower bound is enforced; predictions below the scale's
    # minimum are displayed as computed.
    reached_max = performance_index >= 100
    if reached_max:
        performance_index = 100

    # Displaying the result in the label
    label_result.config(text=f'Predicted Performance Index: {performance_index:.2f}')
    if reached_max:
        messagebox.showinfo("Max Rating", "Max rating achieved!")

In [None]:
# Setting up the main window
root = tk.Tk()
root.title("Student Performance Index Prediction")


def _add_form_row(row, prompt):
    """Place a prompt label and an empty entry box side by side on grid row ``row``.

    Returns the ``(label, entry)`` widget pair.
    """
    cell_padding = {"padx": 10, "pady": 5}
    label = tk.Label(root, text=prompt)
    label.grid(row=row, column=0, **cell_padding)
    entry = tk.Entry(root)
    entry.grid(row=row, column=1, **cell_padding)
    return label, entry


# One form row per model input, top to bottom.
label_hours_studied, entry_hours_studied = _add_form_row(
    0, "Enter Hours Studied per Week:"
)
label_previous_scores, entry_previous_scores = _add_form_row(
    1, "Enter Previous Test Scores:"
)
label_sleep_hours, entry_sleep_hours = _add_form_row(
    2, "Enter Hours of Sleep per Night:"
)
label_samples_practiced, entry_samples_practiced = _add_form_row(
    3, "Enter Number of Sample Tests Practiced:"
)

In [None]:
# Button that runs the prediction when clicked; it spans both form columns.
button_calculate = tk.Button(
    master=root,
    text="Calculate Performance Index",
    command=calculate_performance_index,
)
button_calculate.grid(row=4, column=0, columnspan=2, pady=10)

# Label that the button callback fills in with the predicted value.
label_result = tk.Label(master=root, text="Predicted Performance Index: ")
label_result.grid(row=5, column=0, columnspan=2, pady=10)

# Hands control to the Tkinter event loop; this call blocks until the window
# is closed.
root.mainloop()