In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

# Read the raw dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="StudentPerformanceFactors.csv")

# Preview the first five rows (left as the cell's last expression so it renders).
df.head(n=5)

In [None]:
# Correlate every numeric column with the exam score and rank the results
# from the most positive to the most negative coefficient.
corr_matrix = df.corr(numeric_only=True)
correlations = corr_matrix['Exam_Score'].sort_values(ascending=False)
print("Correlation with Exam Score:\n", correlations)

# Scatter plot of study hours against exam score, drawn through the explicit
# figure/axes interface; semi-transparent markers keep overlapping points visible.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(df['Hours_Studied'], df['Exam_Score'], alpha=0.5, color='blue')
ax.set_title('Hours Studied vs Exam Score')
ax.set_xlabel('Hours Studied')
ax.set_ylabel('Exam Score')
ax.grid(True)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression

# Features: every numeric column except the target; target: the exam score.
X = df.select_dtypes(include=['number']).drop(columns=['Exam_Score'])
y = df['Exam_Score']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split (and therefore every downstream number) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a linear regression on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

print("Model trained successfully!")

In [None]:
# Evaluate on the held-out test split; the value is reported below as the
# R^2 score with two decimals.
score = model.score(X=X_test, y=y_test)
print(f"Model Accuracy (R^2 Score): {score:.2f}")

In [None]:
# Feature values for one hypothetical student. The values are positional, so
# they must be listed in exactly the same order as the columns of X.
student_features = [5, 85, 7, 75, 2, 3]
student_df = pd.DataFrame([student_features], columns=X.columns)

# predict() returns one value per input row; there is a single row here.
predicted_score = model.predict(student_df)

print(f"Predicted Exam Score: {predicted_score[0]:.2f}/100")

In [None]:
# What-if comparison: predict the score for a baseline student and for the
# same student with two feature values changed, then report the difference.
# Values are positional and must follow the order of X.columns.
original_student = pd.DataFrame([[5, 80, 6, 70, 0, 2]], columns=X.columns)
# Score the baseline from `original_student` itself, so the comparison below
# does not depend on any other student frame that happens to be in the kernel.
score_a = model.predict(original_student)[0]

# Only the 1st (5 -> 10) and 5th (0 -> 3) values differ from the baseline.
# NOTE(review): presumably study hours and tutoring sessions, per the message
# printed below -- confirm against X.columns.
improved_student = pd.DataFrame([[10, 80, 6, 70, 3, 2]], columns=X.columns)
score_b = model.predict(improved_student)[0]

print(f"Original Score: {score_a:.2f}")
print(f"Improved Score: {score_b:.2f}")
print(f"Motivation: By increasing study hours and attending tutoring, the student gains {score_b - score_a:.2f} points.")