In [9]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Step 1: Read the student dataset from the CSV file into a DataFrame.
df = pd.read_csv('student_data.csv')

# Step 2: Separate the predictor columns (X) from the target column (y).
X = df[['StudyHours', 'SleepHours', 'Attendance']]
y = df['Marks']

# Step 3: Hold out 20% of the rows as a test set (80% train / 20% test).
# The fixed random_state makes the shuffle, and so the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

# Step 4: Create the linear regression model and train it on the training
# rows. fit() returns the estimator itself, so both steps chain in one line.
model = LinearRegression().fit(X_train, y_train)

# Step 5: Predict the target for the held-out test rows.
y_pred = model.predict(X_test)

# Step 6: Report the fitted model parameters and the test-set performance.
print("✅ Model Parameters:")
print("Intercept:", model.intercept_)
print("Coefficients:")
# Pair each coefficient with the column it was fitted on instead of
# hard-coding positions and labels, so the report cannot drift out of sync
# with the feature list when columns are added, removed or reordered.
feature_names = list(X_train.columns)
label_width = max(map(len, feature_names), default=0)
for feature_name, coefficient in zip(feature_names, model.coef_):
    # Left-pad names to a common width so the colons stay aligned.
    print(f"  {feature_name:<{label_width}} :", coefficient)

print("\n📊 Test Set Evaluation:")
print("R-squared (R²):", r2_score(y_test, y_pred))
print("Mean Squared Error (MSE):", mean_squared_error(y_test, y_pred))

# Step 7: Show the first 5 test-set predictions next to the actual values.
print("\n🔍 Test Predictions vs Actual (first 5):")
test_result = pd.DataFrame({
    # Predictions are rounded to 2 decimals for display; actuals are as-is.
    'Predicted': y_pred.round(2),
    'Actual': y_test.values,
})
print(test_result.head())


✅ Model Parameters:
Intercept: 11.213237880833589
Coefficients:
  StudyHours : 4.654610212246686
  SleepHours : 1.844752351299615
  Attendance : 0.2843096131005298

📊 Test Set Evaluation:
R-squared (R²): 0.7482323030566602
Mean Squared Error (MSE): 38.25156066835839

🔍 Test Predictions vs Actual (first 5):
   Predicted  Actual
0      69.58   69.95
1      76.93   81.53
2      89.48   84.57
3      64.82   68.87
4      78.29   88.42
