# Finding the Great Learning Method

This notebook runs a competition between several machine learning algorithms to find the **best-performing learning method** for predicting student scores.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Import Models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

# Load the data (the path is resolved relative to the kernel's working directory)
raw_df = pd.read_csv('src/Final_Marks_Data.csv')

# Rename the verbose CSV headers to short identifiers
df = raw_df.rename(columns={
    'Daily Study Hours': 'study_hours',
    'Final Exam Marks (out of 100)': 'score',
    'Attendance (%)': 'attendance',
    'Internal Test 1 (out of 40)': 'test1',
    'Internal Test 2 (out of 40)': 'test2',
    'Assignment Score (out of 10)': 'assignment'
})

# Features & Target
features = ['study_hours', 'attendance', 'test1', 'test2', 'assignment']
target = 'score'

# rename() silently skips headers it cannot find, so a CSV whose headers differ
# from the mapping above would otherwise fail with a KeyError that only mentions
# the short names. Report what the file actually contains instead.
missing_columns = [col for col in features + [target] if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Expected columns missing after renaming: {missing_columns}. "
        f"Columns found in the CSV: {list(raw_df.columns)}"
    )

# Fail here with a readable message rather than deep inside an estimator:
# LinearRegression (among others) rejects NaN input with a ValueError.
null_counts = df[features + [target]].isna().sum()
if null_counts.any():
    raise ValueError(
        "Missing values found in model columns:\n"
        f"{null_counts[null_counts > 0]}"
    )

X = df[features]
y = df[target]

# Split Data: hold out 20% for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Data loaded and split successfully.")

FileNotFoundError: [Errno 2] No such file or directory: 'Final_Marks_Data.csv'

## Define Candidates

In [None]:
# Candidate regressors, keyed by the display name shown on the leaderboard.
# random_state is pinned on every estimator that accepts one so reruns match.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    # SVR is not scale invariant, and the features here live on different scales
    # (hours, percent, out of 40, out of 10). Standardizing inside a pipeline
    # means the scaler is fitted only on the data passed to fit(), and the entry
    # still exposes the same fit/predict interface as the other candidates.
    'SVR': make_pipeline(StandardScaler(), SVR())
}

## Run the Competition

In [None]:
print("Starting Model Training & Evaluation...\n")

# One record per candidate: fit on the training split, score on the held-out split.
results = []
for model_name, estimator in models.items():
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)

    scores = {
        'Model': model_name,
        'MSE': mean_squared_error(y_test, predictions),
        'R2': r2_score(y_test, predictions)
    }
    results.append(scores)

    print(f"{model_name} completed. R2: {scores['R2']:.4f}")

# Build the leaderboard, ranked best-first by R2
leaderboard = pd.DataFrame(results)
results_df = leaderboard.sort_values(by='R2', ascending=False)

print("\n--- LEADERBOARD ---")
print(results_df)

## Visualize the Comparison

In [None]:
# Horizontal bar chart of R2 per model, with each bar annotated with its value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='R2', y='Model', data=results_df, palette='viridis', ax=ax)
ax.set_title('Model R2 Score Comparison (Higher is Better)')
ax.set_xlabel('R-Squared Score')

# R2 is at most 1 but can be negative for a model that does worse than predicting
# the mean, so widen the lower limit when needed instead of clipping such bars.
ax.set_xlim(min(0, results_df['R2'].min()), 1)

# Bars are drawn at y = 0, 1, 2, ... in row order. results_df keeps its pre-sort
# index labels, so annotate by row position rather than by index label.
for position, r2_value in enumerate(results_df['R2']):
    ax.text(r2_value, position, f"{r2_value:.4f}", va='center')
plt.show()

## Conclusion

In [None]:
# Take the first row of the leaderboard as the winner.
top_row = results_df.iloc[0]
best_model_name = top_row['Model']
best_model_score = top_row['R2']

print(f"The Winner is: {best_model_name} with an R2 Score of {best_model_score:.4f}!")
print("This is the confirmed 'Great Learning Method' for your dataset.")