In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib

In [4]:
# Preview a dataset produced with the generator's default arguments. As the
# last expression of the cell, the returned value is what the notebook renders.
# NOTE(review): generate_synthetic_data is not defined in the cells visible
# here — presumably it lives in an earlier cell; confirm that cell is still
# present so the notebook survives Restart Kernel -> Run All.
generate_synthetic_data()

Unnamed: 0,num_subjects,hours_per_day,num_topics,num_days,recommended_total_study_minutes
0,1,1,35,5,212
1,5,4,50,44,2235
2,4,3,35,38,1674
3,1,8,37,17,1467
4,2,3,20,9,541
...,...,...,...,...,...
995,1,6,34,20,1007
996,1,8,5,45,60
997,4,4,6,44,340
998,3,6,35,41,1086


In [5]:

# Generate the dataset
# Request 2000 samples and keep the result as `synthetic_df`, the frame the
# model-training code further down reads its features and target from.
print("Generating synthetic data...")
synthetic_df = generate_synthetic_data(num_samples=2000)

# Last expression of the cell: render per-column summary statistics as a
# quick sanity check of the generated value ranges.
synthetic_df.describe()

Generating synthetic data...


Unnamed: 0,num_subjects,hours_per_day,num_topics,num_days,recommended_total_study_minutes
count,2000.0,2000.0,2000.0,2000.0,2000.0
mean,3.0485,4.553,27.557,30.022,1107.2575
std,1.410015,2.317731,13.571103,17.414625,534.218373
min,1.0,1.0,5.0,1.0,0.0
25%,2.0,3.0,15.0,15.0,688.75
50%,3.0,5.0,28.0,29.5,1066.0
75%,4.0,7.0,40.0,45.0,1480.25
max,5.0,8.0,50.0,60.0,2788.0


In [6]:

# --- 2. Model Training ---

# Separate the synthetic frame into the feature matrix (X) and the target (y).
feature_columns = ['num_subjects', 'hours_per_day', 'num_topics', 'num_days']
target_column = 'recommended_total_study_minutes'
X = synthetic_df[feature_columns]
y = synthetic_df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"\nTraining data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Fit a 100-tree random forest on the training split.
print("\nTraining RandomForestRegressor model...")
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)
print("Model training complete.")

# Score the fitted model against the held-out rows.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print(f"Mean Absolute Error (MAE): {mae:.2f} minutes")
print(f"R-squared (R2) Score: {r2:.2f}")

# Persist the fitted estimator; `model_filename` is also read by the
# prediction helper and the example code that follow.
model_filename = 'study_schedule_model.joblib'
joblib.dump(model, model_filename)
print(f"\nModel saved to {model_filename}")


def predict_study_time(num_subjects, hours_per_day, num_topics, num_days, loaded_model=None):
    """
    Estimate the total study minutes for a study plan, plus the resulting
    average number of minutes per topic.

    Args:
        num_subjects (int): How many subjects are being studied.
        hours_per_day (int): Study hours available each day.
        num_topics (int): Count of topics that have to be covered.
        num_days (int): Days available to cover all topics.
        loaded_model (optional): An already-loaded estimator exposing ``predict``.
            When omitted, the estimator is read from ``model_filename`` with
            joblib on every call.

    Returns:
        tuple: ``(recommended_total_study_minutes, average_time_per_topic_minutes)``.
               ``(None, None)`` when the model file does not exist.
    """
    estimator = loaded_model
    if estimator is None:
        # No estimator supplied by the caller: fall back to the saved artifact.
        try:
            estimator = joblib.load(model_filename)
        except FileNotFoundError:
            print(f"Error: Model file '{model_filename}' not found. Please train and save the model first.")
            return None, None
        else:
            print(f"Model loaded from {model_filename}")

    # Single-row frame whose column names and order mirror the training features.
    feature_names = ['num_subjects', 'hours_per_day', 'num_topics', 'num_days']
    feature_values = [num_subjects, hours_per_day, num_topics, num_days]
    features = pd.DataFrame([feature_values], columns=feature_names)
    raw_minutes = estimator.predict(features)[0]

    # Round to whole minutes and clamp at zero so a negative estimate is never reported.
    predicted_minutes = int(max(0, round(raw_minutes)))

    # Divide only when there is at least one topic; otherwise report 0.
    per_topic_minutes = predicted_minutes / num_topics if num_topics > 0 else 0

    return predicted_minutes, per_topic_minutes


print("\n--- Example Prediction ---")

# Read the persisted estimator back from disk so the example exercises the
# saved artifact; `loaded_model` is reused by the interactive section below.
loaded_model = joblib.load(model_filename)

# Sample study plan fed to the predictor.
example_subjects, example_hours_day, example_topics, example_days = 3, 4, 25, 30

predicted_minutes, time_per_topic = predict_study_time(
    num_subjects=example_subjects,
    hours_per_day=example_hours_day,
    num_topics=example_topics,
    num_days=example_days,
    loaded_model=loaded_model,
)

# predict_study_time returns (None, None) when it cannot obtain a model, so
# only report when a prediction actually came back.
if predicted_minutes is not None:
    print(f"Input: Subjects={example_subjects}, Hours/Day={example_hours_day}, Topics={example_topics}, Days={example_days}")
    print(f"Recommended Total Study Minutes: {predicted_minutes} minutes")
    print(f"Which is approximately {predicted_minutes / 60:.2f} hours over {example_days} days.")
    print(f"Average daily study minutes: {predicted_minutes / example_days:.2f} minutes")
    if time_per_topic is not None:
        print(f"Average time per topic: {time_per_topic:.2f} minutes")

# 5. Interactive Model Testing
# Prompts for the four study parameters, validates them, and prints the model's
# recommendation. Relies on `predict_study_time` and `loaded_model` defined
# earlier in this cell.
print("\n--- Interactive Model Testing ---")
print("Enter your study parameters to get a recommendation.")

# Parse the prompts in their own try block so that only a failed int()
# conversion is reported as invalid input. Previously the prediction ran
# inside the same block, so a ValueError raised while predicting was
# mislabelled as a typing mistake.
try:
    test_subjects = int(input("Enter number of subjects (e.g., 3): "))
    test_hours_day = int(input("Enter hours you can study per day (e.g., 4): "))
    test_topics = int(input("Enter total topics to cover (e.g., 25): "))
    test_days = int(input("Enter days to cover these topics (e.g., 30): "))
except ValueError:
    print("Invalid input. Please enter integers only.")
except Exception as e:
    # Covers stdin problems (e.g. no interactive input available) without
    # aborting the rest of the notebook.
    print(f"An error occurred: {e}")
else:
    if not all(x > 0 for x in [test_subjects, test_hours_day, test_topics, test_days]):
        print("All inputs must be positive numbers. Please try again.")
    else:
        # Failures while predicting or reporting are surfaced with their real
        # message instead of being attributed to the user's input.
        try:
            test_predicted_minutes, test_time_per_topic = predict_study_time(
                test_subjects,
                test_hours_day,
                test_topics,
                test_days,
                loaded_model=loaded_model
            )

            # (None, None) means no model could be obtained; stay silent as before.
            if test_predicted_minutes is not None:
                print("\n--- Your Custom Study Recommendation ---")
                print(f"Input: Subjects={test_subjects}, Hours/Day={test_hours_day}, Topics={test_topics}, Days={test_days}")
                print(f"Recommended Total Study Minutes: {test_predicted_minutes} minutes")
                print(f"Approx. Total Study Hours: {test_predicted_minutes / 60:.2f} hours")
                print(f"Average Daily Study Minutes: {test_predicted_minutes / test_days:.2f} minutes")
                print(f"Average Time Per Topic: {test_time_per_topic:.2f} minutes")
                print("\nRemember, this is a model-based recommendation. Adjust as needed!")
        except Exception as e:
            print(f"An error occurred: {e}")


Training data shape: (1600, 4)
Testing data shape: (400, 4)

Training RandomForestRegressor model...
Model training complete.

Model Evaluation:
Mean Absolute Error (MAE): 171.26 minutes
R-squared (R2) Score: 0.82

Model saved to study_schedule_model.joblib

--- Example Prediction ---
Input: Subjects=3, Hours/Day=4, Topics=25, Days=30
Recommended Total Study Minutes: 1127 minutes
Which is approximately 18.78 hours over 30 days.
Average daily study minutes: 37.57 minutes
Average time per topic: 45.08 minutes

--- Interactive Model Testing ---
Enter your study parameters to get a recommendation.


Enter number of subjects (e.g., 3):  4
Enter hours you can study per day (e.g., 4):  5
Enter total topics to cover (e.g., 25):  45
Enter days to cover these topics (e.g., 30):  30



--- Your Custom Study Recommendation ---
Input: Subjects=4, Hours/Day=5, Topics=45, Days=30
Recommended Total Study Minutes: 1990 minutes
Approx. Total Study Hours: 33.17 hours
Average Daily Study Minutes: 66.33 minutes
Average Time Per Topic: 44.22 minutes

Remember, this is a model-based recommendation. Adjust as needed!
