<a href="https://colab.research.google.com/github/elorie-bernard-lacroix/SmartStudy/blob/main/SmartStudy/notebooks/3_41_final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SmartStudy: Final Model

In [1]:
!pip install tabpfn
!pip install bayesian-optimization
!pip install sklearn
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn
!pip install openai
!pip install gradio

Collecting tabpfn
  Downloading tabpfn-2.0.8-py3-none-any.whl.metadata (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.1->tabpfn)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.1->tabpfn)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.1->tabpfn)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.1->tabpfn)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.1->tabpfn)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<3,>=2.1->tabpfn)
  Downloadin

# Load & Preprocess Data


In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from tabpfn import TabPFNRegressor

import openai
import getpass

In [3]:
# Colab-only: mount Google Drive at /content/drive so the dataset CSV read in the
# next cell is reachable. Triggers an interactive authorisation prompt.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the raw dataset, fill missing values by forward linear interpolation and
# drop exact duplicate rows (no in-place mutation, so the cell is re-runnable).
data = pd.read_csv("/content/drive/MyDrive/ECE324_Project/Model/database.csv")
data = data.interpolate(method='linear', limit_direction='forward')
data = data.drop_duplicates()

# Columns retained for modelling and for displaying similar students.
columns_to_keep = ['StudentID', 'Age', 'Gender', 'Ethnicity', 'ParentalEducation',
                   'StudyTimeWeekly', 'Absences', 'Tutoring', 'ParentalSupport',
                   'Extracurricular', 'Sports', 'Music', 'Volunteering', 'GPA', 'GradeClass']
data = data[columns_to_keep]

# Frame used for the nearest-neighbour lookup: demographics plus GPA only.
neighborhood = data[['Age', 'Gender', 'ParentalEducation', 'GPA']].copy()

# Model inputs. The scaler and regressor must be fitted on exactly the columns
# (and column order) that the optimisation objective and the demo build at
# prediction time; otherwise `scaler.transform` rejects the user frame because
# the feature names/count differ. StudentID, Ethnicity and GradeClass are never
# supplied at prediction time, so they are excluded here.
# NOTE(review): GradeClass looks like it may be derived from GPA (target leakage) - confirm.
feature_columns = ['Age', 'Gender', 'ParentalEducation', 'StudyTimeWeekly', 'Absences',
                   'Tutoring', 'ParentalSupport', 'Extracurricular', 'Sports',
                   'Music', 'Volunteering']

scaler = StandardScaler()
input_features = scaler.fit_transform(data[feature_columns])
labels = data['GPA']

# Train the Model using TabPFN

In [8]:
# Hold out 20% of the rows for evaluation; the split is seeded for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    input_features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the TabPFN regressor on the training split and predict the held-out rows.
reg = TabPFNRegressor(random_state=42)
reg.fit(X_train, Y_train)
Y_pred = reg.predict(X_test)

# Report both error metrics on the held-out split.
for metric_label, metric_fn in (("MSE:", mean_squared_error), ("MAE:", mean_absolute_error)):
    print(metric_label, metric_fn(Y_test, Y_pred))

KeyboardInterrupt: 

In [None]:
from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args

# --- Example student profile -------------------------------------------------
# Fixed attributes (not touched by the optimiser).
age = 15
gender = 0
parental_education = 1

# Current habits (the optimiser searches over these dimensions).
study_time_weekly = 4.2
absences = 10
tutoring = 1
parental_support = 1
extracurricular = 1
sports = 0
music = 0
volunteering = 0

# GPA the student is aiming for.
desired_grade = 4.0


# --- Search space for gp_minimize --------------------------------------------
# Dimension order matters: results (`res.x`) come back in this order.
space = [
    Real(0.0, 20.0, name='StudyTimeWeekly'),   # continuous hours per week
    Integer(0, 29, name='Absences'),           # integer count
    Categorical([0, 1], name='Tutoring'),      # binary flag
    Integer(0, 4, name='ParentalSupport'),     # integer level
    # Remaining binary activity flags share the same definition.
    *(
        Categorical([0, 1], name=activity)
        for activity in ('Extracurricular', 'Sports', 'Music', 'Volunteering')
    ),
]

@use_named_args(space)
def objective(**params):
    """Score one candidate set of habits for the optimiser (lower is better).

    The candidate habits in ``params`` (one entry per dimension of ``space``)
    are combined with the fixed profile values ``age``, ``gender`` and
    ``parental_education``, scaled with the fitted ``scaler`` and passed to
    the trained regressor ``reg``.

    Returns:
        float: absolute distance between ``desired_grade`` and the predicted
        GPA. Using the absolute value makes the optimiser look for habits whose
        prediction is *closest* to the target; a signed difference would
        instead reward overshooting the target without bound.
    """
    user_data = {
        'Age': age,
        'Gender': gender,
        'ParentalEducation': parental_education,
        'StudyTimeWeekly': params['StudyTimeWeekly'],
        'Absences': params['Absences'],
        'Tutoring': params['Tutoring'],
        'ParentalSupport': params['ParentalSupport'],
        'Extracurricular': params['Extracurricular'],
        'Sports': params['Sports'],
        'Music': params['Music'],
        'Volunteering': params['Volunteering']
    }

    # Single-row frame so the scaler receives named columns.
    user_df = pd.DataFrame(user_data, index=[0])

    # Scale with the same transformer used on the training data.
    user_input = scaler.transform(user_df)

    # Predict the GPA for this candidate (one row in, one value out).
    pred_grade = reg.predict(user_input)

    # Plain Python float: gp_minimize expects a scalar objective value.
    return float(abs(desired_grade - pred_grade[0]))

# Run the Gaussian-process optimiser over `space` (50 evaluations, fixed seed).
res = gp_minimize(
    objective,
    space,
    n_calls=50,
    random_state=0,
)

# Report the best objective value and the habits that produced it.
for report_label, report_value in (("Best score: ", res.fun), ("Best parameters: ", res.x)):
    print(report_label, report_value)

In [None]:
# Query point: the student's fixed attributes paired with the GPA they want,
# so the search lands near students who already have that GPA.
user_query = pd.DataFrame(
    {
        'Age': age,
        'Gender': gender,
        'ParentalEducation': parental_education,
        'GPA': desired_grade,
    },
    index=[0],
)

# Per-feature multipliers applied before the distance computation.
weights = {
    'Age': 1.0,
    'Gender': 2.0,
    'ParentalEducation': 1.0,
    'GPA': 100.0
}

# Scale every weighted column of both the reference frame and the query.
weighted_neighborhood = neighborhood.assign(
    **{column: neighborhood[column] * factor for column, factor in weights.items()}
)
weighted_user_query = user_query.assign(
    **{column: user_query[column] * factor for column, factor in weights.items()}
)

# Fit the neighbour index on the weighted frame and look up the 5 closest rows.
nbrs_norm = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
nbrs_norm.fit(weighted_neighborhood)
distances3, indices3 = nbrs_norm.kneighbors(weighted_user_query)

# Neighbour indices are positional, so map them back onto `data` with iloc.
nearest_neighbors = data.iloc[indices3.ravel()]
display_columns = ['GPA', 'StudyTimeWeekly', 'Absences', 'Extracurricular', 'Tutoring']
print("🎯 Similar successful students:\n", nearest_neighbors[display_columns])


In [None]:
# Prompt for the OpenAI key interactively (input is not echoed) so the secret
# is never written into the notebook source.
api_key = getpass.getpass("Enter OpenAI API key: ")

In [None]:
# Module-level key: used by both the legacy API and the openai>=1.0 default client.
openai.api_key = api_key

def generate_advice(current_habits, optimized_habits, target_gpa):
    """Ask GPT-4 for a motivational explanation of the recommended habit changes.

    Args:
        current_habits: mapping with the keys 'StudyTimeWeekly', 'Absences',
            'Extracurricular', 'Sports', 'Music', 'Volunteering',
            'ParentalSupport' and 'Tutoring' describing the student today.
        optimized_habits: mapping with the same keys holding the recommended values.
        target_gpa: GPA the student wants to reach (interpolated into the prompt).

    Returns:
        str: the text content of the first completion choice.

    Raises:
        KeyError: if either mapping is missing one of the keys listed above.
    """
    prompt = f"""
Student wants to increase GPA to {target_gpa}.

Current:
- Study: {current_habits['StudyTimeWeekly']} hrs
- Absences: {current_habits['Absences']}
- Extracurricular: {current_habits['Extracurricular']}
- Sports: {current_habits['Sports']}
- Music: {current_habits['Music']}
- Volunteering: {current_habits['Volunteering']}
- Parental Support: {current_habits['ParentalSupport']}
- Tutoring: {current_habits['Tutoring']}

Recommended:
- Study: {optimized_habits['StudyTimeWeekly']} hrs
- Absences: {optimized_habits['Absences']}
- Extracurricular: {optimized_habits['Extracurricular']}
- Sports: {optimized_habits['Sports']}
- Music: {optimized_habits['Music']}
- Volunteering: {optimized_habits['Volunteering']}
- Parental Support: {optimized_habits['ParentalSupport']}
- Tutoring: {optimized_habits['Tutoring']}

You are an academic advisor. Write a motivational and personalized explanation.
"""
    request = dict(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )

    # openai>=1.0 removed `openai.ChatCompletion`; the replacement is
    # `openai.chat.completions.create`, which returns an object rather than a dict.
    # The `OpenAI` client class only exists in the 1.x line, so use it to detect the version.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
        return response.choices[0].message.content

    # Legacy (<1.0) SDK path, kept so older environments continue to work.
    response = openai.ChatCompletion.create(**request)
    return response['choices'][0]['message']['content']

In [None]:
# Snapshot of the example student's present habits, keyed like the prompt expects.
current_habits = dict(
    StudyTimeWeekly=study_time_weekly,
    Absences=absences,
    Extracurricular=extracurricular,
    Sports=sports,
    Music=music,
    Volunteering=volunteering,
    ParentalSupport=parental_support,
    Tutoring=tutoring,
)

# Pair each search dimension's name with the optimiser's best value for it.
optimized_habits = {dim.name: best_value for dim, best_value in zip(space, res.x)}

advice = generate_advice(current_habits, optimized_habits, desired_grade)
print(advice)

# Demo

In [None]:
import gradio as gr
import pandas as pd

def demo_app(study_time, absences, tutoring, parental_support,
             extracurricular, sports, music, volunteering, target_gpa):
    """Gradio callback: optimise habits, find similar students, and explain.

    Args:
        study_time, absences, tutoring, parental_support, extracurricular,
        sports, music, volunteering: the student's current habits as entered
            in the UI; they are only used to describe the "Current" section
            of the GPT prompt.
        target_gpa: GPA the optimiser should get as close to as possible.

    Returns:
        tuple: ``(result_table, example_table, summary)`` where
        ``result_table`` is a one-row DataFrame of recommended habits,
        ``example_table`` is a DataFrame of the 5 nearest students, and
        ``summary`` is the generated advice text.
    """
    current_habits = {
        'StudyTimeWeekly': study_time,
        'Absences': absences,
        'Tutoring': tutoring,
        'ParentalSupport': parental_support,
        'Extracurricular': extracurricular,
        'Sports': sports,
        'Music': music,
        'Volunteering': volunteering
    }

    # Attributes the optimiser must not change; taken from the notebook-level profile.
    user_fixed = {'Age': age, 'Gender': gender, 'ParentalEducation': parental_education}

    def optimize(user_fixed):
        """Search `space` for habits whose predicted GPA is closest to `target_gpa`.

        `user_fixed` supplies the non-optimised columns (Age, Gender,
        ParentalEducation) for every candidate row.
        """
        @use_named_args(space)
        def objective(**params):
            # Fixed attributes first, then the candidate habits, preserving the
            # column order used by the original hand-written dict.
            user_data = {
                **user_fixed,
                'StudyTimeWeekly': params['StudyTimeWeekly'],
                'Absences': params['Absences'],
                'Tutoring': params['Tutoring'],
                'ParentalSupport': params['ParentalSupport'],
                'Extracurricular': params['Extracurricular'],
                'Sports': params['Sports'],
                'Music': params['Music'],
                'Volunteering': params['Volunteering']
            }
            df = pd.DataFrame(user_data, index=[0])
            input_vec = scaler.transform(df)
            pred = reg.predict(input_vec)[0]
            # Plain float so the optimiser receives a scalar objective value.
            return float(abs(target_gpa - pred))

        result = gp_minimize(objective, space, n_calls=50, random_state=0)
        return dict(zip([dim.name for dim in space], result.x))

    optimized_habits = optimize(user_fixed)
    summary = generate_advice(current_habits, optimized_habits, target_gpa)

    # Query for examples generation: fixed attributes plus the GPA being targeted.
    query = {**user_fixed, 'GPA': target_gpa}
    user_query_df = pd.DataFrame(query, index=[0])
    weighted_user_query = user_query_df.copy()
    weighted_neighborhood = neighborhood.copy()

    # Apply the notebook-level feature weights to both sides of the distance.
    for feat, w in weights.items():
        weighted_user_query[feat] *= w
        weighted_neighborhood[feat] *= w

    knn = NearestNeighbors(n_neighbors=5)
    knn.fit(weighted_neighborhood)
    _, indices = knn.kneighbors(weighted_user_query)

    # Neighbour indices are positional, so index `data` with iloc.
    similar_students = data.iloc[indices.flatten()]
    example_table = similar_students[['GPA', 'StudyTimeWeekly', 'Absences', 'Extracurricular', 'Tutoring']]

    result_table = pd.DataFrame([optimized_habits])
    return result_table, example_table, summary


def show_inputs():
    """Build the input form, the three output panes, and wire the submit button.

    Returns:
        The ``gr.Blocks`` object containing the form.
    """
    with gr.Blocks() as app:
        gr.Markdown("""
        ## 📚 SmartStudy: Your Personalized GPA Booster
        **By: Study Architects**
        Helping students build smarter study habits using real-world data and AI.
        Enter your current study habits and we'll recommend improvements, show you real examples, and explain everything with GPT-4.

        ---
        """)

        # First row: time/attendance and support inputs.
        with gr.Row():
            study_time_in = gr.Number(value=4.0, label="Study Time Weekly (hrs)")
            absences_in = gr.Number(value=10, label="Absences")
            tutoring_in = gr.Radio([0, 1], value=1, label="Tutoring (0=No, 1=Yes)")
            support_in = gr.Slider(0, 4, step=1, value=1, label="Parental Support")

        # Second row: binary activity flags.
        with gr.Row():
            extracurricular_in = gr.Radio([0, 1], value=1, label="Extracurricular")
            sports_in = gr.Radio([0, 1], value=0, label="Sports")
            music_in = gr.Radio([0, 1], value=0, label="Music")
            volunteering_in = gr.Radio([0, 1], value=0, label="Volunteering")

        target_in = gr.Number(value=4.0, label="Target GPA")

        with gr.Row():
            submit = gr.Button("🎯 Get Personalized Plan")

        habits_out = gr.Dataframe(label="📘 Optimized Study Habits")
        examples_out = gr.Dataframe(label="📊 Real Student Examples (KNN)")
        summary_out = gr.Textbox(label="🧠 GPT Summary", lines=8)

        # Input order must match demo_app's positional parameters.
        form_inputs = [
            study_time_in, absences_in, tutoring_in, support_in,
            extracurricular_in, sports_in, music_in, volunteering_in,
            target_in,
        ]
        form_outputs = [habits_out, examples_out, summary_out]

        submit.click(fn=demo_app, inputs=form_inputs, outputs=form_outputs)
    return app



with gr.Blocks() as landing:  # landing page
    gr.Markdown("""
    # 🎓 Welcome to SmartStudy
    **By Study Architects**
    A smart tool that helps students reach their academic goals by recommending better study habits, backed by real data and AI insights.

    👉 Click below to get started!
    """)
    start_btn = gr.Button("🚀 Get Started")
    # Hidden until the user clicks "Get Started".
    container = gr.Column(visible=False)

    def launch_app():
        """Return the update that makes the hidden form column visible.

        An event handler changes a component only through its return value;
        calling ``container.update(...)`` and discarding the result leaves
        the column hidden.
        """
        return gr.update(visible=True)

    start_btn.click(fn=launch_app, outputs=container)
    with container:
        show_inputs()

landing.launch()
