<a href="https://colab.research.google.com/github/elorie-bernard-lacroix/SmartStudy/blob/main/SmartStudy/notebooks/3_41_final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#

In [1]:
!pip install tabpfn
!pip install bayesian-optimization
!pip install sklearn
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn
!pip install openai
!pip install gradio
!pip install scikit-optimize



Collecting sklearn
  Using cached sklearn-0.0.post12.tar.gz (2.6 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m√ó[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m‚îÇ[0m exit code: [1;36m1[0m
  [31m‚ï∞‚îÄ>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m√ó[0m Encountered error while generating package metadata.
[31m‚ï∞‚îÄ>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


# Load & Preprocess Data


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from tabpfn import TabPFNRegressor

import openai
import getpass

from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
data = pd.read_csv("/content/drive/MyDrive/ECE324_Project/Model/database.csv") #load the dataset
data = data.interpolate(method='linear', limit_direction='forward')
data.duplicated().sum()
data.drop_duplicates(inplace=True)

columns_to_keep = ['Age', 'Gender', 'ParentalEducation', # select relevant columns
                   'StudyTimeWeekly', 'Absences', 'Tutoring', 'ParentalSupport',
                   'Extracurricular', 'Sports', 'Music', 'Volunteering', 'GPA']
data = data[columns_to_keep]

scaler = StandardScaler()
input = scaler.fit_transform(data.drop('GPA', axis=1))

labels = data['GPA']

X_train, X_test, Y_train, Y_test = train_test_split(input, labels, test_size=0.2, random_state=42)

# Train the Model using TabPFN

In [9]:
reg = TabPFNRegressor(random_state=42)
reg.fit(X_train, Y_train)

Y_pred = reg.predict(X_test)
print("MSE:", mean_squared_error(Y_test, Y_pred))
print("MAE:", mean_absolute_error(Y_test, Y_pred))

MSE: 0.039094818963556204
MAE: 0.15616286595164913


## Optimize Habits

In [13]:
# user input
age = 15
gender = 0
study_time_weekly = 4.2
absences = 10
extracurricular = 1
sports = 0
music = 0
volunteering = 0
parental_education = 1
parental_support = 1
tutoring = 1
desired_grade = 4.0


space = [
    Real(0.0, 20.0, name='StudyTimeWeekly'),  # Continuous variable
    Integer(0, 29, name='Absences'),  # Integer variable
    Categorical([0, 1], name='Tutoring'),  # Categorical variable
    Integer(0, 4, name='ParentalSupport'),  # Integer variable
    Categorical([0, 1], name='Extracurricular'),  # Categorical variable
    Categorical([0, 1], name='Sports'),  # Categorical variable
    Categorical([0, 1], name='Music'),  # Categorical variable
    Categorical([0, 1], name='Volunteering')  # Categorical variable
  ]

@use_named_args(space)
def objective(**params):
    user_data = {
        'Age': age,
        'Gender': gender,
        'ParentalEducation': parental_education,
        'StudyTimeWeekly': params['StudyTimeWeekly'],
        'Absences': params['Absences'],
        'Tutoring': params['Tutoring'],
        'ParentalSupport': params['ParentalSupport'],
        'Extracurricular': params['Extracurricular'],
        'Sports': params['Sports'],
        'Music': params['Music'],
        'Volunteering': params['Volunteering']
    }

    user_df = pd.DataFrame(user_data, index=[0])
    print(user_df)

    #process data
    user_input = scaler.transform(user_df)

    # predict grade
    pred_grade = reg.predict([user_input[0]])
    print(pred_grade)
    score = desired_grade - pred_grade[0] # rating based on how close it can get to 4.0

    return score

res = gp_minimize(objective, space, n_calls=50, random_state=0)

print("Best score: ", res.fun)
print("Best parameters: ", res.x)

   Age  Gender  ParentalEducation  StudyTimeWeekly  Absences  Tutoring  \
0   15       0                  1        11.856892        24         1   

   ParentalSupport  Extracurricular  Sports  Music  Volunteering  
0                3                1       0      0             0  
[1.3533225]
   Age  Gender  ParentalEducation  StudyTimeWeekly  Absences  Tutoring  \
0   15       0                  1         5.453126        14         1   

   ParentalSupport  Extracurricular  Sports  Music  Volunteering  
0                2                0       1      0             1  
[2.0084481]
   Age  Gender  ParentalEducation  StudyTimeWeekly  Absences  Tutoring  \
0   15       0                  1         7.364831        28         0   

   ParentalSupport  Extracurricular  Sports  Music  Volunteering  
0                3                0       1      1             1  
[0.67229664]
   Age  Gender  ParentalEducation  StudyTimeWeekly  Absences  Tutoring  \
0   15       0                  1       

# KNN Examples

In [14]:
# Create the neighborhood DataFrame by dropping specific columns
neighborhood = data.drop(columns=['StudyTimeWeekly', 'Absences',
                                   'Tutoring', 'ParentalSupport', 'Extracurricular', 'Sports', 'Music',
                                   'Volunteering'])
print(neighborhood.head())

user_query = {
    'Age': age,
    'Gender': gender,
    'ParentalEducation': parental_education,
    'GPA': desired_grade  # simulate that we want to be near those with target GPA
}
user_query = pd.DataFrame(user_query, index=[0])

# apply weights
weights = {
    'Age': 1.0,
    'Gender': 2.0,
    'ParentalEducation': 1.0,
    'GPA': 100.0
}

weighted_neighborhood = neighborhood.copy()
weighted_user_query = user_query.copy()

for feature, weight in weights.items():
    weighted_neighborhood[feature] *= weight
    weighted_user_query[feature] *= weight

nbrs_norm = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(weighted_neighborhood)
distances3, indices3 = nbrs_norm.kneighbors(weighted_user_query)

nearest_neighbors = data.iloc[indices3.flatten()]
print("Similar successful students:\n", nearest_neighbors[['GPA', 'StudyTimeWeekly', 'Absences', 'Extracurricular', 'Sports', 'Music', 'Volunteering', 'Tutoring']])


   Age  Gender  ParentalEducation       GPA
0   17       1                  2  2.929196
1   18       0                  1  3.042915
2   15       0                  3  0.112602
3   17       1                  3  2.054218
4   17       1                  2  1.288061
Similar successful students:
       GPA  StudyTimeWeekly  Absences  Extracurricular  Sports  Music  \
1919  4.0        17.442121         1                1       1      0   
442   4.0        19.424398         0                0       1      1   
1278  4.0        18.899696         3                1       1      0   
2319  4.0         9.285447         0                1       0      1   
2028  4.0        18.656924         0                1       0      0   

      Volunteering  Tutoring  
1919             0         1  
442              1         0  
1278             0         1  
2319             1         0  
2028             1         1  


In [22]:
openai.api_key = getpass.getpass("üîê Enter your OpenAI API key: ")

üîê Enter your OpenAI API key: ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


In [30]:
def generate_recommendations_gpt4(current_habits, optimized_habits, desired_grade):
    prompt = f"""
Student is aiming to improve GPA to {desired_grade}.

Current Habits:
- Study Time: {current_habits['StudyTimeWeekly']} hrs
- Absences: {current_habits['Absences']}
- Extracurricular: {current_habits['Extracurricular']}
- Sports: {current_habits['Sports']}
- Music: {current_habits['Music']}
- Volunteering: {current_habits['Volunteering']}
- Parental Support: {current_habits['ParentalSupport']}
- Tutoring: {current_habits['Tutoring']}

Recommended Optimized Habits:
- Study Time: {optimized_habits['StudyTimeWeekly']} hrs
- Absences: {optimized_habits['Absences']}
- Extracurricular: {optimized_habits['Extracurricular']}
- Sports: {optimized_habits['Sports']}
- Music: {optimized_habits['Music']}
- Volunteering: {optimized_habits['Volunteering']}
- Parental Support: {optimized_habits['ParentalSupport']}
- Tutoring: {optimized_habits['Tutoring']}

As an academic advisor, explain how these changes will help improve the student‚Äôs GPA. Also provide personalized and motivational advice to help them stay on track.
"""

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
        max_tokens=600
    )

    return response.choices[0].message["content"]


In [31]:
recommendation_text = generate_recommendations_gpt4(current_habits, optimized_habits, desired_grade)
print(recommendation_text)


The recommended optimized habits are designed to help you achieve your academic goals. Here's how they will contribute to your success:

1. Study Time: Increasing your study time from 4.2 to 20 hours per week will give you more time to understand and master the course material. This will lead to better performance on exams and assignments, thus boosting your GPA.

2. Absences: By eliminating absences, you ensure that you do not miss any class material or important announcements. Regular class attendance also shows your professors your commitment to your studies, which can positively influence your class participation grade.

3. Extracurricular Activities: Maintaining involvement in an extracurricular activity can help you develop time management and leadership skills, which are beneficial for academic success.

4. Sports: Participating in a sport can provide a healthy outlet for stress, improve focus, and foster teamwork skills. Balancing academics with sports can also demonstrate your

# Demo

In [34]:
import gradio as gr
import pandas as pd
from sklearn.neighbors import NearestNeighbors

def demo_app(study_time, absences, tutoring, parental_support,
             extracurricular, sports, music, volunteering, target_gpa):

    current_habits = {
        'StudyTimeWeekly': study_time,
        'Absences': absences,
        'Tutoring': tutoring,
        'ParentalSupport': parental_support,
        'Extracurricular': extracurricular,
        'Sports': sports,
        'Music': music,
        'Volunteering': volunteering
    }

    def optimize(user_fixed):
        @use_named_args(space)
        def objective(**params):
            user_data = {
                'Age': age,
                'Gender': gender,
                'ParentalEducation': parental_education,
                'StudyTimeWeekly': params['StudyTimeWeekly'],
                'Absences': params['Absences'],
                'Tutoring': params['Tutoring'],
                'ParentalSupport': params['ParentalSupport'],
                'Extracurricular': params['Extracurricular'],
                'Sports': params['Sports'],
                'Music': params['Music'],
                'Volunteering': params['Volunteering']
            }
            df = pd.DataFrame(user_data, index=[0])
            input_vec = scaler.transform(df)
            pred = reg.predict(input_vec)[0]
            return abs(target_gpa - pred)

        result = gp_minimize(objective, space, n_calls=50, random_state=0)
        return dict(zip([dim.name for dim in space], result.x))

    optimized_habits = optimize({'Age': age, 'Gender': gender, 'ParentalEducation': parental_education})
    summary = generate_recommendations_gpt4(current_habits, optimized_habits, target_gpa)

    query = {    # for generating exmaples
        'Age': age,
        'Gender': gender,
        'ParentalEducation': parental_education,
        'GPA': target_gpa
    }
    user_query_df = pd.DataFrame(query, index=[0])
    weighted_user_query = user_query_df.copy()
    weighted_neighborhood = neighborhood.copy()

    for feat, w in weights.items():
        weighted_user_query[feat] *= w
        weighted_neighborhood[feat] *= w

    knn = NearestNeighbors(n_neighbors=5)
    knn.fit(weighted_neighborhood)
    _, indices = knn.kneighbors(weighted_user_query)

    similar_students = data.iloc[indices.flatten()]
    example_table = similar_students[[
    'GPA',
    'StudyTimeWeekly',
    'Absences',
    'Extracurricular',
    'Sports',
    'Music',
    'Volunteering',
    'Tutoring'
]]

    result_table = pd.DataFrame([optimized_habits])
    return result_table, example_table, summary


def app_ui():
    with gr.Blocks() as app:

        with gr.Row():
            study_time = gr.Number(label="üìò Study Time Weekly (hrs)", value=4.0)
            absences = gr.Number(label="üö´ Absences", value=10)
            tutoring = gr.Radio([0, 1], label="üéì Tutoring (0=No, 1=Yes)", value=1)
            parental_support = gr.Slider(0, 4, step=1, label="üë®‚Äçüë©‚Äçüëß Parental Support", value=1)

        with gr.Row():
            extracurricular = gr.Radio([0, 1], label="üé≠ Extracurricular", value=1)
            sports = gr.Radio([0, 1], label="üèÄ Sports", value=0)
            music = gr.Radio([0, 1], label="üéµ Music", value=0)
            volunteering = gr.Radio([0, 1], label="üôå Volunteering", value=0)

        target_gpa = gr.Number(label="üéØ Target GPA", value=4.0)

        with gr.Row():
            submit = gr.Button("üöÄ Get Personalized Plan")

        output1 = gr.Dataframe(label="‚úÖ Optimized Study Habits")
        output2 = gr.Dataframe(label="üìä Similar Students (KNN)")
        output3 = gr.Textbox(label="üß† GPT Summary", lines=8)

        submit.click(fn=demo_app,
                     inputs=[study_time, absences, tutoring, parental_support,
                             extracurricular, sports, music, volunteering, target_gpa],
                     outputs=[output1, output2, output3])
    return app


with gr.Blocks() as landing: #for landing page
    gr.Markdown("""
<center>
<h1>üéì <span style='color:#4A90E2'>Your Personalized GPA Booster:</span> SmartStudy</h1>
<h3>By <i>Study Architects</i></h3>
<p style="max-width: 700px; font-size: 17px;">
A smart tool that helps students reach their academic goals by recommending better study habits,
backed by real student data and explained using GPT-4.
</p>
</center>
""")


    start_btn = gr.Button("‚ú® Get Started")
    app_container = gr.Column(visible=False)

    start_btn.click(lambda: gr.update(visible=True), None, app_container)

    with app_container:
        app_ui()

landing.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://69d46d454d31fbc39f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


