In [1]:
import os
import pickle

import joblib
import pandas as pd
import tensorflow as tf
from openai import AzureOpenAI
from tensorflow.keras.models import load_model

In [2]:
# Azure OpenAI connection settings.
# The endpoint may be overridden through the environment; the literal below is
# kept as the default so existing setups keep working.
ENDPOINT = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://mango-bush-0a9e12903.5.azurestaticapps.net/api/v1",
)

# SECURITY: credentials must never be stored in the notebook itself (they end
# up in version control and in every shared copy). Read the key from the
# environment and fail early with an actionable message when it is missing.
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; export it before running this notebook."
    )

API_VERSION = "2024-02-01"
MODEL_NAME = "gpt-4o"

# Shared client used by generate_study_plan for chat completions.
client = AzureOpenAI(
    azure_endpoint=ENDPOINT,
    api_key=API_KEY,
    api_version=API_VERSION,
)

In [28]:
# example
user_input = {
    # Fields grouped under "student level" in this example.
    # 'sex' and 'age' are also read by the pace and score preprocessing functions.
    **dict(
        sex='F', age=17, address='U', famsize='GT3', Pstatus='T',
        Medu=2, Fedu=2, Mjob='at_home', Fjob='teacher', reason='course',
        guardian='mother', traveltime=2, studytime=2, failures=0,
        schoolsup='yes', famsup='yes', paid='no', activities='yes',
        nursery='yes', higher='yes', internet='yes', romantic='no',
        famrel=4, freetime=3, goout=3, Dalc=1, Walc=1, health=3,
        absences=4,
    ),

    # Fields grouped under "average score" in this example.
    **dict(
        EthnicGroup='Group B',
        LunchType='Standard',
        TestPrep='None',
        ParentMaritalStatus='Married',
        PracticeSport='Yes',
        IsFirstChild='No',
        WklyStudyHours='10',
        MathScore=88,
        ReadingScore=90,
        WritingScore=87,
    ),

    # Fields grouped under "learning pace" in this example.
    # 'ParentalEducation' is also read by the score preprocessing function.
    # Note that 'Absences' (capital A) is a different key from 'absences' above.
    **dict(
        ParentalEducation='HighSchool',
        Absences=3,
        Tutoring='Yes',
        ParentalSupport='Yes',
        Extracurricular='No',
        GPA=3.5,
    ),
}

In [29]:
# Load models and preprocessors
# SECURITY: pickle and joblib deserialization can execute arbitrary code, so
# only load artifacts produced by a trusted training pipeline.
# NOTE(review): the recorded output of this cell links to scikit-learn's
# model-persistence limitations page, which suggests the artifacts were saved
# with a different scikit-learn version than the one loading them -- confirm
# and pin the version used for training.

# Use a context manager so the file handle is closed deterministically instead
# of being left for the garbage collector.
with open("models_and_preprocessor/learning_pace.pkl", "rb") as learning_pace_file:
    learning_pace_model = pickle.load(learning_pace_file)

# The Keras model is loaded without its saved training configuration and then
# compiled explicitly with the Adam optimizer and MSE loss.
average_score_model = load_model('models_and_preprocessor/average_score_model.h5', compile=False)
average_score_model.compile(optimizer='adam', loss='mse')

average_score_preprocessor = joblib.load('models_and_preprocessor/preprocessor.pkl')
student_level_model = joblib.load('models_and_preprocessor/logistic_model.pkl')
label_encoders = joblib.load('models_and_preprocessor/label_encoders.pkl')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [30]:
# Inspect the Python type stored under 'age' in the example input.
type(user_input['age'])

int

In [32]:
# Updated preprocessing functions to handle shared attributes
# Process and Predict pace
def preprocess_data_for_pace(user_input):
    """Build the feature mapping consumed by the learning-pace model.

    Args:
        user_input (dict): Raw answers. Reads the keys 'age', 'sex',
            'ParentalEducation', 'Absences', 'Tutoring', 'ParentalSupport',
            'Extracurricular' and 'GPA'.

    Returns:
        dict: Keys 'Age', 'Gender', 'ParentalEducation', 'Absences',
        'Tutoring', 'ParentalSupport', 'Extracurricular', 'GPA', in that order.

    Raises:
        KeyError: If any of the keys listed above is missing from user_input.
    """
    # Ordinal codes for parental education; labels not listed here map to 0.
    education_codes = {
        'HighSchool': 0,
        'SomeCollege': 1,
        'AssociateDegree': 2,
        'Bachelor': 3,
        'Master': 4,
    }

    def yes_flag(answer):
        # Only the exact string 'Yes' counts as 1; everything else is 0.
        if answer == 'Yes':
            return 1
        return 0

    features = {}
    features['Age'] = user_input['age']
    # 'M' is encoded as 0, any other value as 1.
    if user_input['sex'] == 'M':
        features['Gender'] = 0
    else:
        features['Gender'] = 1
    features['ParentalEducation'] = education_codes.get(user_input['ParentalEducation'], 0)
    features['Absences'] = user_input['Absences']
    features['Tutoring'] = yes_flag(user_input['Tutoring'])
    features['ParentalSupport'] = yes_flag(user_input['ParentalSupport'])
    features['Extracurricular'] = yes_flag(user_input['Extracurricular'])
    features['GPA'] = user_input['GPA']
    return features

def predict_learning_pace(model, student_data):
    """Classify a student as a fast or slow learner.

    Args:
        model: Object exposing ``predict``; it is called with a one-row
            DataFrame built from preprocess_data_for_pace(student_data).
        student_data (dict): Raw user answers.

    Returns:
        str: "fast learner" when the first prediction equals 1, otherwise
        "slow learner". The label is also printed.
    """
    features_frame = pd.DataFrame([preprocess_data_for_pace(student_data)])
    predicted_class = model.predict(features_frame)[0]

    # Class 1 is treated as fast; any other value falls through to slow.
    if predicted_class == 1:
        label = "fast learner"
    else:
        label = "slow learner"

    print(label)
    return label

# Process and predict average score
def preprocess_data_for_score(user_input):
    """Turn raw user answers into the transformed input for the average-score model.

    Args:
        user_input (dict): Raw answers. Reads 'sex', 'EthnicGroup',
            'ParentalEducation', 'LunchType', 'TestPrep',
            'ParentMaritalStatus', 'PracticeSport', 'IsFirstChild',
            'WklyStudyHours', 'MathScore', 'ReadingScore' and 'WritingScore'.

    Returns:
        The result of ``average_score_preprocessor.transform`` applied to a
        one-row DataFrame of those fields.

    Raises:
        KeyError: If one of the keys listed above is missing from user_input.
    """
    # 'M' becomes 'Male'; any other value becomes 'Female'.
    row = {'Gender': 'Male' if user_input['sex'] == 'M' else 'Female'}

    # (output column, key in user_input); only ParentEduc is renamed.
    column_sources = (
        ('EthnicGroup', 'EthnicGroup'),
        ('ParentEduc', 'ParentalEducation'),
        ('LunchType', 'LunchType'),
        ('TestPrep', 'TestPrep'),
        ('ParentMaritalStatus', 'ParentMaritalStatus'),
        ('PracticeSport', 'PracticeSport'),
        ('IsFirstChild', 'IsFirstChild'),
        ('WklyStudyHours', 'WklyStudyHours'),
        ('MathScore', 'MathScore'),
        ('ReadingScore', 'ReadingScore'),
        ('WritingScore', 'WritingScore'),
    )
    for column, source_key in column_sources:
        row[column] = user_input[source_key]

    score_frame = pd.DataFrame([row])
    return average_score_preprocessor.transform(score_frame)

def predict_average_score(preprocessed_input):
    """Run the average-score model on already-preprocessed features.

    Args:
        preprocessed_input: Model-ready input, as produced by
            preprocess_data_for_score. It is passed to the model unchanged.

    Returns:
        The first element of the first row of the model output, i.e. the
        predicted average score for the single input row.
    """
    model_output = average_score_model.predict(preprocessed_input)
    # Output is indexed as [row][column]; only the first value is used.
    first_row = model_output[0]
    return first_row[0]

# Process and predict student level
def preprocess_data_for_level(user_input, encoders=None):
    """Label-encode the features used by the student-level model.

    Args:
        user_input (dict): Raw answers; each encoder's feature name is looked
            up with ``.get``, so a missing key is handled like an unseen label.
        encoders (mapping, optional): Feature name -> fitted encoder exposing
            ``classes_`` and ``transform``. Defaults to the notebook-level
            ``label_encoders`` so existing one-argument calls keep working.

    Returns:
        pandas.DataFrame: One row with one column per encoder, in the
        iteration order of ``encoders``.
    """
    if encoders is None:
        # Backward-compatible default: fall back to the globally loaded encoders.
        encoders = label_encoders

    encoded_data = {}
    for feature in encoders:
        encoder = encoders[feature]
        value = user_input.get(feature)
        if value in encoder.classes_:
            encoded_data[feature] = encoder.transform([value])[0]
        else:
            # Best effort: warn and substitute the encoder's first known class
            # rather than failing the whole prediction.
            print(f"Warning: Unseen label '{value}' for feature '{feature}'. Using default value.")
            encoded_data[feature] = encoder.transform([encoder.classes_[0]])[0]
    return pd.DataFrame([encoded_data])


def predict_student_level(user_input, model, label_encoders):
    """Predict whether the student is above or below average.

    Args:
        user_input (dict): Raw answers.
        model: Object exposing ``predict``; it is called with the encoded
            one-row DataFrame.
        label_encoders (mapping): Encoders used for preprocessing. This
            argument was previously accepted but ignored in favour of the
            global; it is now actually passed to the preprocessing step.
            Passing None falls back to the notebook-level encoders.

    Returns:
        str: "above avg" when the first prediction equals 1, otherwise
        "below avg".
    """
    preprocessed_data = preprocess_data_for_level(user_input, label_encoders)
    # preprocess_data_for_level always returns a DataFrame (unseen labels are
    # replaced by a default), so no None check is needed here.
    prediction = model.predict(preprocessed_data)[0]
    return "above avg" if prediction == 1 else "below avg"

# Combined generative method
def generate_study_plan(user_input):
    """Predict the student's profile and request a matching study plan from the chat model.

    Runs the learning-pace, average-score and student-level predictors on
    user_input, embeds the three results in a scripted conversation, and sends
    that conversation to the chat-completions endpoint of ``client``.

    Args:
        user_input (dict): Raw answers, forwarded to the three predictors.

    Returns:
        The message content of the first choice in the completion response.
    """
    score_features = preprocess_data_for_score(user_input)

    learning_pace = predict_learning_pace(learning_pace_model, user_input)
    average_score = predict_average_score(score_features)
    student_level = predict_student_level(user_input, student_level_model, label_encoders)

    # The backslash-continued lines of the f-string below are part of the
    # literal: their leading whitespace is sent to the model as written.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"I need help creating a study plan. \
         My predicted learning pace is {learning_pace}, if learning pace is fast, student can be multi-tasked and assign to 4 or more hours per day. Else assign student to study less than 4 hours per day. \
         Average score is around {average_score:.1f}, average score is calculated from past exam scores. \
         And student level is {student_level}, such level is categorized above average or below average depend on prediction model."},
    ]

    # Fixed follow-up turns that narrow the request to a one-week calculus plan.
    scripted_turns = (
        ("assistant", "Sure, I'd be happy to help! What subjects or topics do you need to include in your study plan?"),
        ("user", "The plan should cover mathematics over the next week."),
        ("assistant", "Do you have any specific goals or exams for these subjects during this period?"),
        ("user", "Yes, I'm preparing for midterms and need to cover calculus in mathematics."),
    )
    conversation.extend({"role": role, "content": text} for role, text in scripted_turns)

    reply = client.chat.completions.create(model=MODEL_NAME, messages=conversation)
    return reply.choices[0].message.content


# Run the full pipeline on the example input and print the generated plan.
study_plan = generate_study_plan(user_input)
print(study_plan)

fast learner
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Great! Let's create a structured study plan for your calculus midterms over the next week. Based on your status as a fast learner capable of multitasking and studying more than 4 hours per day, I'll structure your study plan accordingly.

### 7-Day Study Plan for Calculus Midterms

#### Day 1: Functions and Limits
- **9:00 AM - 11:00 AM:** Review lecture notes and textbook chapters on Functions.
- **11:15 AM - 1:15 PM:** Practice problems on Functions.
- **1:15 PM - 2:15 PM:** Lunch/Break
- **2:15 PM - 4:15 PM:** Review lecture notes and textbook chapters on Limits.
- **4:30 PM - 6:30 PM:** Practice problems on Limits.
- **8:00 PM - 9:00 PM:** Review key concepts and make summary notes.

#### Day 2: Continuity and Derivatives
- **9:00 AM - 11:00 AM:** Study continuity from textbook and notes.
- **11:15 AM - 1:15 PM:** Practice problems on Continuity.
- **1:15 PM - 2:15 PM:** Lunch/Break
- **2:15 PM - 4