In [1]:
!pip install fastapi uvicorn pandas numpy scikit-learn joblib kmodes pydantic nest_asyncio requests

Collecting fastapi
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting kmodes
  Downloading kmodes-0.12.2-py2.py3-none-any.whl.metadata (8.1 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.11-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kmodes-0.12.2-py2.py3-none-any.whl (20 kB)
Downloading starlette-0.46.1-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, sta

# importing libraries

In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from kmodes.kprototypes import KPrototypes
import joblib
import json

# 2. Loading and Preprocessing the Data


In [3]:
from google.colab import drive
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

file_path = "/content/drive/My Drive/Graduation_project/data.csv"

# Load the data
df = pd.read_csv(file_path)

# Drop rows with missing values
df = df.dropna()

# Define feature columns
numerical_cols = ['Hours_Studied', 'Attendance', 'Sleep_Hours', 'Previous_Scores', 'Tutoring_Sessions', 'Physical_Activity']
categorical_cols = ['Parental_Involvement', 'Access_to_Resources', 'Extracurricular_Activities', 'Motivation_Level', 'Internet_Access', 'Family_Income', 'Teacher_Quality', 'School_Type', 'Peer_Influence', 'Learning_Disabilities', 'Parental_Education_Level', 'Distance_from_Home', 'Gender']
controllable_numerical = ['Hours_Studied', 'Attendance', 'Sleep_Hours', 'Tutoring_Sessions', 'Physical_Activity']
controllable_categorical = ['Extracurricular_Activities']

# Create pass/fail target
df['pass_fail'] = (df['Exam_Score'] >= 60).astype(int)

Mounted at /content/drive


# 3. Preparing Features and Targets


In [4]:
# Prepare features and targets
X = df[numerical_cols + categorical_cols]
y_regression = df['Exam_Score']
y_classification = df['pass_fail']

# 4. Preprocessing Pipeline


In [5]:
# Preprocessing for regression and classification
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# 5. Building Regression and Classification Pipelines


In [6]:
# Regression pipeline
regression_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Classification pipeline
classification_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# 6. Training the Models


In [7]:
# Train models
regression_pipeline.fit(X, y_regression)
classification_pipeline.fit(X, y_classification)

# 7. Saving the Pipelines


In [8]:
# Save pipelines
joblib.dump(regression_pipeline, 'regression_pipeline.joblib')
joblib.dump(classification_pipeline, 'classification_pipeline.joblib')

['classification_pipeline.joblib']

# 8. Clustering with K-Prototypes


In [9]:
# Clustering: Scale numerical features
scaler = StandardScaler()
X_scaled = X.copy()
X_scaled[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Define categorical indices
categorical_indices = [X.columns.get_loc(col) for col in categorical_cols]

# Train K-Prototypes (3 clusters)
kp = KPrototypes(n_clusters=3, init='Huang', random_state=42)
clusters = kp.fit_predict(X_scaled, categorical=categorical_indices)

# Assign clusters to dataframe
df['cluster'] = clusters

# 9. Generating Recommendations


In [10]:
# Extract feature importances from the regression pipeline
feature_importances = regression_pipeline.named_steps['regressor'].feature_importances_
feature_names = regression_pipeline.named_steps['preprocessor'].get_feature_names_out()

# Map feature importances to original controllable features
from collections import defaultdict
importances_dict = defaultdict(float)
for name, importance in zip(feature_names, feature_importances):
    if name.startswith('num__'):
        original_feature = name[5:]  # e.g., 'num__Hours_Studied' -> 'Hours_Studied'
        importances_dict[original_feature] = importance
    elif name.startswith('cat__Extracurricular_Activities_'):
        importances_dict['Extracurricular_Activities'] += importance

# Define controllable features
controllable_numerical = ['Hours_Studied', 'Attendance', 'Sleep_Hours', 'Tutoring_Sessions', 'Physical_Activity']
controllable_features = controllable_numerical + ['Extracurricular_Activities']

# Sort features by importance and categorize
sorted_importances = sorted(importances_dict.items(), key=lambda x: x[1], reverse=True)
high_importance = [feat for feat, imp in sorted_importances[:2]]  # Top 2
medium_importance = [feat for feat, imp in sorted_importances[2:4]]  # Next 2
low_importance = [feat for feat, imp in sorted_importances[4:]]  # Bottom 2

# Compute overall statistics for numerical features
overall_means = df[controllable_numerical].mean()
overall_stds = df[controllable_numerical].std()

# Generate recommendations
recommendations = {}
for cluster in range(3):
    cluster_data = df[df['cluster'] == cluster]
    recs = []

    # Numerical features
    for feature in controllable_numerical:
        cluster_mean = cluster_data[feature].mean()
        overall_mean = overall_means[feature]
        overall_std = overall_stds[feature]

        # Determine importance label
        if feature in high_importance:
            importance_label = "Highly important"
        elif feature in medium_importance:
            importance_label = "Important"
        else:
            importance_label = "Moderately important"

        # Tiered recommendations
        if cluster_mean < overall_mean - 0.5 * overall_std:
            recs.append(f"{importance_label}: Urgently increase {feature} to at least {overall_mean:.1f}")
        elif cluster_mean < overall_mean:
            recs.append(f"{importance_label}: Consider increasing {feature} to at least {overall_mean:.1f}")

    # Extracurricular_Activities
    percentage_no = (cluster_data['Extracurricular_Activities'] == 'No').mean() * 100
    if 'Extracurricular_Activities' in high_importance:
        importance_label = "Highly important"
    elif 'Extracurricular_Activities' in medium_importance:
        importance_label = "Important"
    else:
        importance_label = "Moderately important"

    if percentage_no > 75:
        recs.append(f"{importance_label}: Strongly consider participating in extracurricular activities ({percentage_no:.1f}% do not participate)")
    elif percentage_no > 50:
        recs.append(f"{importance_label}: Consider participating in extracurricular activities ({percentage_no:.1f}% do not participate)")

    recommendations[cluster] = recs

# 10. Saving Clustering Models and Recommendations


In [11]:
# Save models and recommendations
joblib.dump(scaler, 'scaler.joblib')
joblib.dump(kp, 'kprototypes.joblib')
with open('recommendations.json', 'w') as f:
    json.dump(recommendations, f)

print("Models and recommendations saved.")

Models and recommendations saved.


In [12]:
from google.colab import files
files.download('regression_pipeline.joblib')
files.download('classification_pipeline.joblib')
files.download('scaler.joblib')
files.download('kprototypes.joblib')
files.download('recommendations.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
from google.colab import drive
drive.mount('/content/drive')
!cp regression_pipeline.joblib /content/drive/MyDrive/
!cp classification_pipeline.joblib /content/drive/MyDrive/
!cp scaler.joblib /content/drive/MyDrive/
!cp kprototypes.joblib /content/drive/MyDrive/
!cp recommendations.json /content/drive/MyDrive/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1. Importing Required Libraries


In [14]:
from fastapi import FastAPI
import pandas as pd
import joblib
import json
from pydantic import BaseModel
import nest_asyncio
import uvicorn
from IPython.display import display

# 2. Initializing the FastAPI App


In [15]:
app = FastAPI()

# 3. Loading Models and Recommendations


In [16]:
# Load models and recommendations
regression_pipeline = joblib.load('regression_pipeline.joblib')
classification_pipeline = joblib.load('classification_pipeline.joblib')
scaler = joblib.load('scaler.joblib')
kp = joblib.load('kprototypes.joblib')
with open('recommendations.json', 'r') as f:
    recommendations = json.load(f)

# 4. Defining Feature Columns


In [17]:
# Define feature columns
numerical_cols = ['Hours_Studied', 'Attendance', 'Sleep_Hours', 'Previous_Scores', 'Tutoring_Sessions', 'Physical_Activity']
categorical_cols = ['Parental_Involvement', 'Access_to_Resources', 'Extracurricular_Activities', 'Motivation_Level', 'Internet_Access', 'Family_Income', 'Teacher_Quality', 'School_Type', 'Peer_Influence', 'Learning_Disabilities', 'Parental_Education_Level', 'Distance_from_Home', 'Gender']
columns = numerical_cols + categorical_cols

# 5. Defining Input Data Structure


In [18]:
# Define input data structure
class StudentData(BaseModel):
    Hours_Studied: float
    Attendance: float
    Parental_Involvement: str
    Access_to_Resources: str
    Extracurricular_Activities: str
    Sleep_Hours: float
    Previous_Scores: float
    Motivation_Level: str
    Internet_Access: str
    Tutoring_Sessions: float
    Family_Income: str
    Teacher_Quality: str
    School_Type: str
    Peer_Influence: str
    Physical_Activity: float
    Learning_Disabilities: str
    Parental_Education_Level: str
    Distance_from_Home: str
    Gender: str

# 6. Creating the Prediction Endpoint


In [19]:
@app.post("/predict")
def predict(data: StudentData):
    # Convert input data to DataFrame
    X_new = pd.DataFrame([data.dict()], columns=columns)

    # Predict exam score
    predicted_score = regression_pipeline.predict(X_new)[0]

    # Predict pass probability
    pass_probability = classification_pipeline.predict_proba(X_new)[0][1]

    # Scale numerical features for clustering
    X_new_scaled = X_new.copy()
    X_new_scaled[numerical_cols] = scaler.transform(X_new[numerical_cols])

    # Define categorical indices for clustering
    categorical_indices = list(range(len(numerical_cols), len(columns)))

    # Predict cluster
    cluster = kp.predict(X_new_scaled, categorical=categorical_indices)[0]

    # Get recommendations for the cluster
    recs = recommendations[str(cluster)]

    # Return results
    return {
        "predicted_score": predicted_score,
        "pass_probability": pass_probability,
        "cluster": int(cluster),
        "recommendations": recs
    }

# 7. Running the FastAPI  ngrok

In [20]:
import threading
import time
def run_server():
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")

thread = threading.Thread(target=run_server)
thread.start()

# Wait for server to start
time.sleep(5)

INFO:     Started server process [229]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [27]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [28]:
import uvicorn
import asyncio
from pyngrok import ngrok

# Set up the server configuration
config = uvicorn.Config(app, host="0.0.0.0", port=8001)
server = uvicorn.Server(config)

# Get the current event loop
loop = asyncio.get_event_loop()

# Start the server in the background as a task
task = loop.create_task(server.serve())

# Set up Ngrok tunnel
ngrok.set_auth_token("2tiTnsEvZS0QaDYClcDP7v7eSNM_4UuDWB19FjSCW92mMp4gE")  # Your Ngrok auth token
public_url = ngrok.connect(8001).public_url
print(f"Public API URL: {public_url}")

Downloading ngrok ...

  for meth in dir(handler):


Public API URL: https://d4dc-34-142-216-106.ngrok-free.app


In [29]:


import requests
import time

# Replace with your actual Ngrok URL
public_url = public_url

data = {
    "Hours_Studied": 5.0,
    "Attendance": 90.0,
    "Parental_Involvement": "High",
    "Access_to_Resources": "Yes",
    "Extracurricular_Activities": "Yes",
    "Sleep_Hours": 7.0,
    "Previous_Scores": 85.0,
    "Motivation_Level": "High",
    "Internet_Access": "Yes",
    "Tutoring_Sessions": 2.0,
    "Family_Income": "Medium",
    "Teacher_Quality": "Good",
    "School_Type": "Public",
    "Peer_Influence": "Positive",
    "Physical_Activity": 3.0,
    "Learning_Disabilities": "No",
    "Parental_Education_Level": "Graduate",
    "Distance_from_Home": "Near",
    "Gender": "Male"
}

# Wait briefly to ensure the server is up
time.sleep(2)

try:
    # Send POST request
    response = requests.post(f"{public_url}/predict", json=data, timeout=10)
    response.raise_for_status()  # Raise an error for bad HTTP status codes
    print("API Response:", response.json())
except requests.exceptions.ConnectionError as e:
    print(f"Connection Error: {e} - Check if the server is running on port 8000 and Ngrok tunnel is active.")
except requests.exceptions.Timeout:
    print("Request timed out - Server may be slow or unreachable.")
except requests.exceptions.HTTPError as e:
    print(f"HTTP Error: {e} - Response: {response.text}")
except requests.exceptions.JSONDecodeError as e:
    print(f"JSON Decode Error: {e} - Raw Response: {response.text}")
except Exception as e:
    print(f"Unexpected Error: {e}")

Request timed out - Server may be slow or unreachable.


In [None]:
# https://grok.com/share/bGVnYWN5_e12ed583-8ef7-4c49-881c-62f667048997