<a href="https://colab.research.google.com/github/boiBASH/streamlit-app/blob/main/Jamb_Prediction_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [2]:
# Load the anonymized student results into a DataFrame.
# The path is absolute, so the CSV must already be present at this location.
df = pd.read_csv('/content/anonymized_student_data_with_departments_results.csv')

In [3]:
# Display the loaded frame to inspect its columns and a sample of rows.
df

Unnamed: 0,STUDENT ID,SCHOOL NAME,Biology,Chemistry,Civic Education,Commerce,Economics,English,Further Mathematics,Government,Geography,Literature in English,Mathematics,Physics,Department
0,90673.0,"Barewa College, Zaria",A1,C4,ABS,ABS,ABS,F9,F9,ABS,C6,ABS,F9,F9,Arts
1,90674.0,"Barewa College, Zaria",ABS,ABS,ABS,ABS,ABS,A1,ABS,F9,F9,F9,B3,ABS,Science
2,90675.0,"Barewa College, Zaria",E8,E8,ABS,ABS,ABS,F9,A1,ABS,F9,ABS,A1,C5,Commercial
3,90676.0,"Barewa College, Zaria",C5,B3,ABS,ABS,ABS,F9,F9,ABS,C4,ABS,F9,F9,Commercial
4,90677.0,"Barewa College, Zaria",ABS,ABS,ABS,ABS,ABS,F9,ABS,B3,F9,F9,F9,ABS,Science
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12459,,"Ahmadu Bello College, Zaria",F9,D7,ABS,ABS,ABS,F9,F9,ABS,F9,ABS,B2,F9,Arts
12460,,"Ahmadu Bello College, Zaria",A1,C4,ABS,ABS,ABS,C6,F9,ABS,D7,ABS,C5,C4,Arts
12461,,"Ahmadu Bello College, Zaria",A1,A1,ABS,ABS,ABS,C5,F9,ABS,F9,ABS,D7,C5,Commercial
12462,,"Ahmadu Bello College, Zaria",A1,C4,ABS,ABS,ABS,B2,F9,ABS,C6,ABS,C5,A1,Science


In [4]:
# Export the frame as loaded (grades still in letter form) to a CSV in the working directory.
# The row index is written as the first column because index=False is not passed.
# NOTE(review): this file is not read again in the notebook — confirm the export is still needed.
df.to_csv('teacher_feedback_data.csv')

In [5]:
# Summarise column dtypes and non-null counts; the output below shows that
# STUDENT ID is mostly missing and Department has a single null entry.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12464 entries, 0 to 12463
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   STUDENT ID             2959 non-null   float64
 1   SCHOOL NAME            12464 non-null  object 
 2   Biology                12464 non-null  object 
 3   Chemistry              12464 non-null  object 
 4   Civic Education        12464 non-null  object 
 5   Commerce               12464 non-null  object 
 6   Economics              12464 non-null  object 
 7   English                12464 non-null  object 
 8   Further Mathematics    12464 non-null  object 
 9   Government             12464 non-null  object 
 10  Geography              12464 non-null  object 
 11  Literature in English  12464 non-null  object 
 12  Mathematics            12464 non-null  object 
 13  Physics                12464 non-null  object 
 14  Department             12463 non-null  object 
dtypes:

In [6]:
# Descriptive statistics for the numeric columns; at this point only STUDENT ID is numeric.
df.describe()

Unnamed: 0,STUDENT ID
count,2959.0
mean,90205.45049
std,35869.604481
min,41282.0
25%,65371.5
50%,90136.0
75%,96325.5
max,219721.0


In [7]:
# Subject columns whose letter grades are converted to numbers
subject_columns = [
    'Biology', 'Chemistry', 'Civic Education', 'Commerce',
    'Economics', 'English', 'Further Mathematics',
    'Government', 'Geography', 'Literature in English',
    'Mathematics', 'Physics'
]

# Numeric value assigned to each letter grade: A1 is the highest (6) and C6 the
# lowest non-zero value (1); D7, E8 and F9 all count as 0; 'ABS' becomes missing.
grade_mapping = {
    'A1': 6, 'B2': 5, 'B3': 4, 'C4': 3, 'C5': 2, 'C6': 1,
    'D7': 0, 'E8': 0, 'F9': 0, 'ABS': None
}

# Re-encode every subject column in a single step; any code that is not a key
# of grade_mapping ends up as NaN as well.
df = df.assign(**{
    subject: df[subject].map(grade_mapping)
    for subject in subject_columns
})

# Show the first rows of the numerically encoded grades
df[subject_columns].head()

Unnamed: 0,Biology,Chemistry,Civic Education,Commerce,Economics,English,Further Mathematics,Government,Geography,Literature in English,Mathematics,Physics
0,6.0,3.0,,,,0.0,0.0,,1.0,,0.0,0.0
1,,,,,,6.0,,0.0,0.0,0.0,4.0,
2,0.0,0.0,,,,0.0,6.0,,0.0,,6.0,2.0
3,2.0,4.0,,,,0.0,0.0,,3.0,,0.0,0.0
4,,,,,,0.0,,4.0,0.0,0.0,0.0,


In [8]:
# Define a function to calculate the synthetic JAMB score
def calculate_synthetic_jamb(row, subject_columns, max_grade=6, max_score=400):
    """Return a synthetic JAMB score derived from a student's mean subject grade.

    The mean of the non-missing grades listed in ``subject_columns`` is rescaled
    linearly so that an average equal to ``max_grade`` maps to ``max_score``.

    Parameters
    ----------
    row : pandas.Series
        One student record holding the numerically encoded subject grades.
    subject_columns : list of str
        Labels of the grade entries to average; missing values are ignored.
    max_grade : float, default 6
        Highest attainable numeric grade (the top of the grade scale).
    max_score : float, default 400
        Score assigned to a student whose average grade equals ``max_grade``.

    Returns
    -------
    float or None
        The scaled score, or ``None`` when every listed grade is missing.
    """
    # Only grades that are actually present contribute to the average
    valid_grades = row[subject_columns].dropna()
    if valid_grades.empty:
        return None

    # Linear rescale: average / max_grade is a fraction in [0, 1] of the top score
    return (valid_grades.mean() / max_grade) * max_score

# Score every student: each row is handed to calculate_synthetic_jamb together
# with the list of subject columns to average over
df['Synthetic JAMB Score'] = df.apply(
    lambda student: calculate_synthetic_jamb(student, subject_columns=subject_columns),
    axis=1,
)

# Preview the identifier, department and freshly computed score
preview_columns = ['STUDENT ID', 'Department', 'Synthetic JAMB Score']
df[preview_columns].head()


Unnamed: 0,STUDENT ID,Department,Synthetic JAMB Score
0,90673.0,Arts,95.238095
1,90674.0,Science,133.333333
2,90675.0,Commercial,133.333333
3,90676.0,Commercial,85.714286
4,90677.0,Science,53.333333


In [9]:
# Keep only the students for whom a synthetic JAMB score could be computed
df = df.dropna(subset=['Synthetic JAMB Score'])

# Feature groups: the numeric subject grades and the categorical department
numerical_features = subject_columns
categorical_features = ['Department']

# Feature matrix (subjects + department) and regression target
X = df[numerical_features + categorical_features]
y = df['Synthetic JAMB Score']

# Preprocessing:
# - missing subject grades are replaced by the column mean
# - 'Department' is one-hot encoded; categories unseen during fit are ignored
numerical_transformer = SimpleImputer(strategy='mean')
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Candidate regressors; the tree ensembles are seeded for reproducibility
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
}

# Hold out 20% of the rows as a test set; cross-validation below only sees the rest
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5-fold cross-validation of each candidate on the training split
model_scores = {}
for name, model in models.items():
    # Preprocessing lives inside the pipeline so it is re-fit on every fold
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])

    # scikit-learn reports negated MSE; flip the sign and take the root per fold
    scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    fold_rmse = np.sqrt(-scores)
    model_scores[name] = np.mean(fold_rmse)  # mean RMSE across the folds

# Mean cross-validated RMSE per model (lower is better)
model_scores



{'Linear Regression': 8.42170358214969,
 'Random Forest': 9.083674053069274,
 'Gradient Boosting': 8.833314775136223}

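Linear Regression performed best, achieving the lowest cross-validated RMSE (≈8.42), ahead of Gradient Boosting (≈8.83) and Random Forest (≈9.08); lower RMSE means smaller prediction error. The gaps are small relative to the 0–400 score range, so all three models predict the synthetic JAMB score with similar accuracy. The next cell trains a Random Forest for deployment even though it had the highest RMSE of the three.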

In [10]:
from sklearn.metrics import mean_squared_error

# Pipeline used for deployment: shared preprocessing followed by a Random Forest.
# Note: the cross-validation results above give Linear Regression the lowest RMSE
# and Random Forest the highest, so Random Forest is a modelling choice here
# rather than the cross-validation winner.
best_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(random_state=42)),
])

# Fit on the whole training split
best_pipeline.fit(X_train, y_train)

# Score the held-out test split and report its RMSE
y_pred = best_pipeline.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

test_rmse




8.628329544239925

In [11]:
import joblib

# Save the trained pipeline model (preprocessing + regressor) as a single joblib file.
# The path is absolute, while app.py loads 'random_forest_jamb_model.pkl' by relative
# name, so the app has to be started with /content as its working directory.
model_path = '/content/random_forest_jamb_model.pkl'
joblib.dump(best_pipeline, model_path)

# Show where the model file was written
model_path

'/content/random_forest_jamb_model.pkl'

In [12]:
!pip install streamlit -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m61.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m88.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.3/79.3 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [13]:
%%writefile app.py

import streamlit as st
import joblib
import pandas as pd

# Cache the model using st.cache_resource
@st.cache_resource
def load_model():
    return joblib.load('random_forest_jamb_model.pkl')

model = load_model()  # Load the model only once

# Sidebar information and navigation
st.sidebar.title("JAMB Score Prediction App")
st.sidebar.info("""
Use this app to predict a student's JAMB score based on their grades across multiple subjects.
Select the appropriate grades and department, and hit the "Predict JAMB Score" button.
""")

# Main page title and header
st.markdown("""
    <style>
        .stTitle {
            color: #FF6347;  /* Tomato red */
            font-family: 'Helvetica';
            text-align: center;
        }
        .stHeader {
            color: #4CAF50;  /* Green */
            text-align: left;
            font-size: 20px;
        }
    </style>
""", unsafe_allow_html=True)

st.title("🎓 JAMB Score Prediction")
st.markdown("""
### Enter Student's Grades
Provide the grades for the following subjects and select the department.
""")

# List of subjects
subjects = ['Biology', 'Chemistry', 'Civic Education', 'Commerce',
            'Economics', 'English', 'Further Mathematics',
            'Government', 'Geography', 'Literature in English',
            'Mathematics', 'Physics']

# Create a dictionary to store subject grades
grades = {}

# Create selectboxes for each subject grade
for subject in subjects:
    grades[subject] = st.selectbox(f"{subject} Grade",
                                   options=['A1', 'B2', 'B3', 'C4', 'C5', 'C6', 'D7', 'E8', 'F9', 'ABS'],
                                   index=9)

# Selectbox for Department
department = st.selectbox("Department", ['Arts', 'Science', 'Commercial'])

# Convert the input into a DataFrame
input_data = pd.DataFrame([grades])
input_data['Department'] = department

# Map grades to numerical values
grade_mapping = {
    'A1': 6, 'B2': 5, 'B3': 4, 'C4': 3, 'C5': 2, 'C6': 1,
    'D7': 0, 'E8': 0, 'F9': 0, 'ABS': None  # Use None for absent scores
}

# Apply the grade mapping to the input data
for col in subjects:
    input_data[col] = input_data[col].map(grade_mapping)

# Handle ABS by replacing None with a default value (0 for instance)
input_data = input_data.fillna(0)

# Ensure all required columns are present, filling missing columns with zeros
required_columns = subjects + ['Department']  # Model expects all these columns
for col in required_columns:
    if col not in input_data.columns:
        input_data[col] = 0  # Default value for missing columns

# Button to trigger the prediction
if st.button('Predict JAMB Score'):
    # Ensure that there are still enough subjects after removing ABS
    if input_data.shape[1] < 2:  # Assuming we need at least 2 subjects to predict
        st.error("Please provide valid grades for at least two subjects to make a prediction.")
    else:
        # Make a prediction
        prediction = model.predict(input_data)[0]
        # Display the prediction result
        st.success(f"🎯 Predicted JAMB Score: {prediction:.2f}")

        # Provide downloadable result
        csv = input_data.to_csv(index=False)
        st.download_button(
            label="📥 Download Prediction",
            data=csv,
            file_name='jamb_prediction.csv',
            mime='text/csv'
        )

# Footer
st.markdown("""
---
#### **JAMB Prediction App**
Built by [boiBASH](https://yourportfolio.com)
Contact: [Bashirudeenopeyemi772@gmail.com](mailto:your.email@example.com)
""")


Writing app.py


In [14]:
# Print this machine's public IPv4 address; the value shown below matches the
# External URL that Streamlit reports when the app starts.
!wget -q -O - ipv4.icanhazip.com

35.221.185.24


In [None]:
# Start the Streamlit server in the background (&) and, in the same shell, open a
# localtunnel to port 8501 — the port Streamlit lists as its Local URL below.
# npx asks for confirmation before installing localtunnel if it is not yet present.
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.221.185.24:8501[0m
[0m
[1G[0JNeed to install the following packages:
  localtunnel@2.0.2
Ok to proceed? (y) [20Gy
[K[?25hyour url is: https://swift-apples-hug.loca.lt
