In [1]:
# READING THE DATASET AND BASIC PREPROCESSING

import pandas as pd
import numpy as np

# The source file is semicolon-delimited.
df = pd.read_csv('student-mat.csv', sep=';')

# Columns that are not used as model features.
df = df.drop(columns=['school', 'Dalc', 'Walc'])

# The grade columns are cleaned of single-quote characters and converted to
# integers. Casting to str first keeps this working when a column has already
# been parsed as numeric, and the explicit int64 target keeps the dtype stable
# for any later dtype-based column selection.
for grade_col in ['G1', 'G2', 'G3']:
    df[grade_col] = (
        df[grade_col]
        .astype(str)
        .str.replace("'", "", regex=False)
        .astype('int64')
    )

# Features are every remaining column except the final grade; the target is G3.
X = df.drop(columns='G3')
y = np.array(df['G3'])





# CREATING A PIPELINE FOR THE MODEL AND ENCODING THE CATEGORICAL DATA

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor as RandomForestRegression

# One seed shared by the train/test split and the forest so that a fresh
# "Restart & Run All" reproduces the same model and predictions.
RANDOM_STATE = 42

# Define the columns to be one-hot encoded
categorical_columns = ["sex", "address", "famsize", "Pstatus","Mjob", "Fjob", "reason", "guardian", "schoolsup", "famsup", "paid", "activities", "nursery", "higher", "internet", "romantic"]

# Numeric columns are forwarded unchanged. Selecting on 'number' (rather than
# only int64/float64) avoids silently dropping columns that were read with a
# different integer or float width.
numeric_features = X.select_dtypes(include='number').columns.tolist()
numeric_transformer = Pipeline(steps=[('num', 'passthrough')])  # Placeholder for numeric features

# Categorical columns are one-hot encoded. handle_unknown='ignore' makes a
# category that was not seen during fitting encode as all zeros instead of
# raising at predict time.
categorical_features = categorical_columns
categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Create a column transformer (columns not listed in either group are dropped,
# which is the ColumnTransformer default)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

regressor = RandomForestRegression(random_state=RANDOM_STATE)

# Create the final pipeline with preprocessing and regression model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', regressor)
])

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

# NOTE: everything between the triple quotes below is a bare string literal,
# so none of it is executed. If it were enabled it would clamp each value of
# y_pred into the range 0..20 and print the predictions side by side with
# y_test, using two decimal places.
'''
for i in range(len(y_pred)):
    if y_pred[i] < 0:
        y_pred[i] = 0
    elif y_pred[i] > 20:
        y_pred[i] = 20
    else:
        y_pred[i] = y_pred[i]
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
'''




# INPUT DATA FOR PREDICTION
# A single student record, in the same column order as `columns` below.
# Named `input_rows` so that the built-in input() is not shadowed for the rest
# of the kernel session.
input_rows = [['F', 14, 'U', 'GT3', 'T', 4, 4,'at_home','other','reputation','mother',3,4,0,'yes','yes','no','no','yes','yes','yes','no',3,3,4,3,16,'15.4','13']]
columns = ["sex", 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu','Mjob','Fjob','reason','guardian','traveltime','studytime','failures','schoolsup','famsup','paid','activities','nursery','higher','internet','romantic','famrel','freetime','goout','health','absences','G1','G2']
main = pd.DataFrame(input_rows, columns=columns)

# G1 and G2 are supplied as strings: drop any single quotes and convert to float.
for grade_col in ['G1', 'G2']:
    main[grade_col] = main[grade_col].str.replace("'", "", regex=False).astype(float)

# Wrap every categorical value in single quotes before predicting.
# NOTE(review): presumably this matches how categorical values are stored in
# the training CSV - confirm against the data file.
for cat_col in categorical_columns:
    main[cat_col] = "'" + main[cat_col] + "'"

pred = pipeline.predict(main)
print(pred)

print(main)

[13.07]
   sex  age address famsize Pstatus  Medu  Fedu       Mjob     Fjob  \
0  'F'   14     'U'   'GT3'     'T'     4     4  'at_home'  'other'   

         reason  ... higher  internet  romantic  famrel freetime goout health  \
0  'reputation'  ...  'yes'     'yes'      'no'       3        3     4      3   

  absences    G1    G2  
0       16  15.4  13.0  

[1 rows x 29 columns]


STUDENT PROFILING - EXTRACURRICULAR FEATURES

In [2]:
def generate_student_profile(input_data, predicted_final_grade):
    """Build a human-readable profile for the first row of ``input_data``.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Frame whose first row holds the student's attributes. The columns read
        are: sex, age, address, famsize, Pstatus, Medu, Fedu, traveltime,
        studytime, failures, schoolsup, famsup, paid, activities, nursery,
        higher, internet, romantic, famrel, freetime, goout, health, absences,
        G1 and G2. Categorical values may be plain (``yes``) or wrapped in
        quotes (``'yes'``); both forms are understood by the
        strength/weakness rules.
    predicted_final_grade
        Value stored unchanged under ``'Predicted Final Grade (G3)'``.

    Returns
    -------
    dict
        Keys ``'Basic Information'``, ``'Academic History'`` and
        ``'Academic Performance'`` map to dicts of attribute -> value (raw
        values are kept exactly as they appear in ``input_data``);
        ``'Strengths'`` and ``'Weaknesses'`` map to lists of sentences.

    Raises
    ------
    KeyError
        If a required column is missing from ``input_data``.
    IndexError
        If ``input_data`` has no rows.
    """
    # Labels for 1-5 ratings (famrel, freetime, goout, health, ...).
    # NOTE(review): the same labels are applied to Medu, Fedu and studytime;
    # confirm those columns really use this scale. Any value outside 1-5 is
    # reported as 'unknown'.
    rating_categories = {
        1: 'very bad',
        2: 'bad',
        3: 'moderate',
        4: 'good',
        5: 'excellent'
    }

    # Map numeric ratings to their categories
    def map_to_category(value, category_mapping):
        return category_mapping.get(value, 'unknown')

    def first(column):
        """Return the raw value of ``column`` in the first row."""
        return input_data[column].iloc[0]

    def cleaned(column):
        """Return the first-row value with surrounding quotes/whitespace removed."""
        value = first(column)
        if isinstance(value, str):
            return value.strip().strip("'\"").strip()
        return value

    def is_yes(column):
        """Interpret a yes/no column as a boolean.

        A string only counts as true when it reads 'yes' once quotes are
        stripped; a plain truthiness test would treat 'no' as true as well.
        Non-string values fall back to their truthiness.
        """
        value = cleaned(column)
        if isinstance(value, str):
            return value.lower() == 'yes'
        return bool(value)

    # Create the student profile dictionary.
    # Alcohol-consumption fields (Dalc / Walc) are not part of the profile.
    profile = {
        'Basic Information': {
            'Sex': first('sex'),
            'Age': first('age'),
            'Address': first('address'),
            'Family Size': first('famsize'),
            'Parent\'s Cohabitation Status': first('Pstatus')
        },
        'Academic History': {
            'Mother\'s Education': map_to_category(first('Medu'), rating_categories),
            'Father\'s Education': map_to_category(first('Fedu'), rating_categories),
            'Weekly Study Time': map_to_category(first('studytime'), rating_categories),
            'Past Class Failures': first('failures'),
            'Extra Educational Support': first('schoolsup'),
            'Family Educational Support': first('famsup'),
            'Extra Paid Classes': first('paid'),
            'Extra-curricular Activities': first('activities'),
            'Attended Nursery School': first('nursery'),
            'Aspires for Higher Education': first('higher'),
            'Internet Access at Home': first('internet'),
            'In a Romantic Relationship': first('romantic'),
            'Quality of Family Relationships': map_to_category(first('famrel'), rating_categories),
            'Free Time After School': map_to_category(first('freetime'), rating_categories),
            'Going Out with Friends': map_to_category(first('goout'), rating_categories),
            'Current Health Status': map_to_category(first('health'), rating_categories),
            'Number of School Absences': first('absences')
        },
        'Academic Performance': {
            'First Period Grade (G1)': first('G1'),
            'Second Period Grade (G2)': first('G2'),
            'Predicted Final Grade (G3)': predicted_final_grade
        }
    }

    # Define strengths and weaknesses based on criteria
    strengths = []
    weaknesses = []

    def assess(condition, strength, weakness):
        """Record ``strength`` when ``condition`` holds, otherwise ``weakness``."""
        if condition:
            strengths.append(strength)
        else:
            weaknesses.append(weakness)

    # The order of the checks below determines the order of the sentences.
    assess(first('studytime') >= 3,
           'Strong commitment to education.',
           'Moderate weekly study time; could benefit from more study hours.')

    assess(first('famrel') >= 4,
           'Good family relationships.',
           'Less than excellent family relationships.')

    assess(first('health') == 5,
           'Excellent health status.',
           'Health status could be improved.')

    assess(first('freetime') >= 3,
           'Plenty of free time.',
           'Less than moderate free time.')

    assess(first('absences') <= 2,
           'Very few school absences.',
           'Too many school absences.')

    assess(is_yes('famsup'),
           'Family provides educational support.',
           'Family does not provide educational support.')

    assess(is_yes('activities'),
           'Participates in extracurricular activities.',
           'Does not participate in extracurricular activities.')

    # Travel time only contributes at the two extremes; 2 and 3 are neutral.
    travel_time = first('traveltime')
    if travel_time == 1:
        strengths.append('Short travel time to school.')
    elif travel_time == 4:
        weaknesses.append('Long travel time to school.')

    assess(is_yes('internet'),
           'Access to the internet at home.',
           'No access to the internet at home.')

    # Compare the quote-stripped value so that both LE3 and 'LE3' match.
    assess(cleaned('famsize') == 'LE3',
           'Comes from a small family.',
           'Comes from a large family.')

    assess(first('goout') <= 2,
           'Spends limited time going out with friends.',
           'Frequent outings with friends may affect study time.')

    # Add strengths and weaknesses to the profile
    profile['Strengths'] = strengths
    profile['Weaknesses'] = weaknesses

    return profile


# Build the profile for the prepared input row and its predicted grade
student_profile = generate_student_profile(main, pred)

# Show the attribute sections first; the two list sections are printed
# together afterwards.
for title, details in student_profile.items():
    if section_is_list := title in ('Strengths', 'Weaknesses'):
        continue
    print(f'{title}:\n')
    # Stop at the first remaining section that is not an attribute mapping.
    if not isinstance(details, dict):
        break
    for label, entry in details.items():
        print(f'- {label}: {entry}')
    print()

# Strengths, then weaknesses, each as a bulleted list followed by a blank line
for heading in ('Strengths', 'Weaknesses'):
    print(f'{heading}:')
    for sentence in student_profile[heading]:
        print(f'- {sentence}')
    print()

Basic Information:

- Sex: 'F'
- Age: 14
- Address: 'U'
- Family Size: 'GT3'
- Parent's Cohabitation Status: 'T'

Academic History:

- Mother's Education: good
- Father's Education: good
- Weekly Study Time: good
- Past Class Failures: 0
- Extra Educational Support: 'yes'
- Family Educational Support: 'yes'
- Extra Paid Classes: 'no'
- Extra-curricular Activities: 'no'
- Attended Nursery School: 'yes'
- Aspires for Higher Education: 'yes'
- Internet Access at Home: 'yes'
- In a Romantic Relationship: 'no'
- Quality of Family Relationships: moderate
- Free Time After School: moderate
- Going Out with Friends: good
- Current Health Status: moderate
- Number of School Absences: 16

Academic Performance:

- First Period Grade (G1): 15.4
- Second Period Grade (G2): 13.0
- Predicted Final Grade (G3): [13.07]

Strengths:
- Strong commitment to education.
- Plenty of free time.
- Family provides educational support.
- Access to the internet at home.

Weaknesses:
- Less than excellent family re