In [9]:
import pandas as pd
import joblib

# Restore the persisted artifacts with joblib: the model first, then the
# per-column encoders (same load order as before).
model, label_encoders = (
    joblib.load('logistic_model.pkl'),
    joblib.load('label_encoders.pkl'),
)

# Read the dataset that will be scored.
new_data_path = 'por_G3.csv'
new_data = pd.read_csv(filepath_or_buffer=new_data_path)

# Encode categorical features
def encode_features(data, encoders):
    """Encode the categorical columns of ``data`` with pre-fitted encoders.

    Rows holding a label that the matching encoder does not know (i.e. a
    value absent from its ``classes_``) are reported on stdout and dropped,
    so the returned frame may have fewer rows than the input. The original
    index labels of the surviving rows are kept.

    Args:
        data: DataFrame to encode. It is never modified; the function works
            on a private copy.
        encoders: Mapping of column name -> fitted encoder exposing
            ``classes_`` and ``transform`` (presumably scikit-learn
            ``LabelEncoder`` objects -- confirm against the training code).
            Entries whose column is absent from ``data`` are skipped.

    Returns:
        A new DataFrame with every matching column replaced by its encoded
        values and rows with unseen labels removed.
    """
    # Copy up front so the caller's frame is left untouched whether or not
    # any rows end up being filtered out.
    data = data.copy()

    for column, le in encoders.items():
        if column not in data.columns:
            continue

        known = data[column].isin(le.classes_)
        if not known.all():
            unseen_labels = data.loc[~known, column]
            print(f"Unseen labels in column '{column}': {unseen_labels.unique()}")
            # Explicit copy: assigning into a boolean-filtered selection is
            # the chained-assignment pattern pandas warns about.
            data = data.loc[known].copy()

        data[column] = le.transform(data[column])

    return data

# Encode a copy so the raw, human-readable values stay available in new_data.
new_data_encoded = encode_features(new_data.copy(), label_encoders)

# Columns passed to the model, in this order.
features = [
    'sex', 'age', 'address', 'famsize', 'Pstatus',
    'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason',
    'guardian', 'traveltime', 'studytime', 'failures',
    'schoolsup', 'famsup', 'paid', 'activities',
    'nursery', 'higher', 'internet', 'romantic',
    'famrel', 'freetime', 'goout', 'Dalc',
    'Walc', 'health', 'absences',
]

X_new = new_data_encoded[features]
predictions = model.predict(X_new)

# encode_features drops rows containing labels the encoders have not seen, so
# `predictions` can be shorter than `new_data`. Wrapping the predictions in a
# Series indexed like X_new makes the assignment align on row labels: dropped
# rows get NaN instead of the assignment failing with a length mismatch.
new_data['Predicted_G3_label'] = pd.Series(predictions, index=X_new.index)
print(new_data.head())



  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  ...  \
0     GP   F   18       U     GT3       A     4     4  at_home   teacher  ...   
1     GP   F   17       U     GT3       T     1     1  at_home     other  ...   
2     GP   F   15       U     LE3       T     1     1  at_home     other  ...   
3     GP   F   15       U     GT3       T     4     2   health  services  ...   
4     GP   F   16       U     GT3       T     3     3    other     other  ...   

  goout Dalc  Walc  health  absences  G1  G2  G3   G3_label Predicted_G3_label  
0     4    1     1       3         4   0  11  11  below avg          above avg  
1     3    1     1       3         2   9  11  11  below avg          above avg  
2     2    2     3       3         6  12  13  12  above avg          below avg  
3     2    1     1       5         0  14  14  14  above avg          above avg  
4     2    1     2       5         0  11  13  13  above avg          above avg  

[5 rows x 35 columns]
