In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
# Step 1: Build a synthetic student dataset
np.random.seed(42)  # fixed seed so the draws below are repeatable


# Features -- the three draws stay in this order so the seeded stream yields the same values
study_hours = np.random.randint(5, 20, 100)  # integer study hours in [5, 19] (upper bound is exclusive)
sleep_hours = np.random.randint(6, 10, 100)  # integer sleep hours in [6, 9]
participation = np.random.choice(['Low', 'Medium', 'High'], 100)  # class-participation level

# Outcome: Pass (1) / Fail (0)
# Rule: pass when 0.5 * study_hours + 0.3 * sleep_hours + participation score >= 10
participation_scores = {'Low': 1, 'Medium': 2, 'High': 3}
participation_points = np.array([participation_scores[level] for level in participation])
weighted_score = study_hours * 0.5 + sleep_hours * 0.3 + participation_points
pass_status = (weighted_score >= 10).astype(int)

# Assemble everything into one frame; the pair order below fixes the column order
data = pd.DataFrame(dict([
    ('Study Hours', study_hours),
    ('Sleep Hours', sleep_hours),
    ('Participation', participation),
    ('Pass Status', pass_status),
]))



# Separate the feature columns from the binary target (1 = pass, 0 = fail)
X = data[['Study Hours', 'Sleep Hours', 'Participation']]
y = data['Pass Status']

# One-hot encode the categorical column; drop_first removes one dummy level,
# which then acts as the implicit baseline category
X = pd.get_dummies(X, columns=['Participation'], drop_first=True)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# `use_label_encoder` is deprecated in recent XGBoost releases (newer versions
# only warn that the parameter is unused), so it is no longer passed.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)


In [None]:
# Feature matrix: the two numeric columns plus the raw categorical column
feature_columns = ['Study Hours', 'Sleep Hours', 'Participation']
X = data[feature_columns]

# Target vector: 1 = pass, 0 = fail
target_column = 'Pass Status'
y = data[target_column]


In [None]:
# One-hot encode the categorical column (drop_first removes one dummy level).
# The guard makes this cell safe to re-run: once 'Participation' has been
# replaced by its dummy columns, calling get_dummies on it again would raise
# a KeyError.
if 'Participation' in X.columns:
    X = pd.get_dummies(X, columns=['Participation'], drop_first=True)


In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
split_parts = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Gradient-boosted classifier evaluated with log loss.
# `use_label_encoder` is deprecated in recent XGBoost releases (newer versions
# only warn that the parameter is unused), so it is no longer passed.
model = XGBClassifier(eval_metric='logloss')

In [None]:
# Train on the training split; the fitted estimator is returned and shown as the cell output
model.fit(X=X_train, y=y_train)


In [None]:
# Predict class labels (1 = pass, 0 = fail) for the held-out test rows
y_pred = model.predict(X_test)
# Bare last expression so the notebook displays the predicted labels
y_pred

In [None]:
# Score the model on the held-out rows: overall accuracy plus
# scikit-learn's text classification report
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report_text)


In [None]:
# Two unseen students to score, written one record per student
new_students = [
    {'Study Hours': 8, 'Sleep Hours': 7, 'Participation': 'Low'},
    {'Study Hours': 15, 'Sleep Hours': 9, 'Participation': 'High'},
]
new_data = pd.DataFrame(new_students)

In [None]:
# The model was fitted on the one-hot encoded training matrix, so the raw
# `new_data` frame (with its string 'Participation' column) cannot be scored
# directly. Encode it the same way and align it to the training columns:
# dummy levels the model never saw as columns (the dropped baseline) are
# discarded, and training columns absent from this small sample are added as 0.
new_data_encoded = (
    pd.get_dummies(new_data, columns=['Participation'])
    .reindex(columns=X_train.columns, fill_value=0)
    .astype(X_train.dtypes.to_dict())  # match the dtypes the model was trained on
)

predictions = model.predict(new_data_encoded)
predicted_probabilities = model.predict_proba(new_data_encoded)
print("New Data Predictions (Pass=1/Fail=0):", predictions)
print("New Data Prediction Probabilities:", predicted_probabilities)