In [1]:
import numpy as np
import pandas as pd

# Problem Definition: Predicting Student Performance Based on Study Habits
# Dataset: simulated study hours, sleep hours, past scores and a pass/fail label.
np.random.seed(42)  # fixed seed so every run produces the same simulated table
data_size = 500

# The columns are drawn one after another from the seeded global generator;
# keeping this exact order is what makes the generated values reproducible.
# NOTE: `pass_exam` is sampled independently of the three feature columns.
simulated = {}
simulated['study_hours'] = np.random.randint(1, 10, data_size)
simulated['sleep_hours'] = np.random.randint(4, 10, data_size)
simulated['past_scores'] = np.random.randint(50, 100, data_size)
# 0: Fail, 1: Pass (drawn with probabilities 0.4 / 0.6)
simulated['pass_exam'] = np.random.choice([0, 1], size=data_size, p=[0.4, 0.6])

data = pd.DataFrame(simulated)

# Splitting data into training and testing sets manually
def train_test_split_manual(X, y, test_size=0.2):
    """Split ``X`` and ``y`` into a leading training part and a trailing test part.

    No shuffling is performed: the first ``len(X) * (1 - test_size)`` items
    (rounded down) become the training set and the remainder the test set.

    Args:
        X: Sliceable sequence of samples (e.g. a NumPy array or a list).
        y: Sliceable sequence of labels with the same length as ``X``.
        test_size: Fraction of the data, in ``[0, 1]``, placed in the test set.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if ``test_size``
            lies outside ``[0, 1]``.
    """
    n_samples = len(X)
    if n_samples != len(y):
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # `1 - test_size` can land a hair below the exact value in binary floating
    # point (1 - 0.9 == 0.09999999999999998), so a bare int() would drop a
    # training sample. Nudge the product up before truncating.
    split_idx = int(n_samples * (1 - test_size) + 1e-9)

    return X[:split_idx], X[split_idx:], y[:split_idx], y[split_idx:]

# Feature matrix (one row per student) and the pass/fail label vector.
feature_columns = ['study_hours', 'sleep_hours', 'past_scores']
X = data[feature_columns].to_numpy()
y = data['pass_exam'].to_numpy()

# Default split: the first 80% of the rows train the model, the rest test it.
X_train, X_test, y_train, y_test = train_test_split_manual(X, y)

# Implementing a Simple Decision Tree Classifier manually
class SimpleDecisionTree:
    """Mean-threshold pass/fail classifier.

    Despite the name, no tree is grown. ``fit`` stores the per-feature mean
    of the training matrix, and ``predict`` labels a sample 1 (pass) only
    when its column 0 and column 2 values are both strictly greater than the
    corresponding training means. Column 1 is never consulted.
    """

    def __init__(self):
        # Per-feature training means; stays None until fit() has been called.
        self.thresholds = None

    def fit(self, X, y):
        """Store the column-wise mean of ``X`` as the decision thresholds.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), n_samples > 0.
            y: Labels. Accepted for interface compatibility but not used.

        Raises:
            ValueError: If ``X`` is not 2-D or contains no samples.
        """
        X = np.asarray(X)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array of shape (n_samples, n_features)"
            )
        self.thresholds = np.mean(X, axis=0)

    def predict(self, X):
        """Predict 1 (pass) or 0 (fail) for each row of ``X``.

        Args:
            X: 2-D array-like with at least three columns.

        Returns:
            1-D integer array with one label per row; empty input yields an
            empty integer array.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.thresholds is None:
            raise RuntimeError("SimpleDecisionTree.predict() called before fit()")

        X = np.asarray(X)
        if X.size == 0:
            return np.array([], dtype=int)

        # Vectorised form of the per-sample rule: both column 0 and column 2
        # must strictly exceed their training means.
        passed = (X[:, 0] > self.thresholds[0]) & (X[:, 2] > self.thresholds[2])
        return passed.astype(int)

# Fit the classifier on the training portion of the data
model = SimpleDecisionTree()
model.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Accuracy = fraction of held-out rows whose predicted label matches the truth
accuracy = (y_pred == y_test).mean()
print(f"Model Accuracy: {accuracy:.2f}")

# Single-student example: 7 study hours, 6 sleep hours, past score of 85
sample_student = np.array([[7, 6, 85]])
prediction = model.predict(sample_student)
print("Predicted Outcome (1=Pass, 0=Fail):", prediction[0])


Model Accuracy: 0.51
Predicted Outcome (1=Pass, 0=Fail): 1
