**Section 1: Feature Engineering & Model Tuning**

In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report


In [2]:
# Toy dataset: one entry per student, with three subject scores and a binary
# pass flag (1 = passed, 0 = did not pass).
data = {
    "math_score":    [78, 85, 90, 88, 76, 95, 89, 92],
    "science_score": [80, 90, 88, 86, 78, 96, 91, 93],
    "english_score": [75, 80, 85, 82, 79, 94, 88, 90],
    "passed":        [1, 1, 1, 1, 0, 1, 1, 1],
}

In [3]:
# Build the frame and append an engineered feature: the sum of the three
# subject scores for each student.
df = pd.DataFrame(data).assign(
    total_score=lambda frame: (
        frame['math_score'] + frame['science_score'] + frame['english_score']
    )
)

# Everything except the label is a feature; the label is the 'passed' flag.
target_column = 'passed'
X = df.drop(columns=[target_column])
y = df[target_column]


In [4]:
# Hyper-parameter candidates explored by the grid search (3 x 3 x 3 combinations).
param_grid = {
    "n_estimators": [10, 50, 100],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
}

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Seeded random forest used as the base estimator for the search.
rf_model = RandomForestClassifier(random_state=42)

# Exhaustive search over param_grid with 5-fold cross-validation, ranked by
# accuracy. fit() returns the fitted search object, so it can be chained.
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
).fit(X_train, y_train)

# Best Model
best_model = grid_search.best_estimator_

# Evaluate Model Performance on the held-out rows.
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)



In [6]:
# Results: report the winning hyper-parameters and the held-out accuracy.
for label, value in (
    ("Best Parameters:", grid_search.best_params_),
    ("Model Accuracy:", accuracy),
):
    print(label, value)


Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 10}
Model Accuracy: 1.0


**Section 2: Fraud Detection with Decision Trees**

In [7]:
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

In [12]:
# Load the fraud transactions and report how many nulls each column contains.
df = pd.read_csv('fraud_dataset.csv')
missing_per_column = df.isna().sum()
print("Missing values:\n", missing_per_column)

Missing values:
 transaction_id        0
transaction_amount    0
location              0
merchant              0
age                   0
gender                0
fraud_label           0
dtype: int64


In [13]:
# Integer-encode the merchant names into a separate 'Merchant' column
# (capital M); the original 'merchant' text column is left untouched.
# NOTE(review): scikit-learn documents LabelEncoder as meant for target labels;
# OrdinalEncoder is the feature-side equivalent - consider switching.
le = LabelEncoder()
merchant_codes = le.fit_transform(df['merchant'])
df['Merchant'] = merchant_codes

# Model inputs: two numeric columns plus the encoded merchant; the target is
# the fraud flag.
feature_columns = ['transaction_amount', 'age', 'Merchant']
X = df[feature_columns]
y = df['fraud_label']

In [14]:
# Hold out 20% of the transactions for evaluation; the split is seeded so it
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Decision Tree model
# random_state pins the estimator's internal randomness (candidate features
# are permuted at each split, so ties can resolve differently between runs).
# Seeding it makes Restart & Run All reproduce the same tree and metrics, and
# matches the seed already used for the splits and the random forest.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)


In [17]:
# Score the fitted tree on the held-out transactions.
y_pred = model.predict(X_test)

# Overall accuracy plus per-class precision/recall/F1.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", report_text)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00         1

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

