In [None]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
# Create a synthetic binary-classification dataset (no real data used).
# random_state is fixed so the same samples are generated on every run.
X_data, y_data = make_classification(
    n_samples=1000,     # 1000 rows
    n_features=10,      # 10 features in total
    n_informative=5,    # 5 informative features
    n_redundant=2,      # 2 redundant features
    n_classes=2,
    random_state=42
)

# Put into DataFrame for clarity. Column labels are derived from the actual
# width of X_data rather than a hard-coded range(1, 11), so changing
# n_features above cannot cause a column-count mismatch here.
feature_names = [f"feature_{i}" for i in range(1, X_data.shape[1] + 1)]
X = pd.DataFrame(X_data, columns=feature_names)
y = pd.Series(y_data)

In [None]:
# Hold out 20% of the rows as a test set and keep the rest for training.
# random_state is pinned so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline: a RandomForest with default hyperparameters and a fixed seed
model = RandomForestClassifier(random_state=42)

# Score the baseline with 5-fold cross-validation, using the training split only
cv_scores = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5)

# Report the per-fold scores followed by their average
mean_cv_score = cv_scores.mean()
print("Cross-validation scores:", cv_scores)
print("Mean CV Accuracy:", mean_cv_score)

Cross-validation scores: [0.9125  0.89375 0.94375 0.93125 0.95   ]
Mean CV Accuracy: 0.9262499999999999


In [None]:
# Hyperparameter values to try; the search covers every combination
# (3 x 3 x 2 = 18 candidate settings)
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5]
}

# Exhaustive search: each candidate is scored by accuracy under 5-fold CV
base_forest = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=base_forest,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)

# Run the search on the training split only
grid_search.fit(X_train, y_train)

# Keep the best estimator found by the search and report its settings
best_model = grid_search.best_estimator_
print("Best parameters found:", grid_search.best_params_)

Best parameters found: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}


In [None]:
# Score the tuned model on the held-out test split
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Overall accuracy first, then the per-class precision/recall/F1 table
print("Test Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print(report)

Test Accuracy: 0.955
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       112
           1       0.95      0.94      0.95        88

    accuracy                           0.95       200
   macro avg       0.95      0.95      0.95       200
weighted avg       0.95      0.95      0.95       200

