# Tree-based model: Random Forests

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

import pandas as pd
import numpy as np
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from scipy import stats

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)


Training the model with the test data and evaluating the model with validation data

In [5]:
#Import data

data = pd.read_csv("siren_data_train_no_outliers.csv")

#Define input and output
X = data.drop(["heard"], axis=1)
y = data["heard"]


#Create a RandomForestClassifier
model = RandomForestClassifier()

#Randomly splitting the dataset into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the classifier on the training data
model.fit(X_train, y_train)

#Make predictions on the test data
y_pred = model.predict(X_val)

#Hyperparameter tuning. Inspiration from https://www.geeksforgeeks.org/random-forest-hyperparameter-tuning-in-python/
param_grid = {
    'n_estimators': [25, 50, 100, 150],
    'max_features': ['sqrt', 'log2', None],
    'max_depth': [3,6,9],
    'max_leaf_nodes':[3,6,9]
}

#Using Grid Search for hyperparameter tuning
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1)

#Fit the gridsearch to the data
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best parameters: {best_params}")


#Print the mean accuracy
print(f"Mean accuracy: {grid_search.best_score_}")

#Print the confusion matrix
pd.crosstab(y_val, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)

Best parameters: {'max_depth': 6, 'max_features': 'sqrt', 'max_leaf_nodes': 9, 'n_estimators': 50}
Mean accuracy: 0.9290450090563327


Predicted,0,1,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,181,55,236
1,34,823,857
All,215,878,1093
