In [8]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.ensemble import RandomForestClassifier

In [9]:
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score

In [10]:
# Read the feature table and the target table from their CSV files under Features/.
features_csv = "Features/df_slider_feature.csv"
target_csv = "Features/df_slider_target.csv"

df_slider_features = pd.read_csv(features_csv)
df_slider_target = pd.read_csv(target_csv)

In [11]:
# Feature matrix: the feature DataFrame is used as-is (same object, no copy).
X = df_slider_features
# Target vector: flatten the target DataFrame's underlying array to 1-D so it
# can be passed as `y` to the splitter and to cross_val_score().
y = np.ravel(df_slider_target.values)

In [12]:
# Split the data into 5 stratified folds: StratifiedKFold builds the folds so
# that each one preserves the percentage of samples of every class.
# shuffle=True together with a fixed random_state makes the (shuffled) fold
# assignment reproducible across runs.
# This `kf` splitter object is later passed as `cv` to cross_val_score().
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)

# split() generates one (train indices, test indices) pair per fold; print the
# size of each side as a quick sanity check. enumerate(..., start=1) numbers
# the folds from 1 without a hand-maintained counter.
for fold, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
    print(f'Fold:{fold}, Train set: {len(train_index)}, Test set:{len(test_index)}')

# Note:
# Passing a plain integer (e.g. cv=5) to cross_val_score() with a classifier
# also uses a StratifiedKFold splitting strategy, so the explicit splitter is
# optional. However, the integer form does NOT shuffle, whereas `kf` above
# shuffles with random_state=69 -- the folds (and therefore the scores) will
# not be identical between the two approaches.

Fold:1, Train set: 9825, Test set:2457
Fold:2, Train set: 9825, Test set:2457
Fold:3, Train set: 9826, Test set:2456
Fold:4, Train set: 9826, Test set:2456
Fold:5, Train set: 9826, Test set:2456


In [13]:
# Baseline: cross-validated accuracy of a random forest with default
# hyper-parameters, evaluated on the folds produced by `kf`.
# random_state is fixed so the forest itself is reproducible.
rf_baseline = RandomForestClassifier(random_state=69)
score = cross_val_score(rf_baseline, X, y, cv=kf, scoring="accuracy")

print(f'Scores for each fold are: {score}')
# The ':.2f' format spec inside the f-string replaces the nested
# "{:.2f}".format(...) call; the printed text is unchanged.
print(f'Average score: {score.mean():.2f}')

Scores for each fold are: [0.95889296 0.95278795 0.95480456 0.94991857 0.95724756]
Average score: 0.95


In [14]:
# Sweep the number of trees in the forest and report the mean cross-validated
# accuracy for each setting. Every candidate uses the same folds (`kf`) and
# the same random_state, so the averages are directly comparable.
n_estimators = [50, 100, 150, 200, 250, 300, 350]

for val in n_estimators:
    rf_candidate = RandomForestClassifier(n_estimators=val, random_state=69)
    # NOTE: `score` is reassigned on every iteration, so after the loop it
    # holds the per-fold scores of the last setting only.
    score = cross_val_score(rf_candidate, X, y, cv=kf, scoring="accuracy")
    # ':.3f' format spec replaces the nested "{:.3f}".format(...) call.
    print(f'Average score({val}): {score.mean():.3f}')

Average score(50): 0.954
Average score(100): 0.955
Average score(150): 0.955
Average score(200): 0.955
Average score(250): 0.956
Average score(300): 0.955
Average score(350): 0.955
