# RTF Model for predicting racket type using P1, P2 and P3, based on envelope - Sound

### Model Description

This notebook implements a Random Forest model (abbreviated RTF, for "Random Tree Forest") to predict the type of a racket (RB, RO, RR, RV) from sound features extracted from audio files. The workflow reads the `.wav` files, extracts the envelope of each recorded signal using the Hilbert transform, and trains the model on these envelopes. The model's performance is evaluated using accuracy metrics and visualized through scatter plots and confusion matrices.

### Import libraries

In [None]:
import os
import sys
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

### Import Tools Functions

In [None]:
# Resolve the project root, three directory levels above the notebook's working directory
notebook_path = os.path.abspath(os.curdir)
project_root = os.path.abspath(os.path.join(notebook_path, '..', '..', '..'))
functions_path = os.path.join(project_root, 'Functions')

# Make the helper modules in "Functions" importable; do nothing if the folder is already registered
already_on_path = functions_path in sys.path
if not already_on_path:
    sys.path.append(functions_path)

In [12]:
# Import functions

from readWavFolder import readWavFolder
from spectrumFromSignal import spectrumFromSignal
from envelope_from_signal import envelope_from_signal

## Main

In [None]:
# Racket type codes and their integer ids
raquetteTypeList = {"RB": 0, "RO": 1, "RR": 2, "RV": 3}
bd = 15

# List for storing results (filled by the hyper-parameter sweep cell)
results = []

# Feature vectors (one envelope per recording) and their racket-type labels
X = []
Y_Label = []

# One record per wav file: source folder, file name, sample rate and raw signal
wav_files_data = []

# Process each folder (P1, P2, P3)
for folder, folder_path in [("P1", "../../../Data/Sound/P1"),
                            ("P2", "../../../Data/Sound/P2"),
                            ("P3", "../../../Data/Sound/P3")]:
    sample_rates, wav_files, file_names = readWavFolder(folder_path)

    for sample_rate, wav_file, file_name in zip(sample_rates, wav_files, file_names):
        wav_files_data.append({
            "Folder": folder,
            "File_Name": file_name,
            "Sample_Rate": sample_rate,
            "Signal": wav_file
        })

# Convert the list of dictionaries into a DataFrame
wav_files_df = pd.DataFrame(wav_files_data)

# Racket codes in the order they are searched for in the file name.
# The order is significant: the first code found wins.
racket_codes = ("RB", "RR", "RO", "RV")

# For each selected wav file, extract the envelope of its signal and record its racket type.
# Only files whose name contains "C" are used.
for record in wav_files_data:
    file_name = record["File_Name"]
    if "C" not in file_name:
        continue

    # Resolve the label for THIS file; never fall back to the label of the previous file
    raquetteType = next((code for code in racket_codes if code in file_name), None)
    if raquetteType is None:
        print(f"Skipping '{file_name}': no racket type code (RB, RR, RO, RV) found in the file name.")
        continue

    # Use the first channel of multi-channel recordings; accept mono (1-D) signals as they are
    signal = np.asarray(record["Signal"])
    first_channel = signal if signal.ndim == 1 else signal[:, 0]

    envelope = envelope_from_signal(first_channel)

    X.append(envelope)
    Y_Label.append(raquetteType)

if not X:
    raise ValueError("No usable recordings found: X is empty, nothing to train on.")

# Zero-pad every envelope at its end up to the longest one so all feature vectors share one length
max_length = max(len(x) for x in X)
X = [np.pad(x, (0, max_length - len(x)), mode='constant') for x in X]

In [14]:
# Encode string labels into integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(Y_Label)

# Hold out 20% of the samples as a test set.
# The seed is fixed so the split (and therefore every reported accuracy and the saved CSV)
# is reproducible across runs; 42 matches the seed used for the classifier.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [15]:
# Random Forest hyper-parameter grid
n_estimators_range = range(10, 101, 10)  # Number of trees between 10 and 100
max_depth_range = [None, 10, 20, 30, 40]  # Maximum tree depths (None = unlimited)
min_samples_split_range = [2, 5, 10]  # Minimum number of samples required to split a node

# NOTE(review): the two ranges below are defined but are NOT part of the sweep;
# the classifier uses its defaults for these parameters. Add loops if they should be tuned.
min_samples_leaf_range = [1, 2, 4]  # Minimum number of samples in a leaf
max_features_range = ['sqrt', 'log2', None]  # Number of features considered at each split

# Start from an empty list so that re-running this cell does not append duplicate rows
results = []

# Test every combination of n_estimators, max_depth and min_samples_split
for n_estimators in n_estimators_range:
    for max_depth in max_depth_range:
        for min_samples_split in min_samples_split_range:
            # Create and train the Random Forest model
            rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                        min_samples_split=min_samples_split, random_state=42)
            rf.fit(X_train, y_train)

            # Evaluate on the test set
            y_pred = rf.predict(X_test)
            accuracy_test = accuracy_score(y_test, y_pred)

            # Evaluate on the training set (a large train/test gap indicates over-fitting)
            y_train_pred = rf.predict(X_train)
            accuracy_train = accuracy_score(y_train, y_train_pred)

            # Add the results to the list
            results.append({
                'band_width': bd,
                'n_estimators': n_estimators,
                'max_depth': max_depth,
                'min_samples_split': min_samples_split,
                'accuracy_train': accuracy_train,
                'accuracy_test': accuracy_test
            })

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)

# Sort the results by accuracy_test in descending order.
# NOTE(review): configurations are ranked on the test set itself, so the top accuracy_test
# is an optimistic estimate; a separate validation split would be needed for an unbiased one.
sorted_results_df = results_df.sort_values(by='accuracy_test', ascending=False)

print(sorted_results_df)

# Save the results of every tested configuration (best first) to a CSV file
output_csv = "S_RTF_Racket_P1.P2.P3_Envelope.csv"
sorted_results_df.to_csv(output_csv, index=False)

print(f"Results have been saved to '{output_csv}'.")

     band_width  n_estimators  max_depth  min_samples_split  accuracy_train  \
48           15            40       10.0                  2        1.000000   
138          15           100       10.0                  2        1.000000   
50           15            40       10.0                 10        0.991632   
42           15            30       40.0                  2        1.000000   
30           15            30        NaN                  2        1.000000   
..          ...           ...        ...                ...             ...   
77           15            60        NaN                 10        0.997908   
83           15            60       20.0                 10        0.997908   
86           15            60       30.0                 10        0.997908   
89           15            60       40.0                 10        0.997908   
4            15            10       10.0                  5        0.974895   

     accuracy_test  
48        0.525000  
138      