In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv') 
# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[['Mean_X', 'Std Dev_X', 'Energy_X', 'Entropy_X', 'Peaks_X']]
y = data['category']


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Train the classifier
nb_classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = nb_classifier.predict(X_test)

# Evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.6442417331812998
Classification Report:
              precision    recall  f1-score   support

           1       0.87      0.44      0.58       197
           2       0.52      0.90      0.66       181
           3       0.46      0.10      0.16       164
           4       0.80      0.94      0.86       177
           5       0.60      0.85      0.70       158

    accuracy                           0.64       877
   macro avg       0.65      0.64      0.59       877
weighted avg       0.66      0.64      0.60       877



In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[['Mean_X', 'Std Dev_X', 'Energy_X', 'Entropy_X', 'Peaks_X']]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Define the hyperparameters grid to search
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=5, scoring='accuracy')

# Perform GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Make predictions on the testing set using the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the performance of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report for the best model
print("Classification Report:")
print(classification_report(y_test, y_pred))


Best Hyperparameters: {'var_smoothing': 1e-09}
Accuracy: 0.5883694412770809
Classification Report:
              precision    recall  f1-score   support

           1       0.57      0.71      0.64       192
           2       0.47      0.95      0.63       155
           3       0.50      0.05      0.08       175
           4       0.83      0.86      0.84       181
           5       0.57      0.39      0.46       174

    accuracy                           0.59       877
   macro avg       0.59      0.59      0.53       877
weighted avg       0.59      0.59      0.53       877



In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Define the range of test sizes and random states to iterate over
test_sizes = [0.1, 0.15, 0.2, 0.25, 0.3]
random_states = list(range(42, 201))

best_accuracy = 0
best_test_size = 0
best_random_state = 0

for test_size in test_sizes:
    for random_state in random_states:
        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        # Initialize the Naive Bayes classifier
        nb_classifier = GaussianNB()

        # Fit the classifier to the training data
        nb_classifier.fit(X_train, y_train)

        # Make predictions on the testing set
        y_pred = nb_classifier.predict(X_test)

        # Evaluate the performance of the classifier
        accuracy = accuracy_score(y_test, y_pred)
        
        # Check if the current accuracy is better than the best accuracy so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_test_size = test_size
            best_random_state = random_state

print("Best Accuracy:", best_accuracy)
print("Best Test Size:", best_test_size)
print("Best Random State:", best_random_state)


Best Accuracy: 0.9840546697038725
Best Test Size: 0.1
Best Random State: 121


In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=176)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Define the hyperparameters grid to search
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=10, scoring='accuracy')

# Perform GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Make predictions on the testing set using the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the performance of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report for the best model
print("Classification Report:")
print(classification_report(y_test, y_pred))


Best Hyperparameters: {'var_smoothing': 1e-09}
Accuracy: 0.7947548460661346
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99       165
           2       0.97      0.99      0.98       189
           3       0.47      0.95      0.62       152
           4       0.90      0.10      0.18       188
           5       0.97      1.00      0.98       183

    accuracy                           0.79       877
   macro avg       0.86      0.81      0.75       877
weighted avg       0.87      0.79      0.75       877



In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=176)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Define the hyperparameters grid to search
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=10, scoring='accuracy')

# Perform GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Make predictions on the testing set using the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the performance of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report for the best model
print("Classification Report:")
print(classification_report(y_test, y_pred))


Best Hyperparameters: {'var_smoothing': 1e-09}
Accuracy: 0.7947548460661346
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99       165
           2       0.97      0.99      0.98       189
           3       0.47      0.95      0.62       152
           4       0.90      0.10      0.18       188
           5       0.97      1.00      0.98       183

    accuracy                           0.79       877
   macro avg       0.86      0.81      0.75       877
weighted avg       0.87      0.79      0.75       877



In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=121)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Define the hyperparameters grid to search
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=param_grid, cv=10, scoring='accuracy')

# Perform GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Make predictions on the testing set using the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the performance of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report for the best model
print("Classification Report:")
print(classification_report(y_test, y_pred))



Best Hyperparameters: {'var_smoothing': 1e-09}
Accuracy: 0.9840546697038725
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99        85
           2       1.00      0.99      0.99        94
           3       1.00      0.95      0.97        81
           4       0.95      0.99      0.97        94
           5       0.99      1.00      0.99        85

    accuracy                           0.98       439
   macro avg       0.99      0.98      0.98       439
weighted avg       0.98      0.98      0.98       439



In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=121)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Train the classifier on the training data
nb_classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = nb_classifier.predict(X_test)

# Evaluate the performance of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate a classification report for the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9840546697038725
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99        85
           2       1.00      0.99      0.99        94
           3       1.00      0.95      0.97        81
           4       0.95      0.99      0.97        94
           5       0.99      1.00      0.99        85

    accuracy                           0.98       439
   macro avg       0.99      0.98      0.98       439
weighted avg       0.98      0.98      0.98       439



In [45]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=121)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Train the classifier on the training data
nb_classifier.fit(X_train, y_train)

# Compute training accuracy
y_train_pred = nb_classifier.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy:", train_accuracy)

# Make predictions on the testing set
y_pred = nb_classifier.predict(X_test)

# Evaluate the performance of the classifier
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

# Generate a classification report for the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred))




Training Accuracy: 0.9591784989858012
Test Accuracy: 0.9840546697038725
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99        85
           2       1.00      0.99      0.99        94
           3       1.00      0.95      0.97        81
           4       0.95      0.99      0.97        94
           5       0.99      1.00      0.99        85

    accuracy                           0.98       439
   macro avg       0.99      0.98      0.98       439
weighted avg       0.98      0.98      0.98       439



In [51]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=121)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Perform k-fold cross-validation
k = 10  # number of folds
cv_scores = cross_val_score(nb_classifier, X, y, cv=k)

# Print the cross-validation sc
print("Cross-Validation Scores:", cv_scores)

# Compute the average cross-validation score
avg_cv_score = cv_scores.mean()
print("Average Cross-Validation Score:", avg_cv_score)

# Train the classifier on the entire training data
nb_classifier.fit(X_train, y_train)

# Compute training accuracy
train_accuracy = accuracy_score(y_train, nb_classifier.predict(X_train))
print("Training Accuracy:", train_accuracy)

# Make predictions on the testing set
y_pred = nb_classifier.predict(X_test)

# Compute testing accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print("Testing Accuracy:", test_accuracy)

# Generate a classification report for the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred))


Cross-Validation Scores: [0.6856492  0.98861048 1.         1.         1.         1.
 1.         1.         1.         0.84931507]
Average Cross-Validation Score: 0.9523574749586545
Training Accuracy: 0.9591784989858012
Testing Accuracy: 0.9840546697038725
Classification Report:
              precision    recall  f1-score   support

           1       0.99      0.99      0.99        85
           2       1.00      0.99      0.99        94
           3       1.00      0.95      0.97        81
           4       0.95      0.99      0.97        94
           5       0.99      1.00      0.99        85

    accuracy                           0.98       439
   macro avg       0.99      0.98      0.98       439
weighted avg       0.98      0.98      0.98       439



In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('Dataset/fft_combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Define the range of test sizes and random states to iterate over
test_sizes = [0.1, 0.15, 0.2, 0.25, 0.3]
random_states = list(range(42, 201))

best_accuracy = 0
best_test_size = 0
best_random_state = 0

for test_size in test_sizes:
    for random_state in random_states:
        # Split the data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

        # Initialize the Naive Bayes classifier
        nb_classifier = GaussianNB()

        # Fit the classifier to the training data
        nb_classifier.fit(X_train, y_train)

        # Make predictions on the testing set
        y_pred = nb_classifier.predict(X_test)

        # Evaluate the performance of the classifier
        accuracy = accuracy_score(y_test, y_pred)
        
        # Check if the current accuracy is better than the best accuracy so far
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_test_size = test_size
            best_random_state = random_state

print("Best Accuracy:", best_accuracy)
print("Best Test Size:", best_test_size)
print("Best Random State:", best_random_state)

Best Accuracy: 1.0
Best Test Size: 0.1
Best Random State: 43


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv('Dataset/fft_combined_metrics.csv')

# Drop rows with missing values
data.dropna(inplace=True)

# Split the data into features (X) and target variable (y)
X = data[::-1]
y = data['category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=43)

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Perform k-fold cross-validation
k = 5 # number of folds
cv_scores = cross_val_score(nb_classifier, X, y, cv=k)

# Print the cross-validation sc
print("Cross-Validation Scores:", cv_scores)

# Compute the average cross-validation score
avg_cv_score = cv_scores.mean()
print("Average Cross-Validation Score:", avg_cv_score)

# Train the classifier on the entire training data
nb_classifier.fit(X_train, y_train)

# Compute training accuracy
train_accuracy = accuracy_score(y_train, nb_classifier.predict(X_train))
print("Training Accuracy:", train_accuracy)

# Make predictions on the testing set
y_pred = nb_classifier.predict(X_test)

# Compute testing accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print("Testing Accuracy:", test_accuracy)

# Generate a classification report for the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred))

Cross-Validation Scores: [0.77777778 1.         1.         1.         1.        ]
Average Cross-Validation Score: 0.9555555555555555
Training Accuracy: 0.9459459459459459
Testing Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5

