## Implementing Naive Bayes using sklearn library

#### Load necessary libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Load the dataset

In [5]:
data = pd.read_csv('music.csv')

data.head()

Unnamed: 0,Class,_RMSenergy_Mean,_Lowenergy_Mean,_Fluctuation_Mean,_Tempo_Mean,_MFCC_Mean_1,_MFCC_Mean_2,_MFCC_Mean_3,_MFCC_Mean_4,_MFCC_Mean_5,...,_Chromagram_Mean_9,_Chromagram_Mean_10,_Chromagram_Mean_11,_Chromagram_Mean_12,_HarmonicChangeDetectionFunction_Mean,_HarmonicChangeDetectionFunction_Std,_HarmonicChangeDetectionFunction_Slope,_HarmonicChangeDetectionFunction_PeriodFreq,_HarmonicChangeDetectionFunction_PeriodAmp,_HarmonicChangeDetectionFunction_PeriodEntropy
0,relax,0.052,0.591,9.136,130.043,3.997,0.363,0.887,0.078,0.221,...,0.426,1.0,0.008,0.101,0.316,0.261,0.018,1.035,0.593,0.97
1,relax,0.125,0.439,6.68,142.24,4.058,0.516,0.785,0.397,0.556,...,0.002,1.0,0.0,0.984,0.285,0.211,-0.082,3.364,0.702,0.967
2,relax,0.046,0.639,10.578,188.154,2.775,0.903,0.502,0.329,0.287,...,0.184,0.746,0.016,1.0,0.413,0.299,0.134,1.682,0.692,0.963
3,relax,0.135,0.603,10.442,65.991,2.841,1.552,0.612,0.351,0.011,...,0.038,1.0,0.161,0.757,0.422,0.265,0.042,0.354,0.743,0.968
4,relax,0.066,0.591,9.769,88.89,3.217,0.228,0.814,0.096,0.434,...,0.004,0.404,1.0,0.001,0.345,0.261,0.089,0.748,0.674,0.957


#### Check for null values in the dataset

In [6]:
data.isnull().sum()

Class                                             0
_RMSenergy_Mean                                   0
_Lowenergy_Mean                                   0
_Fluctuation_Mean                                 0
_Tempo_Mean                                       0
_MFCC_Mean_1                                      0
_MFCC_Mean_2                                      0
_MFCC_Mean_3                                      0
_MFCC_Mean_4                                      0
_MFCC_Mean_5                                      0
_MFCC_Mean_6                                      0
_MFCC_Mean_7                                      0
_MFCC_Mean_8                                      0
_MFCC_Mean_9                                      0
_MFCC_Mean_10                                     0
_MFCC_Mean_11                                     0
_MFCC_Mean_12                                     0
_MFCC_Mean_13                                     0
_Roughness_Mean                                   0
_Roughness_S

***It seems that there are no null values in the dataset***

#### Separate input features and output labels

In [7]:
x, y = data.drop('Class', axis = 1), data.Class

x

Unnamed: 0,_RMSenergy_Mean,_Lowenergy_Mean,_Fluctuation_Mean,_Tempo_Mean,_MFCC_Mean_1,_MFCC_Mean_2,_MFCC_Mean_3,_MFCC_Mean_4,_MFCC_Mean_5,_MFCC_Mean_6,...,_Chromagram_Mean_9,_Chromagram_Mean_10,_Chromagram_Mean_11,_Chromagram_Mean_12,_HarmonicChangeDetectionFunction_Mean,_HarmonicChangeDetectionFunction_Std,_HarmonicChangeDetectionFunction_Slope,_HarmonicChangeDetectionFunction_PeriodFreq,_HarmonicChangeDetectionFunction_PeriodAmp,_HarmonicChangeDetectionFunction_PeriodEntropy
0,0.052,0.591,9.136,130.043,3.997,0.363,0.887,0.078,0.221,0.118,...,0.426,1.000,0.008,0.101,0.316,0.261,0.018,1.035,0.593,0.970
1,0.125,0.439,6.680,142.240,4.058,0.516,0.785,0.397,0.556,0.799,...,0.002,1.000,0.000,0.984,0.285,0.211,-0.082,3.364,0.702,0.967
2,0.046,0.639,10.578,188.154,2.775,0.903,0.502,0.329,0.287,0.140,...,0.184,0.746,0.016,1.000,0.413,0.299,0.134,1.682,0.692,0.963
3,0.135,0.603,10.442,65.991,2.841,1.552,0.612,0.351,0.011,0.143,...,0.038,1.000,0.161,0.757,0.422,0.265,0.042,0.354,0.743,0.968
4,0.066,0.591,9.769,88.890,3.217,0.228,0.814,0.096,0.434,0.285,...,0.004,0.404,1.000,0.001,0.345,0.261,0.089,0.748,0.674,0.957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.174,0.578,5.265,161.144,2.985,0.065,0.718,0.046,0.265,0.104,...,0.374,0.838,0.413,0.125,0.323,0.140,0.120,2.691,0.867,0.969
396,0.176,0.498,5.042,179.787,2.790,-0.148,0.342,-0.011,0.029,0.039,...,0.020,1.000,0.640,0.010,0.233,0.127,0.145,3.364,0.879,0.967
397,0.187,0.557,4.724,134.032,1.672,0.566,0.880,-0.113,0.244,0.214,...,0.052,0.184,0.287,0.101,0.269,0.157,0.111,3.364,0.819,0.962
398,0.140,0.573,4.470,113.600,1.997,-0.210,0.694,0.089,0.206,0.062,...,0.137,1.000,0.304,0.140,0.277,0.144,0.061,3.364,0.811,0.969


### Perform encoding of class labels

In [8]:
from sklearn.preprocessing import LabelEncoder

#### Initialize the encoder

In [9]:
encoder = LabelEncoder()

#### Transform the labels

In [10]:
y_transformed = encoder.fit_transform(y)

In [11]:
y_transformed

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0,

### Perform normalization of the dataset

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
scaler = MinMaxScaler()

In [16]:
x_normalized = scaler.fit_transform(x)

In [17]:
x_normalized[:, 0]

array([0.09976247, 0.27315914, 0.08551069, 0.29691211, 0.13301663,
       0.30166271, 0.26840855, 0.22090261, 0.32066508, 0.28266033,
       0.31353919, 0.16627078, 0.28978622, 0.13776722, 0.0760095 ,
       0.39667458, 0.3087886 , 0.27790974, 0.152019  , 0.56057007,
       0.50356295, 0.14489311, 0.12589074, 0.27315914, 0.11876485,
       0.21852732, 0.44655582, 0.1543943 , 0.41330166, 0.20665083,
       0.13776722, 0.27790974, 0.29691211, 0.21377672, 0.26128266,
       0.34441805, 0.26840855, 0.3064133 , 0.20902613, 0.10688836,
       0.11876485, 0.41805226, 0.28266033, 0.21852732, 0.29453682,
       0.17339667, 0.39667458, 0.17814727, 0.27078385, 0.23515439,
       0.35154394, 0.49881235, 0.29928741, 0.29928741, 0.15914489,
       0.19714964, 0.23752969, 0.28028504, 0.13301663, 0.11638955,
       0.11638955, 0.37292162, 0.03800475, 0.37767221, 0.55581948,
       0.11876485, 0.304038  , 0.304038  , 0.24703088, 0.41805226,
       0.29216152, 0.4608076 , 0.11876485, 0.0736342 , 0.31353

### Separate data into training and testing

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x_normalized, y_transformed, test_size = 0.20, random_state = 100)

In [20]:
y_train

array([1, 2, 2, 0, 1, 1, 1, 1, 1, 2, 3, 0, 3, 0, 0, 0, 0, 1, 2, 2, 1, 3,
       3, 1, 0, 2, 2, 2, 3, 2, 0, 2, 1, 2, 0, 2, 1, 0, 2, 3, 3, 2, 1, 2,
       3, 1, 0, 1, 2, 2, 3, 3, 0, 1, 2, 1, 2, 0, 2, 3, 0, 2, 3, 1, 0, 0,
       2, 3, 0, 3, 1, 2, 0, 3, 1, 1, 0, 1, 1, 0, 2, 3, 2, 2, 0, 0, 1, 3,
       0, 2, 2, 0, 0, 1, 1, 3, 2, 1, 1, 2, 3, 1, 0, 0, 2, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 3, 0, 2, 3, 2, 3, 1, 2, 1, 3, 0, 2, 0, 3,
       2, 3, 0, 0, 2, 0, 3, 3, 0, 1, 2, 0, 2, 2, 3, 0, 1, 2, 3, 1, 0, 1,
       2, 1, 2, 0, 0, 3, 3, 2, 0, 1, 3, 0, 0, 2, 3, 3, 3, 0, 1, 0, 1, 1,
       0, 0, 0, 3, 2, 1, 1, 0, 0, 2, 3, 2, 1, 1, 3, 2, 0, 2, 0, 2, 2, 1,
       3, 3, 1, 3, 3, 3, 2, 1, 2, 2, 2, 0, 3, 0, 3, 3, 1, 3, 1, 1, 1, 1,
       3, 3, 2, 3, 1, 0, 0, 3, 3, 1, 3, 2, 1, 3, 1, 2, 1, 0, 2, 3, 2, 3,
       2, 3, 1, 1, 0, 3, 3, 3, 0, 2, 1, 1, 0, 2, 0, 0, 2, 0, 2, 2, 1, 2,
       2, 3, 3, 1, 3, 1, 3, 1, 0, 3, 0, 3, 1, 2, 1, 2, 0, 3, 1, 3, 0, 1,
       2, 1, 0, 2, 2, 1, 3, 3, 1, 0, 1, 2, 2, 1, 2,

### Applying Gaussian Naive Bayes

In [80]:
from sklearn.naive_bayes import GaussianNB

#### Build the model

In [81]:
classifier = GaussianNB()

#### Train the model

In [82]:
classifier.fit(x_train, y_train)

In [83]:
x_test[0]

array([0.24703088, 0.53865337, 0.1464187 , 0.45899606, 0.53199365,
       0.53421878, 0.5176565 , 0.72013034, 0.55971595, 0.69562682,
       0.82415395, 0.58016304, 0.5137931 , 0.56166983, 0.43119266,
       0.53040103, 0.5700692 , 0.06163652, 0.58701533, 0.17050364,
       0.09032258, 0.43984962, 0.1455484 , 0.2986525 , 0.20946746,
       0.33625731, 0.1686448 , 0.29492825, 0.32712659, 0.08637749,
       0.06403941, 0.5       , 1.        , 0.512     , 0.528     ,
       0.762     , 0.859     , 0.094     , 0.485     , 0.516     ,
       0.027     , 0.16      , 0.01      , 0.729     , 0.49734043,
       0.48214286, 0.37276479, 0.47825076, 0.71164021, 0.68421053])

In [84]:
prediction = classifier.predict(x_test)

### Find accuracy

In [86]:
from sklearn.metrics import accuracy_score

In [87]:
accuracy = accuracy_score(y_test, prediction)

In [88]:
print("Accuracy =", accuracy * 100, "%")

Accuracy = 75.0 %
