In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


In [2]:
data = pd.read_excel('output.xlsx')

In [3]:
data.shape

(9990, 80)

In [4]:
data.drop(['filename', 'length'], axis=1, inplace=True)

In [5]:
data.shape

(9990, 78)

KNN Classifier

In [8]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [9]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
from sklearn.model_selection import cross_val_score
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

best_k = None
best_accuracy = 0
best_model = None

for k_value in range(1, 11):
    knn = KNeighborsClassifier(n_neighbors=k_value, n_jobs=-1, p=2, metric='minkowski', algorithm='brute')
    
    # Cross-validation ile modelin performansını değerlendir
    scores = cross_val_score(knn, X_train, y_train, cv=10)  # 10-fold cross-validation
    mean_accuracy = np.mean(scores)
    
    print(f"K={k_value}, Cross-Validation Accuracy: {mean_accuracy}")
    
    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_k = k_value
        best_model = knn  # En iyi modeli güncelle

print(f"En iyi k değeri: {best_k} with Cross-Validation Accuracy: {best_accuracy}")

# En iyi k değeri ile modeli eğitildi, ama modeli tutmak için best_model kullanılabilir
best_model.fit(X_train, y_train)

# Test verileri üzerinde performansı değerlendirin
test_accuracy = best_model.score(X_test, y_test)
print(f"Test Accuracy for the best k value: {test_accuracy}")





K=1, Cross-Validation Accuracy: 0.8963962765957447
K=2, Cross-Validation Accuracy: 0.8644906132665833
K=3, Cross-Validation Accuracy: 0.8768771902377972
K=4, Cross-Validation Accuracy: 0.8732489048811013
K=5, Cross-Validation Accuracy: 0.8727485919899876
K=6, Cross-Validation Accuracy: 0.8658671777221528
K=7, Cross-Validation Accuracy: 0.8663685857321652
K=8, Cross-Validation Accuracy: 0.8589845118898622
K=9, Cross-Validation Accuracy: 0.8552320087609511
K=10, Cross-Validation Accuracy: 0.8491013767209011
En iyi k değeri: 1 with Cross-Validation Accuracy: 0.8963962765957447
Test Accuracy for the best k value: 0.9024024024024024


In [12]:
y_pred = best_model.predict(X_test)

In [13]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

target_names = ['0blues', '1classical', '2country', '3disco', '4hiphop', '5jazz', '6metal', '7pop', '8reggae', '9rock']
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred))

0.9024024024024024
              precision    recall  f1-score   support

      0blues       0.90      0.92      0.91       208
  1classical       0.96      0.94      0.95       203
    2country       0.86      0.87      0.87       188
      3disco       0.86      0.88      0.87       200
     4hiphop       0.93      0.90      0.92       215
       5jazz       0.90      0.94      0.92       191
      6metal       0.97      0.95      0.96       204
        7pop       0.92      0.87      0.90       181
     8reggae       0.90      0.90      0.90       210
       9rock       0.84      0.85      0.85       198

    accuracy                           0.90      1998
   macro avg       0.90      0.90      0.90      1998
weighted avg       0.90      0.90      0.90      1998

[[191   0   4   1   1   5   0   0   1   5]
 [  1 190   2   2   0   8   0   0   0   0]
 [  4   1 164   2   0   4   0   1   4   8]
 [  2   2   5 175   5   0   2   0   3   6]
 [  2   0   1   2 194   0   0  10   5   1]
 [  2  

Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
import numpy as np

# Hiperparametre kombinasyonları
n_estimators_values = [500]
max_depth_values = [None, 5, 10]
min_samples_split_values = [2, 5, 10]

best_model = None
best_mean_accuracy = 0

for n_estimators_value in n_estimators_values:
    for max_depth_value in max_depth_values:
        for min_samples_split_value in min_samples_split_values:
            # Random Forest modelini oluşturun
            model = RandomForestClassifier(
                n_estimators=n_estimators_value,
                max_depth=max_depth_value,
                min_samples_split=min_samples_split_value,
                verbose=1,
                n_jobs=-1,
                random_state=42,
                criterion='gini'
            )

            # Cross-validation ile modelin performansını değerlendirin
            scores = cross_val_score(model, X_train, y_train, cv=5, n_jobs=-1)  # 5-fold cross-validation
            mean_accuracy = np.mean(scores)

            print(f"Estimators: {n_estimators_value}, Depth: {max_depth_value}, Min Samples Split: {min_samples_split_value}, Cross-Validation Accuracy: {mean_accuracy}")

            # En iyi modeli güncelle
            if mean_accuracy > best_mean_accuracy:
                best_mean_accuracy = mean_accuracy
                best_model = model

# En iyi modeli kullanarak eğitim verileri üzerinde modeli eğitin
best_model.fit(X_train, y_train)

# Test verileri üzerinde performansı değerlendirin
y_pred_best = best_model.predict(X_test)

# Test Accuracy Score'u yazdırın
print("\nBest Model Test Accuracy Score:")
print(accuracy_score(y_test, y_pred_best))


Estimators: 500, Depth: None, Min Samples Split: 2, Cross-Validation Accuracy: 0.8736236900252896
Estimators: 500, Depth: None, Min Samples Split: 5, Cross-Validation Accuracy: 0.8713708740052646
Estimators: 500, Depth: None, Min Samples Split: 10, Cross-Validation Accuracy: 0.8547290586028031
Estimators: 500, Depth: 5, Min Samples Split: 2, Cross-Validation Accuracy: 0.635384130100086
Estimators: 500, Depth: 5, Min Samples Split: 5, Cross-Validation Accuracy: 0.6347590523175859
Estimators: 500, Depth: 5, Min Samples Split: 10, Cross-Validation Accuracy: 0.63500889557851
Estimators: 500, Depth: 10, Min Samples Split: 2, Cross-Validation Accuracy: 0.8191938641250281
Estimators: 500, Depth: 10, Min Samples Split: 5, Cross-Validation Accuracy: 0.8169411263767014
Estimators: 500, Depth: 10, Min Samples Split: 10, Cross-Validation Accuracy: 0.8103094001961489


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:    9.4s



Best Model Test Accuracy Score:
0.8918918918918919


[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   10.9s finished
[Parallel(n_jobs=12)]: Using backend ThreadingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 176 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 426 tasks      | elapsed:    0.0s
[Parallel(n_jobs=12)]: Done 500 out of 500 | elapsed:    0.0s finished


In [20]:
print(accuracy_score(y_test, y_pred_best))
print(classification_report(y_test, y_pred_best, target_names=target_names))
print(confusion_matrix(y_test, y_pred_best))

0.8918918918918919
              precision    recall  f1-score   support

      0blues       0.86      0.92      0.89       208
  1classical       0.93      0.98      0.96       203
    2country       0.80      0.90      0.85       188
      3disco       0.87      0.85      0.86       200
     4hiphop       0.94      0.89      0.92       215
       5jazz       0.93      0.92      0.92       191
      6metal       0.91      0.94      0.92       204
        7pop       0.90      0.90      0.90       181
     8reggae       0.87      0.89      0.88       210
       9rock       0.91      0.73      0.81       198

    accuracy                           0.89      1998
   macro avg       0.89      0.89      0.89      1998
weighted avg       0.89      0.89      0.89      1998

[[191   1   6   1   0   4   3   0   2   0]
 [  0 199   1   0   0   2   0   0   0   1]
 [  5   2 170   4   0   2   0   1   3   1]
 [  5   2   3 171   6   0   4   2   4   3]
 [  1   0   2   2 192   0   5   8   5   0]
 [  5  

Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import numpy as np

# Hiperparametre kombinasyonları
learning_rates = [0.1]
n_estimators_values = [500]
max_depth_values = [3, 5, 7]

best_model = None
best_mean_accuracy = 0

for learning_rate in learning_rates:
    for n_estimators_value in n_estimators_values:
        for max_depth_value in max_depth_values:
            # Gradient Boosting modelini oluşturun
            model = GradientBoostingClassifier(
                n_estimators=n_estimators_value,
                learning_rate=learning_rate,
                max_depth=max_depth_value,
                max_features='log2',
                random_state=42
            )

            # Cross-validation ile modelin performansını değerlendirin
            scores = cross_val_score(model, X_train, y_train, cv=10, n_jobs=-1)  # 5-fold cross-validation
            mean_accuracy = np.mean(scores)

            print(f"Learning Rate: {learning_rate}, Estimators: {n_estimators_value}, Depth: {max_depth_value}, Cross-Validation Accuracy: {mean_accuracy}")

            # En iyi modeli güncelle
            if mean_accuracy > best_mean_accuracy:
                best_mean_accuracy = mean_accuracy
                best_model = model

# En iyi modeli kullanarak eğitim verileri üzerinde modeli eğitin
best_model.fit(X_train, y_train)

# Test verileri üzerinde performansı değerlendirin
y_pred_best = best_model.predict(X_test)

# Test Accuracy Score'u yazdırın
print("\nBest Model Test Accuracy Score:")
print(accuracy_score(y_test, y_pred_best))

# Test verileri üzerindeki Confusion Matrix'i oluşturun ve yazdırın
print("\nConfusion Matrix (test):")
cm_test_best = confusion_matrix(y_test, y_pred_best)
print(cm_test_best)

# Classification Report'u yazdırın
print("\nClassification Report:")
print(classification_report(y_test, y_pred_best, target_names=target_names))


XGBoost

In [529]:
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import numpy as np

# Hiperparametre kombinasyonları
learning_rates = [0.01, 0.1, 0.5]
n_estimators_values = [100, 500, 1000]
max_depth_values = [3, 5, 7]

best_model = None
best_mean_accuracy = 0

for learning_rate in learning_rates:
    for n_estimators_value in n_estimators_values:
        for max_depth_value in max_depth_values:
            # XGBoost modelini oluşturun
            model = XGBClassifier(
                n_estimators=n_estimators_value,
                learning_rate=learning_rate,
                max_depth=max_depth_value,
                random_state=42,
                n_jobs=-1
            )

            # Cross-validation ile modelin performansını değerlendirin
            scores = cross_val_score(model, X_train, y_train, cv=5, n_jobs=-1)  # 5-fold cross-validation
            mean_accuracy = np.mean(scores)

            print(f"Learning Rate: {learning_rate}, Estimators: {n_estimators_value}, Depth: {max_depth_value}, Cross-Validation Accuracy: {mean_accuracy}")

            # En iyi modeli güncelle
            if mean_accuracy > best_mean_accuracy:
                best_mean_accuracy = mean_accuracy
                best_model = model

# En iyi modeli kullanarak eğitim verileri üzerinde modeli eğitin
best_model.fit(X_train, y_train)

# Test verileri üzerinde performansı değerlendirin
y_pred_best = best_model.predict(X_test)

# Test Accuracy Score'u yazdırın
print("\nBest Model Test Accuracy Score:")
print(accuracy_score(y_test, y_pred_best))

# Test verileri üzerindeki Confusion Matrix'i oluşturun ve yazdırın
print("\nConfusion Matrix (test):")
cm_test_best = confusion_matrix(y_test, y_pred_best)
print(cm_test_best)

# Classification Report'u yazdırın
print("\nClassification Report:")
print(classification_report(y_test, y_pred_best, target_names=target_names))


In [530]:
#without cross validation
# print(accuracy_score(y_test, y_pred_xgb))
# print(classification_report(y_test, y_pred_xgb, target_names=target_names))
# print(confusion_matrix(y_test, y_pred_xgb))

0.9274274274274275
              precision    recall  f1-score   support

      0blues       0.92      0.92      0.92       208
  1classical       0.96      0.99      0.98       203
    2country       0.91      0.95      0.93       188
      3disco       0.92      0.89      0.91       200
     4hiphop       0.94      0.93      0.93       215
       5jazz       0.92      0.92      0.92       191
      6metal       0.96      0.96      0.96       204
        7pop       0.88      0.93      0.91       181
     8reggae       0.95      0.91      0.93       210
       9rock       0.91      0.87      0.89       198

    accuracy                           0.93      1998
   macro avg       0.93      0.93      0.93      1998
weighted avg       0.93      0.93      0.93      1998

[[191   1   3   3   1   5   2   0   1   1]
 [  0 201   0   0   0   1   0   0   0   1]
 [  1   0 179   2   0   3   0   1   1   1]
 [  3   1   1 178   3   2   3   2   2   5]
 [  1   0   0   3 200   0   1   7   2   1]
 [  5  

CNN

In [13]:
from tensorflow import keras 
from tensorflow.keras import Sequential
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import initializers
from tensorflow.keras import models, layers

In [532]:
# model = models.Sequential()
# model.add(layers.Dense(1024, activation='elu',
#                       kernel_initializer=initializers.he_normal(),
#                       input_shape=(X_train.shape[1],)))
# model.add(layers.BatchNormalization())
# model.add(layers.Dropout(0.5))
# model.add(layers.Dense(1024, activation='elu',
#                       kernel_initializer=initializers.he_normal()))
# model.add(layers.BatchNormalization())
# model.add(layers.Dropout(0.5))
# model.add(layers.Dense(1024, activation='elu',
#                       kernel_initializer=initializers.he_normal()))
# model.add(layers.BatchNormalization())
# model.add(layers.Dropout(0.5))
# model.add(layers.Dense(256, activation='softmax'))

# model.compile(optimizer=optimizers.Adam(lr=0.0001),
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])




# model.summary()



Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 1024)              79872     
                                                                 
 batch_normalization_6 (Bat  (None, 1024)              4096      
 chNormalization)                                                
                                                                 
 dropout_9 (Dropout)         (None, 1024)              0         
                                                                 
 dense_13 (Dense)            (None, 1024)              1049600   
                                                                 
 batch_normalization_7 (Bat  (None, 1024)              4096      
 chNormalization)                                                
                                                                 
 dropout_10 (Dropout)        (None, 1024)             

In [533]:
# history = model.fit(X_train,
#                     y_train,
#                     epochs=500,
#                     batch_size=32)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [534]:
# test_loss, test_acc = model.evaluate(X_test,y_test)
# print('test_acc: ',test_acc)

test_acc:  0.9699699878692627


In [535]:
# predictions = model.predict(X_test)



In [536]:
# maxpredicts = []
# for element in predictions:
#     temp = np.argmax(element)
#     maxpredicts.append(temp)

# y_pred = np.array(maxpredicts)

In [537]:
# print(classification_report(y_test, y_pred, target_names=target_names))

              precision    recall  f1-score   support

      0blues       0.97      0.98      0.97       208
  1classical       0.99      0.99      0.99       203
    2country       0.96      0.95      0.96       188
      3disco       0.95      0.96      0.96       200
     4hiphop       0.99      0.97      0.98       215
       5jazz       0.97      0.98      0.98       191
      6metal       0.98      0.97      0.98       204
        7pop       0.94      0.99      0.97       181
     8reggae       1.00      0.96      0.98       210
       9rock       0.95      0.94      0.95       198

    accuracy                           0.97      1998
   macro avg       0.97      0.97      0.97      1998
weighted avg       0.97      0.97      0.97      1998



In [17]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras import initializers, optimizers
import numpy as np


# K-fold cross-validation için StratifiedKFold kullanın (k=10)
kfold = StratifiedKFold(n_splits=10,shuffle=True, random_state=42)

best_model = None
best_accuracy = 0

for train_index, val_index in kfold.split(X_train, y_train):
    # Train ve validation verilerini oluşturun
    X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    # Modeli oluşturun
    model = Sequential()
    model.add(Dense(1024, activation='elu', kernel_initializer=initializers.he_normal(), input_shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='elu', kernel_initializer=initializers.he_normal()))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='elu', kernel_initializer=initializers.he_normal()))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='softmax'))

    model.compile(optimizer=optimizers.Adam(lr=0.0001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # Modeli eğitin
    model.fit(X_train_fold, y_train_fold, epochs=500, batch_size=32)

    # Validation seti üzerinde performansı değerlendirin
    y_val_pred = np.argmax(model.predict(X_val_fold), axis=1)
    val_accuracy = accuracy_score(y_val_fold, y_val_pred)

    print(f"Validation Accuracy: {val_accuracy}")

    # En iyi modeli güncelle
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_model = model

# En iyi modeli kullanarak test seti üzerinde performansı değerlendirin
y_test_pred = np.argmax(best_model.predict(X_test), axis=1)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f"\nBest Model Test Accuracy: {test_accuracy}")




Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 1024)              79872     
                                                                 
 batch_normalization_9 (Bat  (None, 1024)              4096      
 chNormalization)                                                
                                                                 
 dropout_9 (Dropout)         (None, 1024)              0         
                                                                 
 dense_13 (Dense)            (None, 1024)              1049600   
                                                                 
 batch_normalization_10 (Ba  (None, 1024)              4096      
 tchNormalization)                                               
                                                                 
 dropout_10 (Dropout)        (None, 1024)             

KeyboardInterrupt: 