In [92]:
!python -m pip install matplotlib numpy scikit-learn opencv-python tensorflow[and-cuda] pandas



You should consider upgrading via the 'e:\Aditya\Python\python.exe -m pip install --upgrade pip' command.


In [93]:
sequence_length = 20
spectrogram_height = 217
spectrogram_width = 50
latent_dim = 128
num_classes = 10

In [101]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from itertools import product
import pandas as pd

def create_cnn_model(val_kernel_regularizer, num_filters):
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(num_filters[0], (3, 3), activation='relu', input_shape=(spectrogram_height, spectrogram_width, 1), kernel_regularizer=regularizers.l2(2*val_kernel_regularizer)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(num_filters[1], (3, 3), activation='relu', kernel_regularizer=regularizers.l2(2*val_kernel_regularizer)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(num_filters[2], (3, 3), activation='relu', kernel_regularizer=regularizers.l2(val_kernel_regularizer)))
    model.add(layers.Flatten())
    model.add(layers.Dense(latent_dim, activation='relu', kernel_regularizer=regularizers.l2(val_kernel_regularizer)))
    return model

def create_attention_model(sequence_length, latent_dim, num_attention_heads):
    inputs = tf.keras.Input(shape=(sequence_length, latent_dim))
    attention_heads = []
    for _ in range(num_attention_heads):
        attention_head = layers.Attention()([inputs, inputs])
        attention_heads.append(attention_head)
    attention_concat = layers.Concatenate()(attention_heads)
    attention_output = layers.Dense(latent_dim, activation='relu')(attention_concat)
    return tf.keras.Model(inputs=inputs, outputs=attention_output)

def create_classification_model(sequence_length, latent_dim, num_classes, num_attention_heads):
    inputs = tf.keras.Input(shape=(sequence_length, latent_dim))
    attention_model = create_attention_model(sequence_length, latent_dim, num_attention_heads)
    attention_output = attention_model(inputs)
    classification_output = layers.Dense(num_classes, activation='softmax')(attention_output[:, -1, :])
    return tf.keras.Model(inputs=inputs, outputs=classification_output)

def create_model(sequence_length, spectrogram_height, spectrogram_width, latent_dim, num_classes, num_attention_heads, num_filters, val_kernel_regularizer):
    cnn_model = create_cnn_model(val_kernel_regularizer, num_filters)
    classification_model = create_classification_model(sequence_length, latent_dim, num_classes, num_attention_heads)
    inputs = tf.keras.Input(shape=(sequence_length, spectrogram_height, spectrogram_width, 1))
    token_embeddings = layers.TimeDistributed(cnn_model)(inputs)
    classification_output = classification_model(token_embeddings)
    return tf.keras.Model(inputs=inputs, outputs=classification_output)

def train(model, X_train, y_train, X_val, y_val, reduce_factor, reduce_patience, epochs=20, batch_size=32):
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=reduce_factor, patience=reduce_patience)
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val), callbacks=[reduce_lr])
    return history

In [95]:
import os
import cv2
import numpy as np

input_dir = 'E:\\Aditya\\IIT_Hyderabad\\Assignments\\Deep Learning\\Project\\Data\\images_cropped_vertical'


spectrogram_height = 217
spectrogram_width = 50
X_train = []
y_train = []

for label, genre_folder in enumerate(os.listdir(input_dir)):
    genre_path = os.path.join(input_dir, genre_folder)
    if not os.path.isdir(genre_path):
        continue

    genre_x = []
    genre_y = []
    for number, filename in enumerate(os.listdir(genre_path)):
        img_path = os.path.join(genre_path, filename)
        img = cv2.imread(img_path)
        if img is None:
            continue
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_resized = cv2.resize(img_gray, (spectrogram_width, spectrogram_height))

        genre_x.append(img_resized)
        genre_y.append(label)
        if ((number + 1)%20 == 0):
            X_train.append(genre_x)
            genre_x = []
            genre_y = []
            y_train.append(label)


X_train = np.array(X_train)
y_train = np.array(y_train)

In [96]:
shuffle_indices = np.arange(X_train.shape[0])
np.random.shuffle(shuffle_indices)
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

X_train shape: (999, 20, 217, 50)
y_train shape: (999,)


In [97]:
import os
import numpy as np

train_data = 'E:\\Aditya\\IIT_Hyderabad\\Assignments\\Deep Learning\\Project\\Data\\train_data'
os.makedirs(train_data, exist_ok=True)

np.save(os.path.join(train_data, 'X_train.npy'), X_train)
np.save(os.path.join(train_data, 'y_train.npy'), y_train)


In [98]:
!python -m pip install pandas



You should consider upgrading via the 'e:\Aditya\Python\python.exe -m pip install --upgrade pip' command.


In [None]:
num_filters_list = [(32, 64, 64), (32, 64, 128)]
kernel_regularizers = [0.005, 0.01, 0.02]
num_attention_heads_list = [2, 4, 6]
batch_sizes = [32, 64]
results = []


for num_filters, kernel_regularizer, num_attention_heads, batch_size in product(num_filters_list, kernel_regularizers, num_attention_heads_list, batch_sizes):
    best_val_accuracies = []
    best_model_weights = []
    for train_index, val_index in StratifiedKFold(n_splits=5).split(X_train, y_train):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        model = create_model(sequence_length, spectrogram_height, spectrogram_width, latent_dim, num_classes, num_attention_heads, num_filters, kernel_regularizer)
        history = train(model, X_train_fold, y_train_fold, X_val_fold, y_val_fold, reduce_factor = 0.2, reduce_patience = 4, epochs = 15, batch_size = batch_size)
    
        _, val_accuracy = model.evaluate(X_val_fold, y_val_fold)
        best_val_accuracies.append(val_accuracy)
        model_weights = model.get_weights()
        best_model_weights.append(model_weights)
        results.append({
                    'Num Filters': num_filters,
                    'Kernel Regularizer': kernel_regularizer,
                    'Num Attention Heads': num_attention_heads,
                    'Batch Size': batch_size,
                    'Validation Accuracy': val_accuracy
                })
        break

In [112]:
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='Validation Accuracy', ascending=False)
pd.set_option('display.max_rows', None)

In [113]:
results_df.head(10)

Unnamed: 0,Num Filters,Kernel Regularizer,Num Attention Heads,Batch Size,Validation Accuracy
2,"(32, 64, 64)",0.005,4,32,0.51
1,"(32, 64, 64)",0.005,2,64,0.51
33,"(32, 64, 128)",0.02,4,64,0.5
3,"(32, 64, 64)",0.005,4,64,0.485
20,"(32, 64, 128)",0.005,4,32,0.485
23,"(32, 64, 128)",0.005,6,64,0.485
21,"(32, 64, 128)",0.005,4,64,0.475
6,"(32, 64, 64)",0.01,2,32,0.47
27,"(32, 64, 128)",0.01,4,64,0.47
11,"(32, 64, 64)",0.01,6,64,0.465


In [106]:
best_model_row  = results_df.loc[results_df['Validation Accuracy'].idxmax()]

In [None]:
reduce_factor_list = [0.2, 0.3]
reduce_patience_list = [4, 6]
results_reduce = []

for reduce_factor, reduce_patience in product(reduce_factor_list, reduce_patience_list):
    best_val_accuracies = []
    best_model_weights = []
    for train_index, val_index in StratifiedKFold(n_splits=8).split(X_train, y_train):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        model = create_model(sequence_length, spectrogram_height, spectrogram_width, latent_dim, num_classes, best_model_row['Num Attention Heads'], best_model_row['Num Filters'], best_model_row['Kernel Regularizer'])
        history = train(model, X_train_fold, y_train_fold, X_val_fold, y_val_fold, reduce_factor, reduce_patience, epochs = 20, batch_size = best_model_row['Batch Size'])
        
        _, val_accuracy = model.evaluate(X_val_fold, y_val_fold)
        best_val_accuracies.append(val_accuracy)
        
        # Save the weights of the model
        model_weights = model.get_weights()
        best_model_weights.append(model_weights)
        results_reduce.append({
                    'Reduce Factor': reduce_factor,
                    'Reduce Patience': reduce_patience,
                })
        break

In [115]:
print(f"Best Model Parameters:\nNumber of Filters: {best_model_row['Num Filters']}\nKernel Regularizer Value: {best_model_row['Kernel Regularizer']}\nNumber of Attention Heads: {best_model_row['Num Attention Heads']}\nBatch Size: {best_model_row['Batch Size']}\nReduce Factor: 0.3\nReduce Patience: 6")

Best Model Parameters:
Number of Filters: (32, 64, 64)
Kernel Regularizer Value: 0.005
Number of Attention Heads: 4
Batch Size: 32
Reduce Factor: 0.3
Reduce Patience: 6
