In [50]:
import pandas as pd
import csv
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Lambda, LSTM, Dense, Dropout, Input, Bidirectional
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec
from gensim.test.utils import simple_preprocess
from sklearn.preprocessing import StandardScaler  # For Z-score standardization
import os
import re
import pickle
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from numba import cuda 

In [2]:
# Report GPU visibility through the supported API. tf.test.is_gpu_available()
# is deprecated in favour of tf.config.list_physical_devices('GPU'), so the
# boolean is derived from the device list instead.
gpu_devices = tf.config.list_physical_devices('GPU')
print(bool(gpu_devices))  # True when at least one GPU device is listed
print(gpu_devices)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Load the track-id / genre frame from a pickle file.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# were produced by a trusted source.
fin_fil_df = pd.read_pickle('pickles/fin.pkl')

In [4]:
# Preview the loaded frame (the rich display elides the middle rows).
fin_fil_df

Unnamed: 0,id,genre
0,0009fFIM1eYThaPg,pop
2,002Jyd0vN4HyCpqL,classic rock
5,00CH4HJdxQQQbJfu,indie rock
8,00IeldeA9ijZOL0P,pop
10,00LuPWdOccBb09bW,alternative rock
...,...,...
109262,zzx8CWdM7qkxKQpC,indie rock
109265,zzz0n04uuTUA7fNh,pop
109266,zzzj3LYaZtYtbzSr,singer-songwriter
109267,zzznMjZAKnJJXQSj,pop


In [15]:
# Parse the metadata file into `header` (list of column names) and `rows`
# (one list of string fields per record). The file is read with the default
# comma dialect and the first comma-delimited field is then split on tabs.
rows = []
csv_path = 'data/music4all_subset/id_metadata.csv'
with open(csv_path, 'r', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)

    # Fail with a clear message instead of a bare StopIteration/IndexError
    # when the file is empty or starts with a blank line.
    first_row = next(csv_reader, None)
    if not first_row:
        raise ValueError(f'{csv_path} is empty or has no header line')
    header = first_row[0].split('\t')

    for row in csv_reader:
        if not row:
            # csv.reader yields [] for blank lines; row[0] would raise IndexError.
            continue
        temp = row[0].split('\t')
        if len(row) > 2:
            # NOTE(review): when a line contains commas, row[1] is dropped and
            # only row[2:] is appended - confirm this is intentional.
            temp.extend(row[2:])
        rows.append(temp)

In [25]:
full_metadata = pd.DataFrame(rows, columns=header)

# The genre labels are taken from fin_fil_df in its own row order and are later
# paired with these features purely by position. Filtering with `isin` kept the
# metadata file's order instead, so any ordering difference between the two
# sources would silently attach the wrong genre to a track. An inner merge
# driven by fin_fil_df preserves the left (fin_fil_df) key order.
fil_metadata = fin_fil_df[['id']].merge(
    full_metadata,
    on='id',
    how='inner',
    validate='many_to_one',  # raises if an id appears twice in the metadata
)

# Every labelled track must have exactly one metadata row, otherwise features
# and labels no longer line up.
assert len(fil_metadata) == len(fin_fil_df), (
    f'{len(fin_fil_df) - len(fil_metadata)} ids from fin_fil_df have no metadata row'
)

# Keep only the feature columns (identifiers and duration are dropped).
fil_metadata = fil_metadata.drop(columns=['spotify_id', 'duration_ms', 'id'])

In [22]:
# Turn genre names into integer class ids; `encoder.classes_` lists the genre
# name that corresponds to each id, in id order.
labels = fin_fil_df['genre']

encoder = LabelEncoder()
genre_labels = encoder.fit_transform(labels)

print(encoder.classes_)

['alternative rock' 'ambient' 'classic rock' 'electronic' 'folk'
 'indie rock' 'metal' 'pop' 'rap' 'singer-songwriter' 'soul']


In [28]:
# (rows, feature columns) of the filtered metadata.
fil_metadata.shape

(44860, 8)

In [51]:
# Z-score every feature column (zero mean, unit variance).
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so held-out rows influence the scaling statistics - consider fitting on the
# training part only.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(fil_metadata.astype(float))
df_normalized = pd.DataFrame(scaled_values, columns=fil_metadata.columns)

In [64]:
# Hold out 20% of the rows for evaluation.
# A fixed seed makes the split (and the pickle written from it) reproducible
# across kernel restarts; stratifying keeps the genre proportions the same in
# the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    df_normalized,
    genre_labels,
    test_size=0.2,
    random_state=42,
    stratify=genre_labels,
)

In [53]:
# Sanity check: (rows, columns) of the train and test feature frames.
X_train.shape, X_test.shape

((35888, 8), (8972, 8))

In [65]:
# Persist the four split parts as a single tuple so the same split can be
# reloaded later.
split_parts = (X_train, X_test, y_train, y_test)
with open('pickles/features_ttsdata.pkl', 'wb') as f:
    pickle.dump(split_parts, f)

In [57]:
# Stop training once validation loss has gone 3 epochs without improving.
es_cb = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
)


def getModelCheckpoint(name):
    """Build a checkpoint callback that keeps only the best model seen so far.

    Args:
        name: File stem; the model is written to ``models/<name>.h5``.

    Returns:
        A ``ModelCheckpoint`` that saves whenever ``val_accuracy`` reaches a
        new maximum.
    """
    checkpoint_options = {
        'filepath': f'models/{name}.h5',
        'monitor': 'val_accuracy',
        'mode': 'max',
        'save_best_only': True,
        'verbose': 1,
    }
    return ModelCheckpoint(**checkpoint_options)

In [58]:
# Fully connected classifier: 8 input features -> 128 -> 64 -> 11-way softmax.
model = tf.keras.Sequential()
model.add(Input(shape=(8,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(11, activation='softmax'))

# The sparse loss variant takes integer class ids as targets (no one-hot step).
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [59]:
# Train on all features with early stopping and best-model checkpointing.
# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same rows it is later judged on - consider a
# separate validation split.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_test, y_test),
    callbacks=[es_cb, getModelCheckpoint('features_dense')],
)

Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.39857, saving model to models\features_dense.h5
Epoch 2/30
Epoch 2: val_accuracy improved from 0.39857 to 0.41173, saving model to models\features_dense.h5
Epoch 3/30
Epoch 3: val_accuracy did not improve from 0.41173
Epoch 4/30
Epoch 4: val_accuracy did not improve from 0.41173
Epoch 5/30
Epoch 5: val_accuracy did not improve from 0.41173
Epoch 6/30
Epoch 6: val_accuracy improved from 0.41173 to 0.41540, saving model to models\features_dense.h5
Epoch 7/30
Epoch 7: val_accuracy did not improve from 0.41540
Epoch 8/30
Epoch 8: val_accuracy did not improve from 0.41540
Epoch 9/30
Epoch 9: val_accuracy improved from 0.41540 to 0.41741, saving model to models\features_dense.h5
Epoch 10/30
Epoch 10: val_accuracy did not improve from 0.41741
Epoch 11/30
Epoch 11: val_accuracy did not improve from 0.41741
Epoch 12/30
Epoch 12: val_accuracy improved from 0.41741 to 0.42064, saving model to models\features_dense.h5
Epoch 13/30
Epoch 13: v

<keras.callbacks.History at 0x26ab78ef3d0>

In [60]:
# Re-split with the 'release' and 'mode' columns removed.
# The fixed seed plus stratification on the same labels makes this split
# reproducible, so the reduced-feature model is not scored on an arbitrary,
# freshly shuffled test set each run.
# NOTE(review): this overwrites the X_train/X_test/y_train/y_test that were
# pickled earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    df_normalized.drop(columns=['release', 'mode']),
    genre_labels,
    test_size=0.2,
    random_state=42,
    stratify=genre_labels,
)

In [62]:
# Same architecture as the full-feature classifier, but with 6 input features:
# 6 -> 128 -> 64 -> 11-way softmax.
model2 = tf.keras.Sequential()
model2.add(Input(shape=(6,)))
model2.add(Dense(128, activation='relu'))
model2.add(Dense(64, activation='relu'))
model2.add(Dense(11, activation='softmax'))

# The sparse loss variant takes integer class ids as targets (no one-hot step).
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [63]:
# Train the reduced-feature model with the same schedule and callbacks.
# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same rows it is later judged on - consider a
# separate validation split.
model2.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_test, y_test),
    callbacks=[es_cb, getModelCheckpoint('features2_dense')],
)

Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.36480, saving model to models\features2_dense.h5
Epoch 2/30
Epoch 2: val_accuracy improved from 0.36480 to 0.36971, saving model to models\features2_dense.h5
Epoch 3/30
Epoch 3: val_accuracy improved from 0.36971 to 0.37171, saving model to models\features2_dense.h5
Epoch 4/30
Epoch 4: val_accuracy improved from 0.37171 to 0.37517, saving model to models\features2_dense.h5
Epoch 5/30
Epoch 5: val_accuracy did not improve from 0.37517
Epoch 6/30
Epoch 6: val_accuracy improved from 0.37517 to 0.37684, saving model to models\features2_dense.h5
Epoch 7/30
Epoch 7: val_accuracy did not improve from 0.37684
Epoch 8/30
Epoch 8: val_accuracy did not improve from 0.37684
Epoch 9/30
Epoch 9: val_accuracy did not improve from 0.37684
Epoch 10/30
Epoch 10: val_accuracy did not improve from 0.37684
Epoch 11/30
Epoch 11: val_accuracy improved from 0.37684 to 0.37717, saving model to models\features2_dense.h5
Epoch 12/30
Epoch 12: val_accuracy 

<keras.callbacks.History at 0x26ab8f0c430>