In [2]:
%pip install contractions nltk pandas scikit-learn

Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting nltk
  Downloading nltk-3.9.2-py3-none-any.whl.metadata (3.2 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.8.0-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting click (from nltk)
  Downloading click-8.3.1-py3-none-any.whl.metadata (2.6 kB)
Collecting joblib (from nltk)
  Downloading joblib-1.5.3-py3-none-any.whl.metadata (5.5 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2025.11.3-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting tqdm (from nltk)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.4.0-cp313-cp313-win_amd64.whl.metadata (6.6 kB)
Collecting pytz>=2020.1 (f


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import contractions
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
df = pd.read_csv('spotifyDataset.csv', engine='python', on_bad_lines='skip')
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


**Preprocessing phase**

In [5]:
df.isna().sum() #check for na but there is no missing values

artist    0
song      0
link      0
text      0
dtype: int64

In [6]:
#Convert to lowercase and also we create new column to keep the original data
df["Lyrics"] = df["text"].str.lower()
df["song_name"] = df["song"].str.lower()
df.head()

Unnamed: 0,artist,song,link,text,Lyrics,song_name
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...","look at her face, it's a wonderful face \r\na...",ahe's my kind of girl
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...","take it easy with me, please \r\ntouch me gen...","andante, andante"
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,i'll never know why i had to go \r\nwhy i had...,as good as new
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang-a-boomerang


In [7]:
#It converts English contractions into their full forms (e.g., they’re → they are, don’t → do not). because when we delete unneccessary characters the ' will be deleted and the sentence will be wrong so they're will be theyre so this is unacceptable
df["Lyrics"] = df["Lyrics"].apply(
    lambda x: contractions.fix(x)
)
df["song_name"] = df["song_name"].apply(
    lambda x: contractions.fix(x)
)
df.head(9)

Unnamed: 0,artist,song,link,text,Lyrics,song_name
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...","look at her face, it is a wonderful face \r\n...",ahe's my kind of girl
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...","take it easy with me, please \r\ntouch me gen...","andante, andante"
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,i will never know why i had to go \r\nwhy i h...,as good as new
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang-a-boomerang
5,ABBA,Burning My Bridges,/a/abba/burning+my+bridges_20003011.html,"Well, you hoot and you holler and you make me ...","well, you hoot and you holler and you make me ...",burning my bridges
6,ABBA,Cassandra,/a/abba/cassandra_20002811.html,Down in the street they're all singing and sho...,down in the street they are all singing and sh...,cassandra
7,ABBA,Chiquitita,/a/abba/chiquitita_20002978.html,"Chiquitita, tell me what's wrong \r\nYou're e...","chiquitita, tell me what is wrong \r\nyou are...",chiquitita
8,ABBA,Crazy World,/a/abba/crazy+world_20003013.html,I was out with the morning sun \r\nCouldn't s...,i was out with the morning sun \r\ncould not ...,crazy world


In [8]:
# Normalize spaces, keep line breaks
df["Lyrics"] = df["Lyrics"].str.replace(
    r"[ \t]+", " ", regex=True
).str.strip()
df.head(20)


Unnamed: 0,artist,song,link,text,Lyrics,song_name
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...","look at her face, it is a wonderful face \r\na...",ahe's my kind of girl
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...","take it easy with me, please \r\ntouch me gent...","andante, andante"
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...,i will never know why i had to go \r\nwhy i ha...,as good as new
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...,making somebody happy is a question of give an...,bang-a-boomerang
5,ABBA,Burning My Bridges,/a/abba/burning+my+bridges_20003011.html,"Well, you hoot and you holler and you make me ...","well, you hoot and you holler and you make me ...",burning my bridges
6,ABBA,Cassandra,/a/abba/cassandra_20002811.html,Down in the street they're all singing and sho...,down in the street they are all singing and sh...,cassandra
7,ABBA,Chiquitita,/a/abba/chiquitita_20002978.html,"Chiquitita, tell me what's wrong \r\nYou're e...","chiquitita, tell me what is wrong \r\nyou are ...",chiquitita
8,ABBA,Crazy World,/a/abba/crazy+world_20003013.html,I was out with the morning sun \r\nCouldn't s...,i was out with the morning sun \r\ncould not s...,crazy world
9,ABBA,Crying Over You,/a/abba/crying+over+you_20177611.html,I'm waitin' for you baby \r\nI'm sitting all ...,i am waitin' for you baby \r\ni am sitting all...,crying over you


In [9]:
#Correct wrong spelling in song column
corrections = {
    "ahe": "she",
    "hte": "the",
    "thier": "their"
}

for wrong, correct in corrections.items():
    df["song_name"] = df["song_name"].str.replace(
        rf"\b{wrong}\b", correct, regex=True
    )
df.head(2)

Unnamed: 0,artist,song,link,text,Lyrics,song_name
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA...","look at her face, it is a wonderful face \r\na...",she's my kind of girl
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen...","take it easy with me, please \r\ntouch me gent...","andante, andante"


Skip this preprocessing part for now As we generate lyrics so removing stop words isn't efficient but we are going to try with another way or let the model predict the stop words

In [10]:
# stop_words = set(stopwords.words("english"))
#Manually removing stopwords during preprocessing may lead to the unintended loss of words or phrases that are important for distinguishing between songs, particularly when phrase-level patterns are relevant. Since manual removal permanently alters the text, it can negatively affect feature extraction and limit flexibility in experimentation. Therefore, stopword handling is delegated to the TF-IDF vectorizer, which removes common terms dynamically during feature construction while preserving the original text. This approach reduces noise without discarding potentially informative lexical patterns and is more suitable for song identification tasks.
lemmatizer = WordNetLemmatizer()
vectorizer = TfidfVectorizer(
    stop_words="english",   # correct for song detection
    ngram_range=(1, 3),     # unigrams + bigrams + trigrams
    min_df=2                # remove very rare noise
) #skip this for now

In [11]:
# Remove stopwords
#Original text remains unchanged. Stopwords are removed only in the feature space.You can:change stopword list.re-vectorize.reuse raw text
x = vectorizer.fit_transform(df["Lyrics"])
vectorizer.get_stop_words() #skip this for now until we try during training process

frozenset({'a',
           'about',
           'above',
           'across',
           'after',
           'afterwards',
           'again',
           'against',
           'all',
           'almost',
           'alone',
           'along',
           'already',
           'also',
           'although',
           'always',
           'am',
           'among',
           'amongst',
           'amoungst',
           'amount',
           'an',
           'and',
           'another',
           'any',
           'anyhow',
           'anyone',
           'anything',
           'anyway',
           'anywhere',
           'are',
           'around',
           'as',
           'at',
           'back',
           'be',
           'became',
           'because',
           'become',
           'becomes',
           'becoming',
           'been',
           'before',
           'beforehand',
           'behind',
           'being',
           'below',
           'beside',
           'besides'

In [12]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\LEGION\AppData\Roaming\nltk_data...


True

In [None]:

# Import core libraries

# TensorFlow and Keras for building and training the neural network
import tensorflow as tf

# Numerical and data handling libraries
import numpy as np
import pandas as pd


# Scikit-learn utilities

# For splitting the dataset into train/validation/test
from sklearn.model_selection import train_test_split

# For encoding artist names as integers
from sklearn.preprocessing import LabelEncoder

# Keras preprocessing utilities

# Tokenizer converts text into integer sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Pad sequences to the same length
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Keras layers and model utilities

from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Dropout,
    Flatten, Concatenate
)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping


# Text normalization utility

# Expands contractions (e.g., "don't" → "do not")
import contractions



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting tensorflow
  Downloading tensorflow-2.20.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.12.19-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.7.0-py3-none-any.whl.metadata (1.5 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Using cached opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.

  if not hasattr(np, "object"):


In [None]:

# Model and training hyperparameters

TEXT_EMBEDDING_DIM = 100      # Size of word embeddings
ARTIST_EMBEDDING_DIM = 50    # Size of artist embeddings
GENRE_EMBEDDING_DIM = 30     # Size of genre embeddings (placeholder)

LSTM_UNITS = 128             # Number of LSTM units
MAX_SEQUENCE_LENGTH = 50     # Max length of lyric sequences
BATCH_SIZE = 128             # Training batch size
EPOCHS = 20                  # Maximum number of training epochs


In [None]:

# Lyrics preprocessing

# Ensure a clean "Lyrics" column exists:
# - lowercase text
# - expand contractions
# - normalize whitespace
if "Lyrics" not in df.columns:
    df["Lyrics"] = df["text"].str.lower()
    df["Lyrics"] = df["Lyrics"].apply(contractions.fix)
    df["Lyrics"] = df["Lyrics"].str.replace(r"[ \t]+", " ", regex=True).str.strip()


In [None]:

# Encode artist names as integers

# This allows artists to be used in an embedding layer
artist_encoder = LabelEncoder()
df["artist_encoded"] = artist_encoder.fit_transform(df["artist"])

# Number of unique artists
ARTIST_VOCAB_SIZE = len(artist_encoder.classes_)


In [None]:

# Split dataset into Train / Validation / Test

# 70% train, 15% validation, 15% test
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

print(f"Train size: {len(train_df)}")
print(f"Val size: {len(val_df)}")
print(f"Test size: {len(test_df)}")


Train size: 40355
Val size: 8647
Test size: 8648


In [None]:

# Tokenizer (fit ONLY on training lyrics)

# The tokenizer converts raw text into sequences of integer indices,
# where each unique word in the training corpus is assigned a unique ID.
#
# Fitting the tokenizer exclusively on the training set is a critical step
# to prevent information leakage. This ensures that the model does not gain
# prior knowledge of words or word frequencies from the validation or test
# sets, preserving the integrity of model evaluation.
#
# An out-of-vocabulary (OOV) token is specified to handle words that appear
# in validation or test data but were not seen during training. Such words
# are mapped to a shared <unk> token instead of being discarded.
tokenizer = Tokenizer(oov_token="<unk>")
tokenizer.fit_on_texts(train_df["Lyrics"])


# Vocabulary size

# The vocabulary size represents the total number of unique tokens learned
# by the tokenizer, plus one additional index reserved for padding.
# This value determines the size of the embedding matrix and the output
# layer of the model.
VOCAB_SIZE = len(tokenizer.word_index) + 1



In [None]:
# Sliding-window text processing

# This function prepares the lyrics data for next-word prediction by
# converting each song into multiple training samples using a sliding-
# window approach.
#
# Instead of using the full lyric at once, the model is trained on
# progressively longer word sequences (contexts) to predict the next word.
# For example:
#   "I love you" →
#   ["I", "love"] → "you"
#
# This approach prevents data leakage and forces the model to learn genuine
# language patterns rather than memorizing entire lyrics.
#
# In addition, the function tracks the index of the original song from which
# each training sample is generated. This is necessary to correctly align
# auxiliary features such as artist and genre with each sliding window.
def process_text_data(data_frame, tokenizer_obj, max_seq_len):
    input_sequences = []   # Stores all generated word sequences
    song_indices = []      # Stores the source song index for each sequence

    # Iterate over each song lyric
    for idx, line in enumerate(data_frame["Lyrics"]):
        # Convert lyric text into a sequence of token IDs
        token_list = tokenizer_obj.texts_to_sequences([line])[0]

        # Generate incremental context windows
        # Each window is used to predict the next word
        for i in range(2, len(token_list)):
            input_sequences.append(token_list[:i])
            song_indices.append(idx)

    # Pad all sequences to the same length for batch processing
    # Pre-padding preserves the most recent context at the end of the sequence
    input_sequences = pad_sequences(
        input_sequences,
        maxlen=max_seq_len,
        padding="pre"
    )

    # Separate inputs and targets:
    # X contains the context words
    # y contains the true next word to be predicted
    X = input_sequences[:, :-1]
    y = input_sequences[:, -1]

    return X, y, song_indices



In [None]:

# Generate training, validation, and test data

X_text_train, y_train, train_song_idx = process_text_data(
    train_df, tokenizer, MAX_SEQUENCE_LENGTH
)

X_text_val, y_val, val_song_idx = process_text_data(
    val_df, tokenizer, MAX_SEQUENCE_LENGTH
)

X_text_test, y_test, test_song_idx = process_text_data(
    test_df, tokenizer, MAX_SEQUENCE_LENGTH
)


In [None]:

# Genre placeholder (dummy feature)

# All songs are assigned genre 0
train_df["genre_encoded"] = 0
val_df["genre_encoded"] = 0
test_df["genre_encoded"] = 0


In [None]:

# Align artist and genre labels with sliding windows

# Each text window must receive the artist/genre of its source song
train_artist = train_df.iloc[train_song_idx]["artist_encoded"].values.reshape(-1, 1)
val_artist = val_df.iloc[val_song_idx]["artist_encoded"].values.reshape(-1, 1)
test_artist = test_df.iloc[test_song_idx]["artist_encoded"].values.reshape(-1, 1)

train_genre = train_df.iloc[train_song_idx]["genre_encoded"].values.reshape(-1, 1)
val_genre = val_df.iloc[val_song_idx]["genre_encoded"].values.reshape(-1, 1)
test_genre = test_df.iloc[test_song_idx]["genre_encoded"].values.reshape(-1, 1)


In [None]:

# Model input dimensions

GENRE_VOCAB_SIZE = 1                  # Only one dummy genre
MODEL_INPUT_SEQUENCE_LENGTH = MAX_SEQUENCE_LENGTH - 1


# Define model inputs

text_input = Input(shape=(MODEL_INPUT_SEQUENCE_LENGTH,), name="text_input")
artist_input = Input(shape=(1,), name="artist_input")
genre_input = Input(shape=(1,), name="genre_input")


# Text branch (language modeling)

text_embed = Embedding(VOCAB_SIZE, TEXT_EMBEDDING_DIM)(text_input)
text_lstm = LSTM(LSTM_UNITS)(text_embed)



# Artist branch

# Artist embedding with heavy dropout to reduce memorization
artist_embed = Embedding(ARTIST_VOCAB_SIZE, ARTIST_EMBEDDING_DIM)(artist_input)
artist_embed = Flatten()(artist_embed)
artist_embed = Dropout(0.7)(artist_embed)


# Genre branch (placeholder)

genre_embed = Embedding(GENRE_VOCAB_SIZE, GENRE_EMBEDDING_DIM)(genre_input)
genre_embed = Flatten()(genre_embed)



# Merge all branches

merged = Concatenate()([text_lstm, artist_embed, genre_embed])


# Fully connected layers
# These layers take the combined representation produced by the text (LSTM),
# artist embedding, and genre embedding branches and learn higher-level,
# non-linear feature interactions between them.
#
# The first Dense layer (256 units) allows the model to capture complex
# relationships between linguistic patterns and artist/genre context.
# ReLU activation introduces non-linearity, enabling the model to learn
# richer representations.
#
# Dropout with a rate of 0.5 is applied after each Dense layer to reduce
# overfitting by randomly disabling half of the neurons during training,
# forcing the model to generalize rather than memorize the training data.
#
# The second Dense layer (128 units) further refines these learned features,
# compressing the representation before passing it to the output layer
# for next-word prediction.

x = Dense(256, activation="relu")(merged)
x = Dropout(0.5)(x)

x = Dense(128, activation="relu")(x)
x = Dropout(0.5)(x)



# Output layer

# Predict next word over the entire vocabulary
# This final Dense layer performs the actual next-word prediction task.
# It maps the high-level feature representation produced by the fully
# connected layers to a probability distribution over the entire
# vocabulary.
#
# The number of units is equal to VOCAB_SIZE, meaning each neuron
# corresponds to a unique word in the tokenizer vocabulary.
#
# The softmax activation function converts the raw logits into normalized
# probabilities that sum to 1, allowing the model to estimate the likelihood
# of each possible next word given the input context.
#
# During training, the model is optimized using sparse categorical
# cross-entropy loss, which encourages the predicted probability of the
# true next word to be as high as possible.
output = Dense(VOCAB_SIZE, activation="softmax")(x)

# Build the model

model = Model(
    inputs=[text_input, artist_input, genre_input],
    outputs=output
)

model.summary()



In [None]:

# Compile the model

# Top-5 accuracy is more meaningful for language modeling
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=[
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top_5_accuracy")
    ]
)


In [None]:

# Early stopping callback

# Stops training when validation loss stops improving
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)



# Train the model

history = model.fit(
    [X_text_train, train_artist, train_genre],
    y_train,
    validation_data=(
        [X_text_val, val_artist, val_genre],
        y_val
    ),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping]
)


Epoch 1/20
[1m84523/84523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24371s[0m 288ms/step - loss: 4.9800 - top_5_accuracy: 0.4013 - val_loss: 4.7388 - val_top_5_accuracy: 0.4301
Epoch 2/20
[1m84523/84523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23876s[0m 282ms/step - loss: 4.8634 - top_5_accuracy: 0.4169 - val_loss: 4.7303 - val_top_5_accuracy: 0.4341
Epoch 3/20
[1m84523/84523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23761s[0m 281ms/step - loss: 4.8697 - top_5_accuracy: 0.4182 - val_loss: 4.7624 - val_top_5_accuracy: 0.4323
Epoch 4/20
[1m84523/84523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23777s[0m 281ms/step - loss: 4.8895 - top_5_accuracy: 0.4178 - val_loss: 4.7932 - val_top_5_accuracy: 0.4309
Epoch 5/20
[1m84523/84523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23774s[0m 281ms/step - loss: 4.9109 - top_5_accuracy: 0.4167 - val_loss: 4.8322 - val_top_5_accuracy: 0.4295


In [None]:

# Evaluate on test set

test_loss, test_top5_acc = model.evaluate(
    [X_text_test, test_artist, test_genre],
    y_test
)

print("Test Loss:", test_loss)
print("Test Top-5 Accuracy:", test_top5_acc)


[1m73041/73041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1839s[0m 25ms/step - loss: 4.7411 - top_5_accuracy: 0.4339
Test Loss: 4.741110801696777
Test Top-5 Accuracy: 0.43387266993522644
