# In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# File paths.
# The defaults are the original author's local checkout. Set the
# SHAZAM_MFCC_DIR / SHAZAM_HIST_DIR environment variables to point at the data
# on another machine without editing this file. An unset or empty variable
# falls back to the default.
mfccs_path = os.environ.get('SHAZAM_MFCC_DIR') or (
    r'/Users/jrgroth/Downloads/SHAZAM-CAPSTONE-main/SHAZAM-CAPSTONE-main/librosa_dataset/mfcc/MFCC_Data/MFCC_Data'
)
hist_path = os.environ.get('SHAZAM_HIST_DIR') or (
    r'/Users/jrgroth/Downloads/SHAZAM-CAPSTONE-main/SHAZAM-CAPSTONE-main/histograms/histogram_data'
)


def load_csv_files(directory):
    """Load every ``.csv`` file in *directory* as a NumPy array.

    Files are read in sorted file-name order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the position of a given file in
    the returned list could differ between runs or machines, which breaks
    any caller that pairs the results of two directories by list index.
    NOTE(review): sorting only aligns two directories if their file names
    sort the same way per song -- confirm the naming scheme.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list with one 2-D ``numpy.ndarray`` (``DataFrame.values``) per CSV
        file, ordered by file name. Empty if the folder has no CSV files.
    """
    csv_names = sorted(
        name for name in os.listdir(directory) if name.endswith('.csv')
    )
    return [
        pd.read_csv(os.path.join(directory, name)).values
        for name in csv_names
    ]


# Read both feature sets up front (MFCC inputs first, then histogram targets)
# and report how many CSV files each folder contributed.
mfcc_data, hist_data = load_csv_files(mfccs_path), load_csv_files(hist_path)

print(f"Number of MFCC data files: {len(mfcc_data)}")
print(f"Number of histogram data files: {len(hist_data)}")

# CNN model
def create_cnn_model(input_shape, output_units=1):
    """Build and compile a small 1-D convolutional regressor.

    Architecture: two Conv1D + MaxPooling1D stages, a 64-unit dense layer
    with 50% dropout, and a linear output layer.

    Args:
        input_shape: Shape of a single sample as expected by ``Conv1D``,
            i.e. ``(steps, channels)``.
        output_units: Width of the final linear layer. Defaults to 1, which
            keeps the original single-value regressor; pass the target
            vector length to predict a whole histogram.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss).
    """
    model = Sequential()
    model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(output_units, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
    return model


def _pad_and_stack(arrays):
    """Zero-pad same-rank arrays to a common shape and stack them.

    Each axis is padded (at the end) to the largest size seen on that axis,
    so the result has shape ``(len(arrays), *max_shape)`` and dtype float32.
    """
    common_shape = tuple(max(dims) for dims in zip(*(a.shape for a in arrays)))
    stacked = np.zeros((len(arrays),) + common_shape, dtype=np.float32)
    for row, arr in enumerate(arrays):
        # Copy each array into the top-left corner of its zero-filled slot.
        region = (row,) + tuple(slice(0, size) for size in arr.shape)
        stacked[region] = arr
    return stacked


num_songs = len(mfcc_data)
results = []

# Inputs and targets are paired by position, so the two folders must have
# produced the same number of arrays.
if len(hist_data) != num_songs:
    raise ValueError(
        f"Expected one histogram per MFCC file, got {num_songs} MFCC files "
        f"and {len(hist_data)} histogram files"
    )

if num_songs < 2:
    # Leave-one-out needs at least one training song besides the held-out one.
    print("Need at least two songs for leave-one-out evaluation; skipping training.")
else:
    # Make the data uniform for the CNN: Keras needs one dense array, but the
    # per-song CSVs may differ in size, so zero-pad them to a common shape.
    # NOTE(review): assumes each MFCC array is laid out as (steps, channels)
    # for Conv1D and that zero-padding is acceptable -- confirm against how
    # the CSVs were written.
    X_all = _pad_and_stack(mfcc_data)
    # Each histogram becomes one flat target vector per song.
    # NOTE(review): assumes every value in the histogram CSV is a target.
    y_all = _pad_and_stack(hist_data).reshape(num_songs, -1)

    for test_index in range(num_songs):
        # Hold out one song; train on all the others.
        train_mask = np.arange(num_songs) != test_index
        X_train, y_train = X_all[train_mask], y_all[train_mask]
        # Slice (not index) so the batch axis required by predict() is kept.
        X_test = X_all[test_index:test_index + 1]
        y_test = y_all[test_index]

        # A fresh model is built per fold; drop the previous fold's Keras
        # state so memory does not grow with the number of songs.
        tf.keras.backend.clear_session()

        # Create and train the model
        model = create_cnn_model(
            input_shape=X_train.shape[1:],
            output_units=y_train.shape[1],
        )
        model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

        # Test the model (predict returns a batch of one; take its only row)
        y_pred = model.predict(X_test)[0]
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        results.append((mse, r2))

        # Plot predicted vs. actual histograms
        plt.figure()
        plt.plot(y_test, label='Actual')
        plt.plot(y_pred, label='Predicted')
        plt.legend()
        plt.title(f'Song {test_index + 1}: Predicted vs Actual Histogram')
        plt.show()

# Print results
for i, (mse, r2) in enumerate(results):
    print(f"Song {i + 1}: MSE = {mse:.4f}, R2 = {r2:.4f}")
