## IDEA


1) Analyze one feature at a time to see if it can be used to discriminate one or more classes from the rest
2) Analyze the feature vector as a whole – see if there is any correlation between the vector elements

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

# Load the tab-separated genre table; every later cell works from `data`.
data = pd.read_csv('data/GenreClassData_30s.txt', sep='\t')

# The 'Type' column tags each row as part of the training or the testing split.
train = data.loc[data['Type'].eq('Train')]
test = data.loc[data['Type'].eq('Test')]

# Feature columns used throughout the notebook, grouped by feature family.
# The order of the entries is significant: cells that zip `features` against a
# fixed number of axes only ever see the leading entries.
features = [
    # zero-crossing rate and RMS energy
    'zero_cross_rate_mean', 'zero_cross_rate_std',
    'rmse_mean', 'rmse_var',
    # spectral shape descriptors
    'spectral_centroid_mean', 'spectral_centroid_var',
    'spectral_bandwidth_mean', 'spectral_bandwidth_var',
    'spectral_rolloff_mean', 'spectral_rolloff_var',
    'spectral_contrast_mean', 'spectral_contrast_var',
    'spectral_flatness_mean', 'spectral_flatness_var',
    # chroma means (bins 1-12)
    'chroma_stft_1_mean', 'chroma_stft_2_mean', 'chroma_stft_3_mean',
    'chroma_stft_4_mean', 'chroma_stft_5_mean', 'chroma_stft_6_mean',
    'chroma_stft_7_mean', 'chroma_stft_8_mean', 'chroma_stft_9_mean',
    'chroma_stft_10_mean', 'chroma_stft_11_mean', 'chroma_stft_12_mean',
    # chroma standard deviations (bins 1-12)
    'chroma_stft_1_std', 'chroma_stft_2_std', 'chroma_stft_3_std',
    'chroma_stft_4_std', 'chroma_stft_5_std', 'chroma_stft_6_std',
    'chroma_stft_7_std', 'chroma_stft_8_std', 'chroma_stft_9_std',
    'chroma_stft_10_std', 'chroma_stft_11_std', 'chroma_stft_12_std',
    # tempo
    'tempo',
    # MFCC means (coefficients 1-12)
    'mfcc_1_mean', 'mfcc_2_mean', 'mfcc_3_mean', 'mfcc_4_mean',
    'mfcc_5_mean', 'mfcc_6_mean', 'mfcc_7_mean', 'mfcc_8_mean',
    'mfcc_9_mean', 'mfcc_10_mean', 'mfcc_11_mean', 'mfcc_12_mean',
    # MFCC standard deviations (coefficients 1-12)
    'mfcc_1_std', 'mfcc_2_std', 'mfcc_3_std', 'mfcc_4_std',
    'mfcc_5_std', 'mfcc_6_std', 'mfcc_7_std', 'mfcc_8_std',
    'mfcc_9_std', 'mfcc_10_std', 'mfcc_11_std', 'mfcc_12_std',
]

# Target column, kept as a one-element list so that indexing with it yields a
# single-column DataFrame rather than a Series.
targets = ['Genre']

# Feature matrix and genre labels for each split.
X_train, y_train = train[features], train[targets]
X_test, y_test = test[features], test[targets]

In [None]:
plt.close('all')  # Clear any existing figures

# Genres in the order their histograms are stacked (top to bottom) in each figure.
genre_order = [
    'hiphop', 'rock', 'jazz', 'classical', 'reggae',
    'blues', 'disco', 'metal', 'country', 'pop',
]

# Rows of `data` for each genre, keyed by genre name.
data_dict = {genre: data[data['Genre'] == genre] for genre in genre_order}

# Reduced four-genre selection. It is not plotted in this cell; it is kept
# because the name is defined at notebook level and may be used elsewhere.
data_dict_prev = {
    genre: data[data['Genre'] == genre]
    for genre in ('classical', 'disco', 'metal', 'pop')
}

# One figure per feature: a column of histograms, one row per genre, sharing
# the x-axis so the genre distributions can be compared directly.
for feature in features:
    fig, axes = plt.subplots(
        len(data_dict), 1,
        figsize=(8, len(data_dict) * 3),
        sharex=True,
        squeeze=False,  # always a 2-D array, even if only one genre is present
    )
    genre_axes = axes[:, 0]

    # The loop variable must NOT be called `data`: rebinding that name would
    # replace the full dataset with the last genre's subset for every cell
    # that runs afterwards.
    for ax, (genre, genre_df) in zip(genre_axes, data_dict.items()):
        ax.hist(genre_df[feature], bins=30, label=feature)
        ax.legend()
        ax.set_title(genre)
    genre_axes[-1].set_xlabel(feature)

    # Lay out, show and release each figure inside the loop so that every
    # figure gets a tight layout and the figures do not pile up in memory.
    fig.tight_layout()
    plt.show()
    plt.close(fig)

In [None]:
# Overlay the train and test distributions of a feature in each panel, using
# the `train` / `test` frames and the `features` list defined earlier.
# The grid is 2x2, so zip() stops after four panels: only the first four
# entries of `features` are drawn.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
for ax, feature in zip(axes.flat, features):
    for split_label, split_frame, split_color in (
        ('Train', train, 'r'),
        ('Test', test, 'b'),
    ):
        ax.hist(
            split_frame[feature],
            bins=30,
            alpha=0.5,
            label=split_label,
            color=split_color,
        )
    ax.legend()
    ax.title.set_text(feature)

In [None]:
# Per-genre subsets. This assumes `data` still refers to the full DataFrame
# loaded from file, i.e. that no earlier cell has rebound the name.
pop_data = data[data['Genre'] == 'pop']
disco_data = data[data['Genre'] == 'disco']
metal_data = data[data['Genre'] == 'metal']
classical_data = data[data['Genre'] == 'classical']

# (legend label, rows to plot, colour) for every overlaid histogram layer.
genre_layers = [
    ('Pop', pop_data, 'r'),
    ('Disco', disco_data, 'b'),
    ('Metal', metal_data, 'g'),
    ('Classical', classical_data, 'y'),
]

# The grid is 2x2, so zip() pairs the four axes with the first four entries of
# `features`; the remaining features are not drawn by this cell.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
for ax, feature in zip(axes.flat, features):
    for layer_label, layer_df, layer_color in genre_layers:
        # The 'Pop' layer plots pop_data, not the whole of `data`, so the
        # legend label matches what is actually drawn.
        ax.hist(layer_df[feature], bins=30, alpha=0.5,
                label=layer_label, color=layer_color)
    ax.legend()
    ax.title.set_text(feature)

To create a title or section in markdown, use the `#` symbol followed by a space and the title text. The number of `#` symbols determines the heading level.

```markdown
# Title for the Section

## Subsection Title

### Sub-subsection Title
```

For example:

```markdown
# Data Analysis

## Feature Distribution

### Tempo Analysis
```

# The code below (currently disabled) plots each feature separately for each genre

In [None]:
# Disabled experiment: the entire cell body is a single triple-quoted string
# literal, so none of the code inside it is executed. As the cell's only
# expression, the string itself is what the notebook would display.
# NOTE(review): if this is re-enabled as written, `axes[i, j]` indexes a 4x4
# grid while `i` enumerates every entry of `features`, so it raises IndexError
# once `i` reaches 4 unless `features` has at most four entries. The inner
# loop also rebinds the name `data` to a single genre's subset.
'''#pop, disco, metal and classical
pop_data = data[data['Genre'] == 'pop']
disco_data = data[data['Genre'] == 'disco']
metal_data = data[data['Genre'] == 'metal']
classical_data = data[data['Genre'] == 'classical']

# List of genres and their corresponding data
genres = ['Pop', 'Disco', 'Metal', 'Classical']
genre_data = [pop_data, disco_data, metal_data, classical_data]
colors = ['r', 'b', 'g', 'y']

# Create a 4x4 grid of subplots
fig, axes = plt.subplots(4, 4, figsize=(16, 16))

# Loop through each feature and genre to create individual plots
for i, feature in enumerate(features):
    for j, (genre, data, color) in enumerate(zip(genres, genre_data, colors)):
        ax = axes[i, j]
        ax.hist(data[feature], bins=30, alpha=0.5, label=genre, color=color)
        ax.legend()
        ax.title.set_text(f"{feature} - {genre}")

# Adjust layout to prevent overlap
plt.tight_layout()
plt.show()'''