<a href="https://colab.research.google.com/github/jadaksnyder/Artificial-Intelligence/blob/main/Project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os

# Where the unpacked dataset should land, and where the archive is expected
# to live on the mounted Drive (the file must already exist at this path).
extract_folder = "/content/gtzan_data"
zip_path = "/content/drive/My Drive/archive.zip"

# Unpack every member of the archive into the destination folder.
with zipfile.ZipFile(file=zip_path, mode="r") as archive:
    archive.extractall(path=extract_folder)

print("Extraction complete!")
# List the top-level entries as a quick sanity check of what was extracted.
print(os.listdir(path=extract_folder))


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/My Drive/archive.zip'

In [None]:
# Mount Google Drive under /content/drive so files stored there are reachable
# through the local filesystem.
# NOTE(review): the extraction cell reads its archive from a path under
# /content/drive, so this cell presumably has to run first - confirm cell order.
from  google.colab import drive
drive.mount('/content/drive')

In [None]:

import os
import librosa
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchaudio.transforms as TabError

# The ten genre names. A genre's position in this list is its integer class
# label (labels are derived with GENRES.index and decoded with GENRES[i]), and
# len(GENRES) sizes the classifier's output layer - reordering or editing the
# list changes the label encoding.
GENRES = ["blues", "classical", "country", "disco", "hiphop",
          "jazz", "metal", "pop", "reggae", "rock"]

#summarise an audio file as one vector of averaged MFCCs
def extract_features(file_path, n_mfcc=40):
  """Return the time-averaged MFCC vector of an audio file.

  Args:
    file_path: Path of the audio file to load.
    n_mfcc: Number of Mel-frequency cepstral coefficients to compute.

  Returns:
    A 1-D NumPy array with one averaged value per coefficient.
  """
  # librosa is asked for a fixed 22050 Hz rate, so every file is analysed
  # at the same sampling rate.
  waveform, sample_rate = librosa.load(file_path, sr=22050)
  coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
  # Transpose so frames run along axis 0, then average them away.
  frames_first = coefficients.T
  return np.mean(frames_first, axis=0)

#create dataset class
class MusicGenreDataset(Dataset):
  """In-memory dataset of per-track feature vectors labelled by genre.

  ``root_dir`` must contain one sub-directory per entry of ``GENRES``. Every
  ``.wav`` file inside those directories is run through ``extract_features``
  once, at construction time, and the resulting ``(features, label)`` pairs
  are kept in ``self.data``. The label is the genre's index in ``GENRES``.

  Raises:
    FileNotFoundError: if a genre sub-directory is missing (from os.listdir).
  """

  def __init__(self, root_dir):
    self.root_dir = root_dir
    self.data = []  # list of (feature vector, integer label) pairs

    for label, genre in enumerate(GENRES):
      genre_dir = os.path.join(root_dir, genre)
      # os.listdir returns entries in arbitrary order; sorting keeps the
      # sample order reproducible between runs.
      for file_name in sorted(os.listdir(genre_dir)):
        if not file_name.endswith(".wav"):
          continue
        file_path = os.path.join(genre_dir, file_name)
        self.data.append((extract_features(file_path), label))

  def __len__(self):
    """Return the number of loaded tracks."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return ``(features, label)``: a float32 tensor and an integer label."""
    features, label = self.data[idx]
    return torch.tensor(features, dtype=torch.float32), label

#build the dataset from the "GTZAN" folder (features are extracted here, up front)
dataset = MusicGenreDataset(root_dir="GTZAN")

#serve it in shuffled mini-batches of 32 samples
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)





In [None]:
import torch.nn as nn
import torch.optim as optim

#define the CNN Model
class GenreClassifier(nn.Module):
  """1-D CNN mapping one feature vector per track to genre logits.

  Each sample is a single vector of 40 averaged MFCCs, so it is treated as a
  one-channel sequence of length 40. (The previous ``in_channels=40`` layout
  received a length-1 sequence from the callers in this file, which a kernel
  of size 3 cannot convolve.) The sequence length evolves as
  40 -> conv(3) 38 -> pool 19 -> conv(3) 17 -> pool 8, which is where the
  ``128 * 8`` input size of ``fc1`` comes from.

  Args:
    num_classes: Size of the output layer. Defaults to ``len(GENRES)``.
  """

  def __init__(self, num_classes=None):
    super().__init__()
    if num_classes is None:
      num_classes = len(GENRES)
    self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3)
    self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
    self.fc1 = nn.Linear(128 * 8, 256)
    self.fc2 = nn.Linear(256, num_classes)
    self.relu = nn.ReLU()
    self.pool = nn.MaxPool1d(2)

  def forward(self, x):
    """Return raw class scores of shape ``(batch, num_classes)``.

    Args:
      x: Feature batch shaped ``(batch, 40)``, ``(batch, 40, 1)`` or
        ``(batch, 1, 40)``; all three are viewed as ``(batch, 1, 40)``.

    Raises:
      ValueError: if the feature length does not produce the flattened size
        that ``fc1`` expects.
    """
    # Collapse whatever singleton layout the caller used into
    # (batch, channel=1, features).
    x = x.reshape(x.size(0), 1, -1)
    x = self.pool(self.relu(self.conv1(x)))
    x = self.pool(self.relu(self.conv2(x)))
    x = x.flatten(1)
    if x.size(1) != self.fc1.in_features:
      raise ValueError(
          f"expected {self.fc1.in_features} flattened features, got {x.size(1)}; "
          "the network is sized for 40-element feature vectors")
    x = self.relu(self.fc1(x))
    x = self.fc2(x)
    return x

#pick the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

#create the network on the chosen device
model = GenreClassifier().to(device=device)

#cross-entropy loss, optimised with Adam at a learning rate of 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
#training the model
def train_model(model, dataloader, epochs=10):
  """Train ``model`` on ``dataloader`` and print per-epoch loss and accuracy.

  Relies on the module-level ``device``, ``criterion`` and ``optimizer``
  objects; they are not passed in.

  Args:
    model: Network to train; put into training mode here.
    dataloader: Iterable yielding ``(inputs, labels)`` batches.
    epochs: Number of full passes over ``dataloader``.
  """
  model.train()
  for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
      inputs, labels = inputs.to(device), labels.to(device)
      # Add a trailing singleton axis, the same layout predict_genre uses.
      inputs = inputs.unsqueeze(2)

      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      _, predicted = torch.max(outputs, 1)
      correct += (predicted == labels).sum().item()
      total += labels.size(0)

    # max(..., 1) keeps an empty dataloader from dividing by zero.
    avg_loss = running_loss / max(len(dataloader), 1)
    accuracy = correct / max(total, 1)
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")


#train the model (called at top level: inside the function body it would
#recurse after every epoch and never be started at all)
train_model(model, dataloader)

In [None]:
#classify a single audio file with the trained model
def predict_genre(model, file_path):
  """Return the genre name the model assigns to one audio file.

  Switches ``model`` to evaluation mode (and leaves it there), extracts the
  file's feature vector with ``extract_features``, and runs one forward pass
  without gradient tracking on the module-level ``device``.

  Args:
    model: Classifier to query.
    file_path: Path of the audio file to classify.

  Returns:
    The entry of ``GENRES`` at the index of the highest output score.
  """
  model.eval()
  feature_vector = extract_features(file_path)

  # Add a leading batch axis and a trailing singleton axis before moving
  # the sample to the device.
  batch = torch.tensor(feature_vector, dtype=torch.float32)
  batch = batch.unsqueeze(0).unsqueeze(2)
  batch = batch.to(device)

  with torch.no_grad():
    scores = model(batch)
    best = torch.max(scores, 1).indices
  return GENRES[best.item()]

#run the classifier on one sample track and report the result
test_song = "/mnt/data/rock.00006.wav"
predicted_genre = predict_genre(model=model, file_path=test_song)
print(f"The predicted genre is: {predicted_genre}")