In [1]:
import os
import torchaudio
import torchaudio.transforms as T
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from PIL import Image
from collections import defaultdict
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
def audio_to_image(audio_folder, image_folder):
    """Render every recording under ``audio_folder`` as a grayscale spectrogram PNG.

    ``audio_folder`` must contain one sub-directory per stress condition. For
    each condition a sub-directory of the same name is created under
    ``image_folder`` and every file in the condition folder is saved there as
    ``<condition>_<audio file stem>.png``.

    Only the first channel of each recording is rendered and the spectrogram
    values are written as produced by ``T.Spectrogram()`` (no decibel
    conversion is applied).

    Args:
        audio_folder: Directory holding the per-condition audio folders.
        image_folder: Directory receiving the per-condition image folders;
            created on demand.

    Raises:
        FileNotFoundError: If ``audio_folder`` does not exist.
        RuntimeError: Propagated from ``torchaudio.load`` when a file cannot
            be decoded (e.g. no audio backend is installed).
    """
    # Only real directories are stress conditions; stray files and hidden
    # entries (e.g. .ipynb_checkpoints) must not become classes.
    stress_conditions = sorted(
        entry for entry in os.listdir(audio_folder)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(audio_folder, entry))
    )

    # The transform does not depend on the file, so build it once.
    spec_transform = T.Spectrogram()

    for condition in stress_conditions:
        audio_path = os.path.join(audio_folder, condition)
        image_path = os.path.join(image_folder, condition)
        os.makedirs(image_path, exist_ok=True)

        for audio_file in sorted(os.listdir(audio_path)):
            audio_file_path = os.path.join(audio_path, audio_file)
            # Skip hidden files (.DS_Store, ...) and nested directories.
            if audio_file.startswith('.') or not os.path.isfile(audio_file_path):
                continue

            # The sample rate is not needed for the default spectrogram.
            waveform, _ = torchaudio.load(audio_file_path)
            spectrogram = spec_transform(waveform)

            # Swap the last two axes of the per-channel spectrogram
            # (presumably (freq, time) -> (time, freq); see torchaudio docs).
            spectrogram = spectrogram.transpose(1, 2)

            # Keep the first channel only and hand it to matplotlib as numpy.
            spectrogram = spectrogram[0].numpy()

            image_file = os.path.splitext(audio_file)[0] + '.png'
            image_save_path = os.path.join(image_path, f"{condition}_{image_file}")
            plt.imsave(image_save_path, spectrogram, cmap='gray', origin='lower')

In [3]:
# Root directories: audio recordings (input) and spectrogram images (output)
audio_folder_path, image_folder_path = 'AUDIOFILES', 'IMAGEFILES'

In [4]:
# Write one spectrogram PNG per recording into per-condition image folders
audio_to_image(audio_folder=audio_folder_path, image_folder=image_folder_path)

RuntimeError: Couldn't find appropriate backend to handle uri AUDIOFILES\Empty Pot\id_0_sound_1.wav and format None.

In [None]:
# Show one example spectrogram per plant/stress class in a 2x2 grid.
# Each entry is (class folder, image file name inside that folder).
sample_spectrograms = [
    ('Tobacco Cut', 'Tobacco Cut_id_127_sound_10.png'),
    ('Tobacco Dry', 'Tobacco Dry_id_10_sound_1.png'),
    ('Tomato Cut', 'Tomato Cut_id_127_sound_10.png'),
    ('Tomato Dry', 'Tomato Dry_id_101_sound_1.png'),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 6))
for ax, (condition, image_name) in zip(axes.flat, sample_spectrograms):
    # Read from the same image root the conversion step wrote to.
    image = plt.imread(os.path.join(image_folder_path, condition, image_name))
    ax.imshow(image, aspect='auto')
    ax.set_title(condition)

fig.tight_layout()  # Adjusts subplot parameters to fit plots into the figure area
plt.show()

In [None]:
# Preprocessing applied to every spectrogram image before it reaches the network
preprocessing_steps = [
    # Collapse the stored image to a single grayscale channel
    transforms.Grayscale(num_output_channels=1),
    # Bring every image to a common 224 x 224 size
    transforms.Resize((224, 224)),
    # Convert the PIL image to a tensor
    transforms.ToTensor(),
]
data_transforms = transforms.Compose(preprocessing_steps)

In [None]:
root_dir = 'IMAGEFILES'
# One sub-directory per class. Keep real directories only (stray files or
# hidden folders such as .ipynb_checkpoints are not classes) and sort them so
# the sample order -- and therefore the seeded split -- is reproducible.
folders = sorted(
    name for name in os.listdir(root_dir)
    if not name.startswith('.') and os.path.isdir(os.path.join(root_dir, name))
)
folders

In [None]:
# Load every image into memory together with its class label and file name
data = []
labels = []
filenames = []

for folder in folders:
    folder_path = os.path.join(root_dir, folder)
    # Sorted so the sample order does not depend on the file system.
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)
        # Image.open is lazy and keeps the file open until the pixels are
        # read; copy the decoded image and close the file right away so a
        # large dataset cannot exhaust the process's file handles.
        with Image.open(img_path) as img:
            data.append(img.copy())
        labels.append(folder)
        filenames.append(filename)

In [None]:
# Map each class (folder) name to its integer class index
class_indices = {
    'Empty Pot' : 0,
    'Tobacco Cut': 1,
    'Tobacco Dry': 2,
    'Tomato Cut': 3,
    'Tomato Dry': 4,
    'Greenhouse Noises': 5
}

# Convert label names to class indices. Entries that are already integers are
# kept as they are, so re-running this cell does not fail with a KeyError;
# an unknown folder name still raises KeyError.
labels = [
    label if isinstance(label, int) else class_indices[label]
    for label in labels
]

In [None]:
# Check whether the same file name occurs in more than one condition folder.
# Reuses `root_dir` and `folders` from the loading cells; loop variables are
# named so that the notebook-level `folders` list is not overwritten.
file_dict = defaultdict(list)

for folder in folders:
    folder_path = os.path.join(root_dir, folder)
    if not os.path.isdir(folder_path):
        continue  # stray files next to the class folders hold no images
    for filename in os.listdir(folder_path):
        file_dict[filename].append(folder)

duplicate_files = {
    filename: owners for filename, owners in file_dict.items() if len(owners) > 1
}
cnt = len(duplicate_files)

if duplicate_files:
    print("Duplicate files found in the following folders:")
    for filename, owners in duplicate_files.items():
        print(f"Filename: {filename}, Folders: {owners}")
    print('Total Duplicate Files', cnt)
else:
    print("No duplicate files found.")

In [None]:
# Stratified 80/20 train/test split; images, labels and file names are
# partitioned in lockstep with a fixed seed
(
    train_data, test_data,
    train_labels, test_labels,
    train_filenames, test_filenames,
) = train_test_split(
    data,
    labels,
    filenames,
    stratify=labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# In-memory dataset pairing each sample with its label
class CustomDataset(torch.utils.data.Dataset):
    """Dataset over parallel ``data`` / ``labels`` sequences.

    ``transform``, when truthy, is applied to the sample (never to the label)
    each time an item is fetched.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        # Dataset size is the number of stored samples
        return len(self.data)

    def __getitem__(self, index):
        sample, target = self.data[index], self.labels[index]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

In [None]:
# Wrap both splits in datasets that share the same preprocessing pipeline
train_dataset = CustomDataset(
    data=train_data, labels=train_labels, transform=data_transforms
)
test_dataset = CustomDataset(
    data=test_data, labels=test_labels, transform=data_transforms
)

In [None]:
# Mini-batch size shared by both loaders
batch_size = 16

# Training batches are reshuffled; the test loader keeps the dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
class CustomCNN(nn.Module):
    """Four-stage convolutional classifier for single-channel images.

    Every stage is Conv(3x3, padding 1) -> BatchNorm -> ReLU -> Dropout(0.25)
    -> MaxPool(2), with 32, 64, 128 and 256 output channels. The final
    feature map is flattened and fed to a 512-unit hidden layer (dropout 0.5)
    followed by a linear layer that emits one logit per class.

    The classifier head consumes a 256 x 14 x 14 feature map, which is what a
    224 x 224 input yields after the four pooling steps. Feature maps of any
    other size are adaptively average-pooled to 14 x 14 first, so the network
    also accepts other input resolutions (at least 16 x 16, so that the four
    2x poolings leave a non-empty map). For 224 x 224 inputs that pooling is
    an identity, and it has no parameters, so existing checkpoints still load.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=6):
        super().__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout3 = nn.Dropout(0.25)

        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout4 = nn.Dropout(0.25)

        # Normalises the spatial size expected by fc1 (parameter-free)
        self.adapt_pool = nn.AdaptiveAvgPool2d((14, 14))

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 14 * 14, 512)
        self.dropout5 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        # Convolutional layers with batch normalization, ReLU, dropout and MaxPool
        x = self.maxpool1(self.dropout1(self.relu(self.bn1(self.conv1(x)))))
        x = self.maxpool2(self.dropout2(self.relu(self.bn2(self.conv2(x)))))
        x = self.maxpool3(self.dropout3(self.relu(self.bn3(self.conv3(x)))))
        x = self.maxpool4(self.dropout4(self.relu(self.bn4(self.conv4(x)))))

        # Bring the feature map to the 14 x 14 grid fc1 was sized for
        x = self.adapt_pool(x)

        # Flatten everything but the batch dimension for the dense layers
        x = torch.flatten(x, 1)

        # Fully connected layers with dropout
        x = self.dropout5(self.relu(self.fc1(x)))
        x = self.fc2(x)

        return x

In [None]:
# Pick the compute device and report the GPU name when CUDA is usable
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    torch.cuda.get_device_name(0)  # name of the first GPU
    if device.type == "cuda"
    else "No GPU available, using CPU instead."
)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Build the network and place its parameters on the selected device
model = CustomCNN().to(device)

In [None]:
# Model size: total element count over all parameter tensors
total_params = sum(map(lambda tensor: tensor.numel(), model.parameters()))
print(f"Total number of parameters in the model: {total_params}")

In [None]:
# Cross-entropy over the class logits, optimised with Adam (learning rate 1e-3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Training loop
epochs = 10  # Number of epochs
for epoch in range(epochs):
    # Make sure dropout and batch-norm are in training mode even if the
    # evaluation cell (which calls model.eval()) was run before this one.
    model.train()

    running_loss = 0.0  # loss accumulated since the last periodic report
    epoch_loss = 0.0    # loss accumulated over the whole epoch
    num_batches = 0

    # The batch labels are called `targets` so the notebook-level `labels`
    # list is not overwritten by a tensor.
    for i, (inputs, targets) in enumerate(train_loader, 0):

        inputs, targets = inputs.to(device), targets.to(device)   # Move data to the device

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Print statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"Epoch [{epoch + 1}/{epochs}], "
                  f"Batch [{i + 1}/{len(train_loader)}], "
                  f"Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Epochs with fewer than 100 batches never reach the periodic report
    # above, so always summarise the epoch.
    if num_batches:
        print(f"Epoch [{epoch + 1}/{epochs}], "
              f"Average loss: {epoch_loss / num_batches:.4f}")

print('Finished Training')

In [None]:
# Measure classification accuracy on the held-out test set
correct = 0
total = 0
model.eval()  # Set the model to evaluation mode

with torch.no_grad():  # Disable gradient computation
    # The batch labels are called `targets` so the notebook-level `labels`
    # list is not overwritten by a tensor.
    for inputs, targets in test_loader:  # Iterate over the test data
        inputs, targets = inputs.to(device), targets.to(device)  # Move data to the device

        outputs = model(inputs)  # Forward pass
        predicted = outputs.argmax(dim=1)  # Index of the largest logit = predicted class

        total += targets.size(0)  # Total number of labels
        correct += (predicted == targets).sum().item()  # Number of correct predictions

if total:
    accuracy = 100 * correct / total
    print(f'Accuracy on the test set: {accuracy:.2f}%')
else:
    # An empty loader would otherwise end in a ZeroDivisionError
    accuracy = float('nan')
    print('Test set is empty; accuracy is undefined.')