In [1]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
from torchvision import transforms
import cv2
from PIL import Image


In [None]:
class WineDataset(Dataset):
    def __init__(self):
        #dataloading
        data = np.loadtxt('wine.csv', delimiter=',', dtype=np.float32, skiprows=1)
        self.x = torch.from_numpy(data[:, 1:])
        self.y = torch.from_numpy(data[:, [0]])
        
        self.n_samples = data.shape[0]
        
    def __getitem__(self, index):
        # dataset indexing 
        return self.x[index], self.y[index]
        
    def __len__(self):
        return self.n_samples

In [4]:
class MusicDataset(Dataset):
    def __init__(self, csv_path: str, root_dir: str, transform=None):
        # dataloading
        meta_data = pd.read_csv(csv_path)
        song_file_names = get_image_files(root_dir)

        self.n_samples = meta_data.shape[0]
        self.transform = transform
        self.labels = get_labels(meta_data, column_name='label')
        self.data = load_data(song_file_names, transform)
        print(self.data.shape)
        
    def __getitem__(self, index):
        # dataset indexing
        return self.data[index], self.labels[index]
        
    def __len__(self):
        # get length of dataset
        return self.n_samples
        

In [5]:
preprocessing = transforms.Compose([
    transforms.ToTensor(), # converts (H W C) uint8 to (C W H) float32 [0-1]
    transforms.Normalize((0.5124, 0.4420, 0.4994), (0.4354, 0.4721, 0.4593))
])

In [13]:
dataset = MusicDataset('d:/Data/features_30_sec.csv', 'd:/Data/images_original', transform=preprocessing)

torch.Size([3, 288, 432])
torch.Size([999, 3, 288, 432])


In [14]:
train_loader = DataLoader(dataset, batch_size=32)


In [None]:
ypred = torch.ones(32, 10)


In [None]:
cost_fn = torch.nn.CrossEntropyLoss()

In [15]:
for a,b in train_loader:
    print(a)

tensor([[[[1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          ...,
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199]],

         [[1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          ...,
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820]],

         [[1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899],
          [1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899],
          [1.0899, 1.0899, 1.0899,  ..., 1

tensor([[[[1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          ...,
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199]],

         [[1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          ...,
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820]],

         [[1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899],
          [1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899],
          [1.0899, 1.0899, 1.0899,  ..., 1

          [1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899]]]])
tensor([[[[1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          ...,
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199],
          [1.1199, 1.1199, 1.1199,  ..., 1.1199, 1.1199, 1.1199]],

         [[1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          ...,
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820],
          [1.1820, 1.1820, 1.1820,  ..., 1.1820, 1.1820, 1.1820]],

         [[1.0899, 1.0899, 1.0899,  ..., 1.0899, 1.0899, 1.0899],
          [1.0899, 1.0899, 1.0899,  ...

IndexError: index 999 is out of bounds for dimension 0 with size 999

In [3]:
def get_labels(df, column_name: str):
    labels = df[column_name]
    labels_to_id = labels.apply(list(labels.unique()).index)
    tensor_labels = torch.tensor(labels_to_id)
    return tensor_labels
    
import os
def get_image_files(rootdir: str) -> list:
    #rootdir = 'd:/Data/images_original'
    image_file_locations = []
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            #print os.path.join(subdir, file)
            filepath = subdir + os.sep + file

            if filepath.endswith(".png"):
                image_file_locations.append(filepath)
    return image_file_locations

In [2]:
def load_data(image_file_locations: list, transform=None):
    #  [batch_size, channels, height, width].
    images = []
    for image_file in image_file_locations:
        current_img = cv2.imread(image_file).astype('uint8')
        if transform is not None:
            # transform/preprocess image
            current_img = transform(current_img)
        else:
            current_img = torch.from_numpy(current_img).float()//255.0
            current_img = torch.einsum('ijk->kij', current_img) # transpose image
        images.append(current_img)
    print(images[0].shape)
    data = torch.stack(images, dim=0)
    return data

In [8]:
import torch
import torch.nn.functional as f

class ResidualLayer(torch.nn.Module):
    def __init__(self, channel_sizes: tuple, is_increase: bool = False, strides: int = 0):
        super(ResidualLayer, self).__init__()
        self.is_increase = is_increase

        # (w - f + 2p) / s + 1
        self.conv_layer_1 = torch.nn.Conv2d(channel_sizes[0], channel_sizes[0] // 4, kernel_size=(1, 1),
                                            stride=(strides, strides))
        self.conv_layer_2 = torch.nn.Conv2d(channel_sizes[0] // 4, channel_sizes[0] // 4, kernel_size=(3, 3),
                                            stride=(1, 1), padding=(1, 1))
        self.conv_layer_3 = torch.nn.Conv2d(channel_sizes[0] // 4, channel_sizes[1], kernel_size=(1, 1), stride=(1, 1))

        if self.is_increase:
            # to increase the channel size and halve the image sizes
            self.conv_layer_4 = torch.nn.Conv2d(channel_sizes[0], channel_sizes[1], kernel_size=(1, 1),
                                                stride=(strides, strides))
            self.bn_4 = torch.nn.BatchNorm2d(channel_sizes[1])

        self.bn_1 = torch.nn.BatchNorm2d(channel_sizes[0] // 4)
        self.bn_2 = torch.nn.BatchNorm2d(channel_sizes[0] // 4)
        self.bn_3 = torch.nn.BatchNorm2d(channel_sizes[1])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x_identity = x
        x = self.conv_layer_1(x)
        x = self.bn_1(x)
        x = f.gelu(x)

        x = self.conv_layer_2(x)
        x = self.bn_2(x)
        x = f.gelu(x)

        x = self.conv_layer_3(x)
        x = self.bn_3(x)

        if self.is_increase:
            x_identity = self.conv_layer_4(x_identity)
            x_identity = self.bn_4(x_identity)

        return f.gelu(x + x_identity)

In [9]:
class ResNet(torch.nn.Module):
    def __init__(self, input_channel: int, output_channel: int, out_dim: int):
        super(ResNet, self).__init__()
        self.conv_layer_1 = torch.nn.Conv2d(input_channel, output_channel, kernel_size=(7, 7), stride=(2, 2))
        self.max_pool_layer_1 = torch.nn.MaxPool2d(kernel_size=3, stride=2)
        self.bn_1 = torch.nn.BatchNorm2d(output_channel)

        self.res_layer_1 = torch.nn.Sequential(
            *(3 * [ResidualLayer((output_channel, output_channel), strides=1)])
        )
        self.res_layer_2 = torch.nn.Sequential(
            ResidualLayer((output_channel, output_channel * 2), is_increase=True, strides=2),
            *(3 * [ResidualLayer((output_channel * 2, output_channel * 2), strides=1)])
        )
        self.res_layer_3 = torch.nn.Sequential(
            ResidualLayer((output_channel * 2, output_channel * 4), is_increase=True, strides=2),
            *(3 * [ResidualLayer((output_channel * 4, output_channel * 4), strides=1)])
        )
        self.res_layer_4 = torch.nn.Sequential(
            ResidualLayer((output_channel * 4, output_channel * 6), is_increase=True, strides=2),
            *(3 * [ResidualLayer((output_channel * 6, output_channel * 6), strides=1)])
        )

        # classifier part
        self.dense_layer = torch.nn.Linear(192*9*14, out_dim)
        self.fl = torch.nn.Flatten()

    def forward(self, x):
        x = self.conv_layer_1(x)
        x = self.bn_1(x)
        x = self.max_pool_layer_1(f.gelu(x))

        x = self.res_layer_1(x)
        x = self.res_layer_2(x)
        x = self.res_layer_3(x)
        x = self.res_layer_4(x)
        x = self.fl(x)
        x = self.dense_layer(x)

        return x

In [16]:
base_model = ResNet(input_channel=3, output_channel=32, out_dim=10)
num_epochs=10

cost_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(base_model.parameters(), lr=1e-2)



IndexError: index 999 is out of bounds for dimension 0 with size 999

In [20]:
for epoch in range(num_epochs):
    counter = 0
    for data, target in train_loader:
        y_pred = base_model(data)
        cost = cost_fn(y_pred, target)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()

        counter += 1
        if counter % 100 == 0:
            print(f'At iteration: {counter} the loss is: {cost}')

IndexError: index 999 is out of bounds for dimension 0 with size 999