In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import csv
import torch
from torchvision import datasets, transforms
from torch import nn,optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

### Loading The Dataset

To prepare our data for training, we'll have to create a Custom Dataset class using PyTorch's Dataset class. 
The **Dataset class** is an abstract class representing a dataset which we will override here. 
Our custom dataset should inherit Dataset and must override the following methods:

`__init__()` is where the initial setup happens, such as reading a CSV file and assigning transforms.

`__getitem__()` supports indexing, so that `dataset[i]` can be used to get the i-th sample of data. It usually returns the sample and its label.

`__len__()` makes `len(dataset)` return the size of the dataset.

Reference: https://github.com/utkuozbulak/pytorch-custom-dataset-examples

### Data Exploration

In [None]:
# Custom map-style dataset backed by a headerless CSV file
class LoadDataset(torch.utils.data.Dataset):
    """Dataset over a headerless CSV in which every row is one sample.

    Each row holds ``SIGNAL_LENGTH`` signal values followed by the class
    label in column ``SIGNAL_LENGTH``.

    Args:
        data_path: Path of the CSV file to read.
        transforms: Optional callable applied to the ``(1, SIGNAL_LENGTH)``
            float32 signal tensor before it is returned.

    Attributes:
        data: float32 ``numpy`` array with one row per CSV line.
        transforms: The callable passed in (or ``None``).
    """

    # Number of leading columns that make up the signal; the label follows.
    SIGNAL_LENGTH = 187

    def __init__(self,data_path,transforms=None):
        self.transforms = transforms
        with open(data_path, newline='') as csvfile:
            # csv.reader yields an empty list for blank lines; dropping them
            # keeps the resulting array rectangular.
            rows = [row for row in csv.reader(csvfile) if row]
        # Parse the text to float32 once here instead of on every __getitem__.
        self.data = np.array(rows, dtype='float32')
        print(self.data.shape)

    def __getitem__(self, index):
        """Return ``(data, label)`` for row ``index``.

        ``data`` is a float32 tensor of shape ``(1, SIGNAL_LENGTH)`` (after
        ``transforms``, if any); ``label`` is a 0-dim float32 tensor holding
        the value of column ``SIGNAL_LENGTH``.
        """
        # np.array copies the row, so the returned tensor never aliases
        # self.data (torch.from_numpy shares memory with its argument).
        row = np.array(self.data[index], dtype='float32').reshape(1, -1)
        data_ori = torch.from_numpy(row)
        data = data_ori[:, :self.SIGNAL_LENGTH]
        label = data_ori[0, self.SIGNAL_LENGTH]

        # Apply the optional user-supplied transform to the signal only
        if self.transforms is not None:
            data = self.transforms(data)
        return data, label

    def __len__(self):
        """Return the number of rows (samples) loaded from the CSV."""
        return len(self.data)
# LoadDataset.__getitem__ already converts each row to a torch tensor, and
# transforms.ToTensor() only accepts PIL images / numpy arrays (it raises
# TypeError when handed a tensor), so no additional transform is applied.

# Load our training set
train_transforms = None
trainset = LoadDataset('../input/mitbih_train.csv',train_transforms)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 32,shuffle = True)

# Load our test set
test_transforms = None
testset = LoadDataset('../input/mitbih_test.csv',test_transforms)
# The test loader must iterate over testset (it previously wrapped trainset,
# so any evaluation would have run on the training data).
testloader = torch.utils.data.DataLoader(testset, batch_size = 32,shuffle = True)
# print some stats about the dataset
print('Length of dataset: ', len(trainset))

In [None]:
#Method For Data Exploration
def exploreData(dataframe):
    """Print a quick textual overview of ``dataframe``.

    The report contains, in order: the first three rows, the column labels,
    ``describe()`` statistics, the ``info()`` summary, the per-column count of
    missing values, and the ``(rows, columns)`` shape. Each section is
    followed by a separator line.

    Args:
        dataframe: The ``pandas.DataFrame`` to summarise.

    Returns:
        None. Everything is written to standard output.
    """
    separator = "--" * 50

    # Preview dataset
    print("Dataset Head")
    print(dataframe.head(3))
    print(separator)

    # Features in dataset
    print("Dataset Atrributes")
    print(dataframe.columns.values)
    print(separator)

    # view distribution of numerical features across the data set
    print("Dataset Numerical Features")
    print(dataframe.describe())
    print(separator)

    # View How many samples and how many missing values for each feature.
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so it must not be wrapped in print() (that emitted a stray "None").
    print("Dataset Features Details")
    dataframe.info()
    print(separator)

    # Disabled: distribution of categorical features across the data set
#     print("Dataset Categorical Features")
#     print(dataframe.describe(include=['O']))
#     print("--" * 50)

    #Checking for missing values
    print("Check for Missing Values")
    print(dataframe.isnull().sum())
    print(separator)

    #Get number of instances and number of attributes
    print("Number of Instances and Attributes")
    print(dataframe.shape)
    print(separator)

# Read both splits; header=None because the CSV files carry no header row,
# so pandas labels the columns with integers.
train_dataframe, test_dataframe = [
    pd.read_csv(csv_path, header=None)
    for csv_path in ("../input/mitbih_train.csv", "../input/mitbih_test.csv")
]

# Textual summary of the training split
exploreData(train_dataframe)

# Distribution of the output variable (column 187)
train_dataframe[187].value_counts()

### Observations
- No missing values
- 109446 samples
- 188 attributes
- Dataset is unbalanced

Data was already filtered and beats extracted according to the paper **ECG Heartbeat Classification: A Deep Transferable Representation**.

### Visual Exploration

In [None]:
# Plot a few randomly chosen heartbeat signals from the training set
num_to_display = 3

# One figure with a panel per sample. squeeze=False keeps `axes` 2-D even
# when num_to_display == 1, so axes[0, i] is always valid.
fig, axes = plt.subplots(1, num_to_display, figsize=(20, 10), squeeze=False)

for i in range(num_to_display):

    # randomly select a sample
    rand_i = np.random.randint(0, len(trainset))
    sample = trainset[rand_i]
    signal, label = sample

    # Flatten to 1-D for plotting, whatever leading singleton dimensions the
    # dataset / transform put in front of the time axis.
    trace = torch.as_tensor(signal).detach().cpu().numpy().ravel()

    # print the length of the signal and its label instead of dumping the tensor
    print(i, trace.shape, float(label))

    ax = axes[0, i]
    ax.plot(trace)
    ax.set_title('Sample #{} (row {}, label {})'.format(i, rand_i, int(label)))
    ax.set_xlabel('Time step')
    ax.set_ylabel('Amplitude')

plt.show()