In [1]:
#Importing the Libraries
import os   #used for listing directories in a Folder, or Items in a File
import torch  # Pytorch Neural Network Package
import pandas as pd    # To import csv Files
import numpy as np      # To operate on Numpy Arrays
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader   # To create a custom DataSet in Pytorch
from torchvision import transforms, utils     # To operate on Datasets such as Normilazation
import pydicom    # To deal Operate on DICOM Images

In [30]:
#Let us a create a class for the Training Dataset

class PnomniaData(Dataset):
    
    # Lets create a PnemoniaDataset Object Creator
    def __init__(self , csv_file , root_dir , transform ):
        # Let us give the property of the oject a csv File
        self.data = pd.read_csv(csv_file)
        #Let us set the Root Directory
        self.root_dir = root_dir
        self.transform = transform
        
        
    # Let us Find the Size of the DataSet
    def __len__(self):
        return len(self.data)-1
    
    #Lets Create a method to Get an Item of the Dataset
    def __getitem__(self, idx):  #Idx is the index of the wanted Item of The Dataset
        # using the iloc  Property let us get an Item of the Csv
        patientData = self.data.iloc[idx]
        patiendId = patientData['patientId'] #let us get a patient id from the PatientId Colounm
        #let us create a coloumn matrix(its size is 5), this matrix contains x,y,width,height,target, we do so by concatinating them
        x = patientData['x']  #let us get the x coodinate  from the "x" Colounm
        y = patientData['y']
        width = patientData['width']
        height = patientData['height']
        target = patientData['Target']
        XandY = np.concatenate((x,y) ,axis = None)
        XandYandWidth = np.concatenate((XandY,width), axis = None)
        XandYandWidthAndHight = np.concatenate((XandYandWidth,height), axis = None)
        XandYandWidthAndHightTarget = np.concatenate((XandYandWidthAndHight,target), axis = None)
        
        #With the Patient Id concatinated with the root director, lets go the Photos folder to get a DICOM Image
        dcm_file = str(self.root_dir)+ patiendId +".dcm"
        dcm_data = pydicom.read_file(dcm_file)
        #let us get the pixels of the DICOM Image
        image = dcm_data.pixel_array
        
        #Let us create a dictionary. This dictionary has an Id, the x,y.. coodicates and the image pixels of the DICOM Image)
        sample = {'patientId':patiendId, 'XandYandWidthAndHightTarget':XandYandWidthAndHightTarget, 'image':image}
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample
    

#A class to convert to Our Data to Pytorch Tensors
class ToTensor(object):
    
    def __call__(self, sample):
        patientId ,XandYandWidthAndHightTarget, image = sample['patientId'],sample['XandYandWidthAndHightTarget'], sample['image']
        XandYandWidthAndHightTarget = np.array([XandYandWidthAndHightTarget])
        XandYandWidthAndHightTarget=XandYandWidthAndHightTarget.reshape(-1,1)
        XandYandWidthAndHightTarget = torch.from_numpy(XandYandWidthAndHightTarget)
        image = image[..., np.newaxis]
        XandYandWidthAndHightTarget = XandYandWidthAndHightTarget.type(torch.FloatTensor)
        image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image)
        image = image.type(torch.FloatTensor)
        
        return  {'patientId':patientId, 'XandYandWidthAndHightTarget':XandYandWidthAndHightTarget , 'image':image}
    
    
class MeanSubtraction(object):
    def __init__(self,mean):
        self.mean = mean
        
    def __call__(self,sample):
        patientId ,XandYandWidthAndHightTarget, image = sample['patientId'],sample['XandYandWidthAndHightTarget'], sample['image']
        for i in range(image.shape[0]):
            image[:,:,i] -=  0.5
            
            return  {'patientId':patientId, 'XandYandWidthAndHightTarget':XandYandWidthAndHightTarget , 'image':image}
            

    
    
 

    
    

In [31]:
 #Let us a create a class for the Testing Dataset
class PnomniaDataTesting(Dataset):
    
    def __init__(self , root_dir , transform ):
        self.root_dir = root_dir
        self.transform = transform
        
        
    def __len__(self):
        self.listOfFiles = os.listdir(self.root_dir)
        return len(self.listOfFiles)
    
    def __getitem__(self, idx):
        patientIdWithExtention = self.listOfFiles[idx]
        patientId = patientIdWithExtention[:-4]
        dcm_file = str(self.root_dir) + patientIdWithExtention
        dcm_data = pydicom.read_file(dcm_file)
        image = dcm_data.pixel_array
        
        sample = {'patientId':patiendId, 'image':image}
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample
class ToTensorTesting(object):
    
    def __call__(self, sample):
        patientId , image = sample['patientId'], sample['image']
        image = image[..., np.newaxis]
        image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image)
        
        return  {'patientId':patientId, 'image':image}
    

In [32]:
all_transforms = transforms.Compose([ToTensor()])


trainingDataset = PnomniaData(csv_file='/Users/abdulsalamyazid/Desktop/Projects/RSNA Pneumonia/Dataset/train_label.csv',
                             root_dir = '/Users/abdulsalamyazid/Desktop/Projects/RSNA Pneumonia/Dataset/train_images/',
                             transform = all_transforms)

#testingDataset = PnomniaDataTesting(root_dir = '/Users/abdulsalamyazid/Desktop/Projects/Dataset/test_images/',
                             #transform = ToTensorTesting())




print(trainingDataset.__len__())
for i in range (len(trainingDataset)):
    sample = trainingDataset[i]
    print(i,sample['image'].size())
    
    if i == 0:
        break
        

30226
0 torch.Size([1, 1024, 1024])


In [33]:
#How to convert a numpy array to Pytorch Torch
x = np.array([2,3,1,0])
print(type(x))
x = torch.from_numpy(x)
print(type(x))

<class 'numpy.ndarray'>
<class 'torch.Tensor'>


In [34]:
trainloader = DataLoader(trainingDataset , batch_size = 4,
                        shuffle = True ,num_workers = 4)
#testloader = DataLoader(testingDataset , batch_size = 4,
                       # shuffle = True ,num_workers = 4)


In [36]:
mean_sum = 0.
std = 0.
nb_samples = 0.
for data in trainloader:
    batch_samples = data['image'].size(0)
    #print("batch_samples")
    #print(batch_samples )
    data['image'] = data['image'].view(batch_samples, data['image'].size(1), -1)
    #print("data size")
    #print(data['image'].size())
    #data['image']
    mean += data['image'].mean(2).sum(0)
    std += data['image'].std(2).sum(0)
    nb_samples += batch_samples

mean /= 30226.0
std /= 30226.0

print("samples number")
print(nb_samples)
print("mean is")
print(mean)
print("std is")
print(std)


samples number
30226.0
mean is
tensor([ 124.5482])
std is
tensor([ 58.0486])


In [29]:
print(nb_samples)

30226.0


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Neti(nn.Module):
    def __init__(self):
        super(Neti, self).__init__()
        self.conv1 = nn.Conv2d(1,20,10)
        self.conv2 = nn.Conv2d(20,40,10)
        self.conv3 = nn.Conv2d(40,64,10)
        self.fc1 = nn.Linear(1600, 120)
        self.fc2 = nn.Linear(120,100)
        self.fc3 = nn.Linear(100,5)
        
    def forward(self,x):
        x = F.max_pool2d(F.relu(self.conv1(x)),(5,5))
        x = F.max_pool2d(F.relu(self.conv2(x)),5)
        x = F.max_pool2d(F.relu(self.conv3(x)),5)
        x = x.view(-1,self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x
    
    def num_flat_features(self,x):
        size = x.size()[1:]
        num_fetures = 1
        for s in size:
            num_fetures *=s
        return num_fetures
    
    

In [12]:
my_net = Neti()
for name, paremeter in my_net.named_parameters():
    print(name,"\t\t", paremeter.shape)

conv1.weight 		 torch.Size([20, 1, 10, 10])
conv1.bias 		 torch.Size([20])
conv2.weight 		 torch.Size([40, 20, 10, 10])
conv2.bias 		 torch.Size([40])
conv3.weight 		 torch.Size([64, 40, 10, 10])
conv3.bias 		 torch.Size([64])
fc1.weight 		 torch.Size([120, 1600])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([100, 120])
fc2.bias 		 torch.Size([100])
fc3.weight 		 torch.Size([5, 100])
fc3.bias 		 torch.Size([5])


In [45]:
print(sample['image'].size())
#sample['image'] = sample['image'].unsqueeze_(0)
#print(sample['image'].size())

#mytensor = sample['image'].type(torch.FloatTensor)
#mytensor = mytensor.unsqueeze_(0)
#print(mytensor.type())
#print(mytensor.size())

#net = Net()
#lets try an input 
output = net(mytensor)
print(output)


torch.Size([1, 1024, 1024])


RuntimeError: expected stride to be a single integer value or a list of 3 values to match the convolution dimensions, but got stride=[1, 1]

In [46]:
#Lets create our optimizer
import torch
import torch.nn as nn
import torch.optim as optim
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001 , momentum = 0.9 )

In [47]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader):
   
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

IndexError: Traceback (most recent call last):
  File "/Users/abdulsalamyazid/anaconda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/Users/abdulsalamyazid/anaconda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "<ipython-input-6-1fec68f98833>", line 21, in __getitem__
    patientData = self.data.iloc[idx]
  File "/Users/abdulsalamyazid/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py", line 1328, in __getitem__
    return self._getitem_axis(key, axis=0)
  File "/Users/abdulsalamyazid/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py", line 1749, in _getitem_axis
    self._is_valid_integer(key, axis)
  File "/Users/abdulsalamyazid/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py", line 1638, in _is_valid_integer
    raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
