In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torchvision.models


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score



In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive; the dataset
# path used by loadData() lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
def loadData(datasetPath='/content/gdrive/MyDrive/APS360/ProgressReport/SampleDataLarge',
             batchSize=1, numWorkers=1):
  """Load an image-folder dataset and return train/validation/test loaders.

  Every image is resized to 224x224 and converted to a tensor. The dataset
  is then split roughly 80/10/10 using a fixed torch seed, so the same
  samples land in the same split on every run.

  Args:
    datasetPath: Root directory passed to torchvision's ImageFolder.
      Defaults to the large sample set; the smaller set used earlier was
      '/content/gdrive/MyDrive/APS360/ProgressReport/APS360SampleData'.
    batchSize: Batch size shared by all three loaders.
    numWorkers: Number of loader worker processes shared by all three loaders.

  Returns:
    A tuple (trainLoader, valLoader, testLoader) of DataLoader objects.
  """
  # Seeds NumPy's global RNG only; the split below is seeded separately
  # through its own torch Generator.
  np.random.seed(1000)

  # Resize (not crop) every image to 224x224, then convert it to a tensor.
  transform = transforms.Compose([transforms.Resize((224, 224)),
                                  transforms.ToTensor()])

  # Class names the original author listed for this dataset (unused by the code):
  # 'COVID-19', 'Normal', 'Pneumonial-Bacterial', 'Pneumonial-Viral'
  sampleSet = torchvision.datasets.ImageFolder(datasetPath, transform=transform)
  total = len(sampleSet)
  print(total)

  # 80% train, 10% validation; test takes the remainder. Truncating all three
  # with int() can leave the sizes short of `total`, which random_split
  # rejects, so the last split absorbs the rounding leftovers.
  train = int(total * 0.8)
  val = int(total * 0.1)
  test = total - train - val

  trainData, valData, testData = torch.utils.data.random_split(
      sampleSet, [train, val, test],
      generator=torch.Generator().manual_seed(100))

  print(trainData, valData, testData)

  # Wrap each split in a loader with identical settings.
  trainLoader = torch.utils.data.DataLoader(trainData, batch_size=batchSize,
                                            num_workers=numWorkers,
                                            shuffle=True)
  valLoader = torch.utils.data.DataLoader(valData, batch_size=batchSize,
                                          num_workers=numWorkers,
                                          shuffle=True)
  testLoader = torch.utils.data.DataLoader(testData, batch_size=batchSize,
                                           num_workers=numWorkers,
                                           shuffle=True)
  return trainLoader, valLoader, testLoader

In [None]:
# Build the loaders once; `test` holds the whole (train, val, test) tuple.
test = loadData()
trainLoader, valLoader, testLoader = test
# Batch counts in train / validation / test order.
print(*map(len, test))

950
<torch.utils.data.dataset.Subset object at 0x7ff15ab7e310> <torch.utils.data.dataset.Subset object at 0x7ff15ab7ed10> <torch.utils.data.dataset.Subset object at 0x7ff15ab7ef90>
760 95 95


In [None]:
# classes = ['COVID-19', 'Normal', 'Pneumonial-Bacterial', 'Pneumonial-Viral']
# dataiter = iter(trainLoader)
# images, labels = next(dataiter)
# image = np.transpose(images[0], (1, 2, 0))
# label = classes[labels[0]]
# for images, labels in trainLoader:
#   print(classes[labels[0]])

In [None]:
# Reference links on multiclass classification
#https://towardsdatascience.com/dealing-with-multiclass-data-78a1a27c5dcc
#https://www.codementor.io/@agarrahul01/multiclass-classification-using-random-forest-on-scikit-learn-library-hkk4lwawu

In [None]:
# Gather the whole training split into two tensors for scikit-learn:
# train_x has one flattened image per row, train_y is a column of labels.
train_x_batches = []
train_y_batches = []

# A single pass keeps images and labels aligned even though the loader shuffles.
for x, y in trainLoader:
  # Flatten every image in the batch to one feature row. Sizes come from the
  # batch itself, so any batch size or dataset size works.
  train_x_batches.append(x.reshape(x.shape[0], -1))
  train_y_batches.append(y.reshape(-1, 1))

train_x = torch.cat(train_x_batches)
train_y = torch.cat(train_y_batches)
# Drop the per-batch lists so the image data is not held twice.
del train_x_batches, train_y_batches

print(train_x.shape)
print(train_y.shape)

torch.Size([760, 150528])
torch.Size([760, 1])


In [None]:
# Gather the whole test split into two tensors for scikit-learn:
# test_x has one flattened image per row, test_y is a column of labels.
test_x_batches = []
test_y_batches = []

# A single pass keeps images and labels aligned even though the loader shuffles.
for x, y in testLoader:
  # Flatten every image in the batch to one feature row. Sizes come from the
  # batch itself, so any batch size or dataset size works.
  test_x_batches.append(x.reshape(x.shape[0], -1))
  test_y_batches.append(y.reshape(-1, 1))

test_x = torch.cat(test_x_batches)
test_y = torch.cat(test_y_batches)
# Drop the per-batch lists so the image data is not held twice.
del test_x_batches, test_y_batches

print(test_x.shape)
print(test_y.shape)

torch.Size([95, 150528])
torch.Size([95, 1])


In [None]:
# Fit a random forest (100 trees, entropy split criterion, fixed seed) on the
# flattened pixel features and report accuracy on the test and training sets.
model = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=20)

# scikit-learn expects 1-D label arrays; flatten so an (n, 1) column of
# labels does not trigger a DataConversionWarning during fit.
train_labels = np.asarray(train_y).ravel()
test_labels = np.asarray(test_y).ravel()

model.fit(train_x, train_labels)
predict2 = model.predict(train_x)  # predictions on the training data
predict = model.predict(test_x)    # predictions on the held-out test data

value = accuracy_score(test_labels, predict)     # test accuracy
value2 = accuracy_score(train_labels, predict2)  # training accuracy
# NOTE(review): the recorded run shows ~0.72 test vs 1.0 train accuracy,
# which looks like overfitting. Confirm on the validation split.
print(value)
print(value2)

  


0.7157894736842105
1.0
