In [3]:
import os
import pandas as pd
import cv2
from tqdm import tqdm
import numpy as np
import json
np.random.seed(0)
import colorsys
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.ensemble import AdaBoostClassifier
from imblearn.metrics import geometric_mean_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader


In [4]:
# mounting google drive for running in google colab, comment if running locally and update directories in the next cell
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# change if running locally
image_files = os.listdir('./drive/MyDrive/QC/rgb')
csv_files = os.listdir('./drive/MyDrive/QC/csv')

print(f"Found - {len(image_files)} images & {len(csv_files)} labels")

Found - 1744 images & 1744 labels


In [6]:
a = np.random.choice(np.arange(0,len(csv_files)),size=250, replace = False).tolist()
with open("./chosen_indices.json","w") as f:
  json.dump({
      "indices" : a,
  }, f)

In [7]:
def prepare_dataset(csv_files, slice_ = 36, destination = "./processed_data", convert_to_gray_scale = False):

  print("Reading nuclei labels - ")
  data = pd.DataFrame()

  for f in tqdm(csv_files):
    temp = pd.read_csv('./drive/MyDrive/QC/csv/' + f, engine=  "python")
    temp["slide"] = f.split(".")[0]
    temp.drop(columns= ["Unnamed: 0"], inplace=True)
    data = pd.concat([data, temp], axis=0)  
  
  print("Read nuclei labels, removing files with smaller than required slices")
  data["xrange"] = data["xmax"] - data["xmin"]
  data["yrange"] = data["ymax"] - data["ymin"]  
  data["aspect_ratio"] = data["xrange"]/data["yrange"]

  print("Removed smaller slices, processing labels")

  data = data[data["raw_classification"] != "unlabeled"]
  print(data["raw_classification"].value_counts())
  data["label"] = data["raw_classification"].apply(lambda x: x if x=="tumor" else "not-tumor")

  print("Processed labels - ", data["label"].value_counts())
  print("Reading files - ")
  if not os.path.isdir(destination):
    os.mkdir(destination)
  else:
    print("Overwriting existing data")
  final_labelled_data = {
      "filename" : [],
      "label" : []
  }
  i = 0
  for f in tqdm(data["slide"].unique()):
    subset = data[data["slide"] == f]
    filename = './drive/MyDrive/QC/rgb/' + f + ".png"
    image = cv2.imread(filename)

    for row in subset.iterrows():
      row = row[1]
      image_ = image[row["ymin"]:row["ymax"]+1, row["xmin"]:row["xmax"]+1]
      x = int(row["xmin"] + row["xrange"]/2)
      y = int(row["ymin"] + row["yrange"]/2)
      if x<18 or y<18 or x+18>image.shape[0] or y+18>image.shape[1]:
        continue
      image_ = image[x-18:x+18, y-18:y+18]
      image_ = cv2.resize(image_, (slice_, slice_), interpolation = cv2.INTER_AREA)
      if convert_to_gray_scale:
        assert slice_ == 28
        image_ = cv2.cvtColor(image_, cv2.COLOR_RGB2GRAY)        
        image_ = cv2.resize(image_, (slice_, slice_), interpolation = cv2.INTER_AREA)
      np.save(f"{destination}/{i}", np.moveaxis(image_,-1,0))
      flipped_image_ = cv2.flip(image_, 1)
      np.save(f"{destination}/{i}_flipped", np.moveaxis(flipped_image_,-1,0))
      final_labelled_data["filename"].append(i)
      final_labelled_data["label"].append(row["label"])

      rot_image_ = cv2.rotate(image_, cv2.ROTATE_90_CLOCKWISE)
      np.save(f"{destination}/{i}_rot90", np.moveaxis(rot_image_,-1,0))

      rot_image_ = cv2.rotate(image_, cv2.ROTATE_90_COUNTERCLOCKWISE)
      np.save(f"{destination}/{i}_rot270", np.moveaxis(rot_image_,-1,0))

      rot_image_ = cv2.rotate(image_, cv2.ROTATE_180)
      np.save(f"{destination}/{i}_rot180", np.moveaxis(rot_image_,-1,0))

      rot_image_ = cv2.rotate(flipped_image_, cv2.ROTATE_90_CLOCKWISE)
      np.save(f"{destination}/{i}_flipped_rot90", np.moveaxis(rot_image_,-1,0))

      rot_image_ = cv2.rotate(flipped_image_, cv2.ROTATE_90_COUNTERCLOCKWISE)
      np.save(f"{destination}/{i}_flipped_rot270", np.moveaxis(rot_image_,-1,0))

      rot_image_ = cv2.rotate(flipped_image_, cv2.ROTATE_180)
      np.save(f"{destination}/{i}_flipped_rot180", np.moveaxis(rot_image_,-1,0))
      i+=1
  
  final_labelled_data = pd.DataFrame(final_labelled_data)
  print(final_labelled_data["label"].value_counts())
  final_labelled_data.to_csv(f"{destination}/final_labelled_data.csv", index = None)

In [8]:
csv_files = np.asarray(csv_files)
prepare_dataset(csv_files[a])

Reading nuclei labels - 


100%|██████████| 250/250 [00:36<00:00,  6.86it/s] 


Read nuclei labels, removing files with smaller than required slices
Removed smaller slices, processing labels
tumor                   2725
lymphocyte              1959
fibroblast              1215
plasma_cell              855
macrophage               246
ductal_epithelium        124
vascular_endothelium      82
apoptotic_body            58
mitotic_figure            29
myoepithelium             17
neutrophil                 1
eosinophil                 1
Name: raw_classification, dtype: int64
Processed labels -  not-tumor    4587
tumor        2725
Name: label, dtype: int64
Reading files - 


100%|██████████| 250/250 [03:54<00:00,  1.07it/s]

not-tumor    4112
tumor        2497
Name: label, dtype: int64





In [9]:
prepare_dataset(csv_files[a], slice_ = 28, destination="./processed_grayscale_data", convert_to_gray_scale = True)

Reading nuclei labels - 


100%|██████████| 250/250 [00:01<00:00, 195.82it/s]


Read nuclei labels, removing files with smaller than required slices
Removed smaller slices, processing labels
tumor                   2725
lymphocyte              1959
fibroblast              1215
plasma_cell              855
macrophage               246
ductal_epithelium        124
vascular_endothelium      82
apoptotic_body            58
mitotic_figure            29
myoepithelium             17
neutrophil                 1
eosinophil                 1
Name: raw_classification, dtype: int64
Processed labels -  not-tumor    4587
tumor        2725
Name: label, dtype: int64
Reading files - 


100%|██████████| 250/250 [00:11<00:00, 22.09it/s]

not-tumor    4112
tumor        2497
Name: label, dtype: int64





In [10]:
all_data = pd.read_csv("./processed_data/final_labelled_data.csv")
total = np.arange(0,np.max(all_data["filename"].values))
x_train_idx, x_test_idx = train_test_split(total, test_size = 0.2)

In [11]:
def fetch_data(filename = "./processed_data/final_labelled_data.csv", destination = "./processed_data/"):
  global x_train_idx, x_test_idx
  all_data = pd.read_csv(filename)
  total = np.arange(0,np.max(all_data["filename"].values))
  x_train = []
  y_train = []
  x_test = []
  y_test = []
  train_data = all_data[all_data["filename"].isin(x_train_idx)]
  print(train_data.shape)
  for i, row in train_data.iterrows():
    data = np.load(destination + str(row["filename"]) + ".npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)

    data = np.load(destination + str(row["filename"]) + "_flipped.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)

    data = np.load(destination + str(row["filename"]) + "_rot90.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)    

    data = np.load(destination + str(row["filename"]) + "_rot180.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)

    data = np.load(destination + str(row["filename"]) + "_rot270.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)

    data = np.load(destination + str(row["filename"]) + "_flipped_rot90.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)    

    data = np.load(destination + str(row["filename"]) + "_flipped_rot180.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)

    data = np.load(destination + str(row["filename"]) + "_flipped_rot270.npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_train.append(data/255.)
    y_train.append(1 if row["label"]=="tumor" else 0)
  test_data = all_data[all_data["filename"].isin(x_test_idx)]
  for i, row in test_data.iterrows():
    data = np.load(destination + str(row["filename"]) + ".npy")
    # data = cv2.cvtColor(data, cv2.COLOR_RGB2XYZ)
    x_test.append(data/255.)
    y_test.append(1 if row["label"]=="tumor" else 0)
  return np.asarray(x_train), np.asarray(y_train), np.asarray(x_test), np.asarray(y_test)

#LeNet and Color LeNet

In [12]:
class color_lenet(nn.Module):
  def __init__(self):
    super(color_lenet, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=50, kernel_size = 5)
    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=50, out_channels=100, kernel_size=5)
    self.fc1 = nn.Linear(in_features=3600, out_features=500)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(in_features=500, out_features=1)
    self.sigmoid = nn.Sigmoid()
  
  def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool(x)
    x = self.conv2(x)
    x = self.maxpool(x)
    x = x.view(x.shape[0],-1)
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    x = self.sigmoid(x)
    return x

In [13]:
class lenet(nn.Module):
  def __init__(self):
    super(lenet, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size = 5)
    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5)
    self.fc1 = nn.Linear(in_features=800, out_features=500)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(in_features=500, out_features=1)
    self.sigmoid = nn.Sigmoid()
  
  def forward(self, x):
    x = self.conv1(x)
    x = self.maxpool(x)
    x = self.conv2(x)
    x = self.maxpool(x)
    x = x.view(x.shape[0],-1)
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    x = self.sigmoid(x)
    return x

In [14]:
x_train, y_train, x_test, y_test = fetch_data()
x_train.shape

(5286, 2)


(42288, 3, 36, 36)

In [15]:
tensor_x = torch.from_numpy(x_train)
tensor_y = torch.from_numpy(y_train)

train_dataset = TensorDataset(tensor_x, tensor_y)
train_dataloader = DataLoader(train_dataset, batch_size = 64)

In [60]:
NUM_EPOCHS = 82
losses = []

model = color_lenet()
model.train()
model.cuda()
loss_fn = nn.BCELoss()
sgd = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9, weight_decay = 5e-4)
sgd2 = optim.SGD(model.parameters(), lr = 0.0001, momentum = 0.9, weight_decay = 5e-4)

for epoch in range(NUM_EPOCHS):
  avg_loss = 0
  for i , data in enumerate(train_dataloader):
    x,y = data
    x = x.cuda()
    y = y.cuda()
    if epoch < 70:
      sgd.zero_grad()
    else:
      sgd2.zero_grad()
    pred = model(x.float())
    loss = loss_fn(pred, y.float().view(-1,1))
    loss.backward()

    if epoch < 70:
      sgd.step()
    else:
      sgd2.step()
    avg_loss += loss.item()
    losses.append(avg_loss/len(train_dataloader))
  if (epoch+1)%5 == 0:
    print(f"Epoch - {epoch+1}, loss = {avg_loss/len(train_dataloader)}")

Epoch - 5, loss = 0.6669192907940061
Epoch - 10, loss = 0.6533789293320621
Epoch - 15, loss = 0.627954635279339
Epoch - 20, loss = 0.616277405262397
Epoch - 25, loss = 0.6110172814338394
Epoch - 30, loss = 0.6021737438596999
Epoch - 35, loss = 0.602004487667678
Epoch - 40, loss = 0.5957117552282201
Epoch - 45, loss = 0.5906993420989434
Epoch - 50, loss = 0.5760026194886213
Epoch - 55, loss = 0.5687105588263586
Epoch - 60, loss = 0.5617053931107686
Epoch - 65, loss = 0.5473009549111398
Epoch - 70, loss = 0.5289199350445359
Epoch - 75, loss = 0.4806801386490329
Epoch - 80, loss = 0.46958843397988226


In [61]:
tensor_x = torch.from_numpy(x_test)
tensor_y = torch.from_numpy(y_test)

test_dataset = TensorDataset(tensor_x, tensor_y)
test_dataloader = DataLoader(test_dataset, batch_size = 64)

train_predictions = []
train_labels = []
model.eval()
for i , data in enumerate(train_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  train_predictions.extend(pred)
  train_labels.extend(y.numpy().tolist())

test_predictions = []
test_labels = []
model.eval()
for i , data in enumerate(test_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  test_predictions.extend(pred)
  test_labels.extend(y.numpy().tolist())

In [62]:
train_predictions = [1 if x[0] > 0.5 else 0 for x in train_predictions]
test_predictions = [1 if x[0] > 0.5 else 0 for x in test_predictions]

In [63]:
print(f"Training\nAccuracy = {accuracy_score(train_predictions, train_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(train_predictions, train_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"Accuracy = {accuracy_score(test_predictions, test_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(test_predictions, test_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"G-measure = {geometric_mean_score(test_predictions, test_labels)}")

Training
Accuracy = 0.7857311766931517
Precision = 0.6753246753246753, Recall = 0.7369353410097431, F-score = 0.7047861075815333
Accuracy = 0.7307110438729199
Precision = 0.5959595959595959, Recall = 0.6541019955654102, F-score = 0.623678646934461
G-measure = 0.7098636202715726


In [None]:
x_train, y_train, x_test, y_test = fetch_data(destination = "./processed_grayscale_data/")
x_train.shape

(5286, 2)


(42288, 28, 28)

In [None]:
tensor_x = torch.from_numpy(x_train.reshape(x_train.shape[0],1,28,28))
tensor_y = torch.from_numpy(y_train)

train_dataset = TensorDataset(tensor_x, tensor_y)
train_dataloader = DataLoader(train_dataset, batch_size = 32)

In [None]:
NUM_EPOCHS = 100
losses = []

model = lenet()
model.cuda()
loss_fn = nn.BCELoss()
sgd = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9, weight_decay = 5e-4)

for epoch in range(NUM_EPOCHS):
  avg_loss = 0
  for i , data in enumerate(train_dataloader):
    x,y = data
    x = x.cuda()
    y = y.cuda()

    sgd.zero_grad()
    pred = model(x.float())
    loss = loss_fn(pred, y.float().view(-1,1))
    loss.backward()
    sgd.step()

    avg_loss += loss.item()
    losses.append(avg_loss/len(train_dataloader))
  if (epoch+1)%5 == 0:
    print(f"Epoch - {epoch+1}, loss = {avg_loss/len(train_dataloader)}")

Epoch - 5, loss = 0.6596542340555368
Epoch - 10, loss = 0.6327451292401441
Epoch - 15, loss = 0.625298880802893
Epoch - 20, loss = 0.6215092152950273
Epoch - 25, loss = 0.6184093433385786
Epoch - 30, loss = 0.6157982582015051
Epoch - 35, loss = 0.6135936388668412
Epoch - 40, loss = 0.6115369703190939
Epoch - 45, loss = 0.6095516331150068
Epoch - 50, loss = 0.6076937319940511
Epoch - 55, loss = 0.6059145781418672
Epoch - 60, loss = 0.6041368505897764
Epoch - 65, loss = 0.6024764412380023
Epoch - 70, loss = 0.6006611720644749
Epoch - 75, loss = 0.598919209834673
Epoch - 80, loss = 0.5986009679154215
Epoch - 85, loss = 0.5949911940758329
Epoch - 90, loss = 0.59286001466395
Epoch - 95, loss = 0.5903351053478599
Epoch - 100, loss = 0.5877407026137599


In [None]:
tensor_x = torch.from_numpy(x_test.reshape(x_test.shape[0],1,28,28))
tensor_y = torch.from_numpy(y_test)

test_dataset = TensorDataset(tensor_x, tensor_y)
test_dataloader = DataLoader(test_dataset, batch_size = 64)

train_predictions = []
train_labels = []
model.eval()
for i , data in enumerate(train_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  train_predictions.extend(pred)
  train_labels.extend(y.numpy().tolist())

test_predictions = []
test_labels = []
model.eval()
for i , data in enumerate(test_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  test_predictions.extend(pred)
  test_labels.extend(y.numpy().tolist())

In [None]:
train_predictions = [1 if x[0] > 0.5 else 0 for x in train_predictions]
test_predictions = [1 if x[0] > 0.5 else 0 for x in test_predictions]

In [None]:
print(f"Training\nAccuracy = {accuracy_score(train_predictions, train_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(train_predictions, train_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"Accuracy = {accuracy_score(test_predictions, test_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(test_predictions, test_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"G-measure = {geometric_mean_score(test_predictions, test_labels)}")

Training
Accuracy = 0.6815645100264851
Precision = 0.3778096903096903, Recall = 0.6334798994974874, F-score = 0.4733260325406759
Accuracy = 0.6732223903177005
Precision = 0.36767676767676766, Recall = 0.6046511627906976, F-score = 0.457286432160804
G-measure = 0.6475244981965965


#Color-Encoder-Net

In [64]:
class encoder_net(nn.Module):
  def __init__(self):
    super(encoder_net, self).__init__()
    self.fc1 = nn.Linear(in_features=36*36*3, out_features=6000)
    self.fc2 = nn.Linear(in_features=6000, out_features=12000)
    self.fc3 = nn.Linear(in_features=12000, out_features=3000)
    self.fc4 = nn.Linear(in_features = 3000, out_features = 1)
    self.sigmoid = nn.Sigmoid()
    self.relu = nn.ReLU()
  
  def forward(self, x):
    x = self.fc1(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc2(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc3(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc4(x)
    x = self.sigmoid(x)
    return x

In [65]:
class encoder_net_gray(nn.Module):
  def __init__(self):
    super(encoder_net_gray, self).__init__()
    self.fc1 = nn.Linear(in_features=28*28*1, out_features=1000)
    self.fc2 = nn.Linear(in_features=1000, out_features=500)
    self.fc3 = nn.Linear(in_features=500, out_features=250)
    self.fc4 = nn.Linear(in_features = 250, out_features = 1)
    self.sigmoid = nn.Sigmoid()
    self.relu = nn.ReLU()
  
  def forward(self, x):
    x = self.fc1(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc2(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc3(x)
    # x = self.sigmoid(x)
    x = self.relu(x)
    x = self.fc4(x)
    x = self.sigmoid(x)
    return x

In [66]:
x_train, y_train, x_test, y_test = fetch_data()
x_train.shape
tensor_x = torch.from_numpy(x_train.reshape(x_train.shape[0], -1))
tensor_y = torch.from_numpy(y_train)

train_dataset = TensorDataset(tensor_x, tensor_y)
train_dataloader = DataLoader(train_dataset, batch_size = 256)

(5286, 2)


In [67]:
NUM_EPOCHS = 75
losses = []

model = encoder_net()
model.cuda()
loss_fn = nn.BCELoss()
sgd = optim.SGD(model.parameters(), lr = 0.005, momentum = 0.9, weight_decay = 5e-4)

for epoch in range(NUM_EPOCHS):
  avg_loss = 0
  for i , data in enumerate(train_dataloader):
    x,y = data
    x = x.cuda()
    y = y.cuda()

    sgd.zero_grad()
    pred = model(x.float())
    loss = loss_fn(pred, y.float().view(-1,1))
    loss.backward()
    sgd.step()

    avg_loss += loss.item()
    losses.append(avg_loss/len(train_dataloader))
  # if (epoch+1)%5 == 0:
  print(f"Epoch - {epoch+1}, loss = {avg_loss/len(train_dataloader)}")

Epoch - 1, loss = 0.6784283227949257
Epoch - 2, loss = 0.6639528707208404
Epoch - 3, loss = 0.6594203793140779
Epoch - 4, loss = 0.6563060549368341
Epoch - 5, loss = 0.655857325318348
Epoch - 6, loss = 0.6550078979098654
Epoch - 7, loss = 0.6551069944917437
Epoch - 8, loss = 0.6550899577068995
Epoch - 9, loss = 0.6537540290190513
Epoch - 10, loss = 0.6511558814041586
Epoch - 11, loss = 0.6507203119346895
Epoch - 12, loss = 0.6495675200439361
Epoch - 13, loss = 0.6484157092061388
Epoch - 14, loss = 0.6473390816385487
Epoch - 15, loss = 0.6437328070940742
Epoch - 16, loss = 0.6462241131139089
Epoch - 17, loss = 0.6455322003149124
Epoch - 18, loss = 0.6443813374961715
Epoch - 19, loss = 0.6422863617000809
Epoch - 20, loss = 0.64163013772074
Epoch - 21, loss = 0.6374567997563316
Epoch - 22, loss = 0.6400381200105311
Epoch - 23, loss = 0.6320033503404583
Epoch - 24, loss = 0.6322170329919781
Epoch - 25, loss = 0.6430782498904022
Epoch - 26, loss = 0.6296628999063768
Epoch - 27, loss = 0.630

In [68]:
tensor_x = torch.from_numpy(x_test.reshape(x_test.shape[0], -1))
tensor_y = torch.from_numpy(y_test)

test_dataset = TensorDataset(tensor_x, tensor_y)
test_dataloader = DataLoader(test_dataset, batch_size = 64)

train_predictions = []
train_labels = []
model.eval()
for i , data in enumerate(train_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  train_predictions.extend(pred)
  train_labels.extend(y.numpy().tolist())

test_predictions = []
test_labels = []
model.eval()
for i , data in enumerate(test_dataloader):
  x,y = data
  x = x.cuda()
  pred = model(x.float()).cpu().detach().numpy().tolist()
  test_predictions.extend(pred)
  test_labels.extend(y.numpy().tolist())

In [69]:
train_predictions = [1 if x[0] > 0.5 else 0 for x in train_predictions]
test_predictions = [1 if x[0] > 0.5 else 0 for x in test_predictions]

In [70]:
print(f"Training\nAccuracy = {accuracy_score(train_predictions, train_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(train_predictions, train_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"Accuracy = {accuracy_score(test_predictions, test_labels)}")
precision, recall, fscore, _ = precision_recall_fscore_support(test_predictions, test_labels, average="binary")
print(f"Precision = {precision}, Recall = {recall}, F-score = {fscore}")
print(f"G-measure = {geometric_mean_score(test_predictions, test_labels)}")

Training
Accuracy = 0.693837495270526
Precision = 0.3793081918081918, Recall = 0.6689791873141725, F-score = 0.4841216081603379
Accuracy = 0.7299546142208775
Precision = 0.4404040404040404, Recall = 0.7315436241610739, F-score = 0.5498108448928121
G-measure = 0.7305171857259346
