In [1]:
from pathlib import Path
from tqdm import tqdm, tqdm_notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import torch
from sklearn.metrics import accuracy_score, roc_auc_score
from torch.autograd import Variable
# put your own path to the data root directory (see example in `Data architecture` section)
data_dir = Path("..")

# load the training and testing data sets
train_features_dir = data_dir / "train_input" / "moco_features"
test_features_dir = data_dir / "test_input" / "moco_features"
df_train = pd.read_csv(data_dir  / "supplementary_data" / "train_metadata.csv")
df_test = pd.read_csv(data_dir  / "supplementary_data" / "test_metadata.csv")

# concatenate y_train and df_train
y_train = pd.read_csv(data_dir  / "train_output_76GDcgx.csv")
df_train = df_train.merge(y_train, on="Sample ID")

print(f"Training data dimensions: {df_train.shape}")  # (344, 4)
df_train.head()

X_train = []
y_train = []
centers_train = []
patients_train = []

for sample, label, center, patient in tqdm(
    df_train[["Sample ID", "Target", "Center ID", "Patient ID"]].values
):
    # load the coordinates and features (1000, 3+2048)
    _features = np.load(train_features_dir / sample)
    # get coordinates (zoom level, tile x-coord on the slide, tile y-coord on the slide)
    # and the MoCo V2 features
    coordinates, features = _features[:, :3], _features[:, 3:]  # Ks
    # slide-level averaging
    #X_train.append(np.mean(features, axis=0))
    
    X_train.append(features)
    y_train.append([label])
    centers_train.append(center)
    patients_train.append(patient)

# convert to numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
centers_train = np.array(centers_train)
patients_train = np.array(patients_train)



Training data dimensions: (344, 4)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 344/344 [00:01<00:00, 287.38it/s]


In [2]:
X_train = torch.from_numpy(X_train)

In [10]:
torch.from_numpy([1,2])

TypeError: expected np.ndarray (got list)

In [15]:
X_train.max()

tensor(6.0380)

In [17]:
torch.argmax(X_train)

tensor(457401122)

In [20]:
X_train.shape

torch.Size([344, 1000, 2048])

In [None]:
X_train[:,:,X_train.argmax(dim=-1)]


In [13]:
X_train.argmax()

tensor(457401122)

In [28]:
import torch
import torch.nn.functional as F
import torch.nn as nn 


def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

#model.fc2.register_forward_hook(get_activation('fc2'))

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.n_tiles = 1000
        self.input_shape = 2048
        
        self.base_linear = nn.Linear(self.input_shape,1)
        
        self.base_layer = nn.Sequential(
                self.base_linear,
                nn.Dropout(.1),
                nn.Sigmoid())
        
        self.last_linear = nn.Linear(self.n_tiles, 1)

        self.last_layer = nn.Sequential(
                self.last_linear,
                #nn.Sigmoid()
                nn.Softmax())


    def forward(self, x):
        x = x.squeeze(0)
        
        x_=list()
        for j in range(self.n_tiles):
            x_.append(self.base_layer(x[:,j,:]))
        
        print(x_)
        x = torch.from_numpy(np.array(x_))   
        
        max_val, max_index = torch.max(x, dim=-1)
        
        self.last_linear.weight.requires_grad = False
        self.last_linear.weight[:, max_index].requires_grad = True
        
        output = self.last_layer(x)
        
        return output
            
        
            
    
    

In [29]:
model = Model()

In [30]:
model.forward(X_train)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [31]:
import torch.nn as nn

class BinaryClassificationModel(nn.Module):
    def __init__(self):
        super(BinaryClassificationModel, self).__init__()

        # Convolutional layer, with kernel size (1,3) and stride (1,2)
        self.conv1 = nn.Conv1d(in_channels=2048, out_channels=32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()

        # Convolutional layer, with kernel size (1,3) and stride (1,2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()

        # Convolutional layer, with kernel size (1,3) and stride (1,2)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
        self.relu3 = nn.ReLU()

        # Fully connected layer
        self.fc = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        x = x.permute(0,2,1) # Permute the axis 1 and 2
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = x.view(x.size(0), -1) # Flatten the tensor
        x = self.fc(x)
        return x

In [32]:
model = BinaryClassificationModel()