In [278]:
import pandas as pd
import numpy as np
import torch
from torch import tensor, Tensor, nn, optim
from torch.utils.data import Dataset, DataLoader
from torch import functional as F
import matplotlib.pyplot as plt
import torchvision as tv
from random import choice
from pathlib import Path

from sklearn.preprocessing import MinMaxScaler

import warnings

In [4]:
# Filesystem layout: everything lives under ./data, with the 256px PNGs in
# the '256' sub-folder and the annotation table next to it.
datapath = Path('data')
img_folder = datapath.joinpath('256')
train_csv = datapath.joinpath('train.csv')

In [23]:
# Load the annotation table and attach, per row, the image location
# <img_folder>/<patient_id>_<image_id>.png as a new 'path' column.
full_df = pd.read_csv(train_csv).assign(
    path=lambda df: img_folder / (
        df['patient_id'].astype(str) + '_' + df['image_id'].astype(str) + '.png'
    )
)

In [38]:
# Show the first annotation row to check the columns and the built path.
full_df.iloc[:1]

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,cancer,biopsy,invasive,BIRADS,implant,density,machine_id,difficult_negative_case,path
0,2,10006,462822612,L,CC,61.0,0,0,0,,0,,29,False,data\256\10006_462822612.png


In [None]:
# Total of the 'cancer' column across the whole annotation table.
full_df.loc[:, 'cancer'].sum()

In [304]:
class BCancerDataset(Dataset):
    """Map-style dataset yielding (scaled grayscale pixels, cancer label) pairs.

    Each item is loaded lazily from ``<img_folder>/<patient_id>_<image_id>.png``.

    Args:
        annot_df: annotation table; must contain the columns ``patient_id``,
            ``image_id`` and ``cancer``.
        slc: positional slice selecting the rows of ``annot_df`` that belong
            to this dataset.
        scaler: fitted object exposing ``transform(array_of_shape_(n, 1))``,
            applied to the flattened pixel column of every image.
    """

    def __init__(self, annot_df: pd.DataFrame, slc: slice, scaler: callable): # Maybe transforms here ?
        super().__init__()
        self.scaler = scaler

        # prep annotations
        # .copy() gives this dataset its own frame, so adding the column below
        # neither writes into a slice of the caller's frame nor triggers
        # pandas' chained-assignment warning.
        self.annotations = annot_df.iloc[slc].copy()

        # Build the paths from this dataset's OWN rows rather than from a
        # notebook-global frame, so the class works on a fresh kernel and
        # with any annotation table passed in.
        file_names = (
            self.annotations['patient_id'].astype(str)
            + '_'
            + self.annotations['image_id'].astype(str)
            + '.png'
        )
        self.annotations['path'] = [img_folder / name for name in file_names]

    def __len__(self) -> int:
        """Number of annotation rows selected for this dataset."""
        return len(self.annotations)

    def __getitem__(self, item) -> (Tensor, Tensor):
        """Load one sample.

        Returns:
            A float32 tensor of shape ``(n_pixels, 1)`` holding the scaled
            grayscale image, and a 0-dim int64 tensor holding the label.
        """
        image_path = str(self.annotations['path'].iloc[item])
        # Read as single-channel and flatten to a column: the scaler works on
        # (n_samples, 1) arrays.
        pixels = tv.io.read_image(image_path, tv.io.ImageReadMode.GRAY).reshape(-1, 1)
        image_data = tensor(self.scaler.transform(pixels.numpy()), dtype=torch.float32)
        label = torch.tensor(int(self.annotations['cancer'].iloc[item]), dtype=torch.long)
        return image_data, label


def gen_ds(split: float, annot_f: Path, bs=128) -> (DataLoader, DataLoader):
    """Build the training and validation loaders from one annotation CSV.

    The CSV rows are split positionally (no shuffling before the split): the
    first ``split`` fraction goes to training, the rest to validation.

    Args:
        split: fraction of rows used for training, in ``[0, 1]``.
        annot_f: path of the annotation CSV.
        bs: batch size used by both loaders.

    Returns:
        ``(train_loader, val_loader)``, both shuffling ``DataLoader``s over
        ``BCancerDataset``.

    Raises:
        ValueError: if ``split`` is outside ``[0, 1]``.
    """
    # A negative split would silently turn into a "from the end" slice.
    if not 0 <= split <= 1:
        raise ValueError(f'split must be within [0, 1], got {split}')

    # Fit on the full 8-bit range 0..255 inclusive so that pixel value 255
    # maps to exactly 1.0 (np.arange excludes its stop value).
    scaler = MinMaxScaler()
    scaler.fit(np.arange(0, 256, 1).reshape(-1, 1))

    annotdf = pd.read_csv(annot_f)
    len_df = len(annotdf)
    split_pt = int(len_df * split)
    print(split_pt)
    train_dl = DataLoader(BCancerDataset(annotdf, slice(0, split_pt), scaler), batch_size=bs, shuffle=True)
    val_dl = DataLoader(BCancerDataset(annotdf, slice(split_pt, len_df), scaler), batch_size=bs, shuffle=True)
    return train_dl, val_dl


In [305]:
# Build both loaders with an 80/20 positional split. Every warning raised
# while doing so is silenced for the duration of the block only.
# Note: despite its name, val_ds receives the validation DataLoader.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    train_dl, val_ds = gen_ds(split=0.8, annot_f=train_csv)

43764


In [311]:
# Shape of the image tensor of the first training sample.
train_dl.dataset[0][0].size()

torch.Size([65536, 1])

In [307]:
class CancerNN(nn.Module):
    """Fully connected classifier: in_s -> 512 -> 256 -> out_s with SELU.

    Args:
        in_s: number of input features per sample.
        out_s: number of output logits per sample.
    """

    def __init__(self, in_s: int, out_s:int):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(in_s, 512),
            nn.SELU(),
            nn.Linear(512, 256),
            nn.SELU(),
            nn.Linear(256, out_s)
        ])

    # Implemented as forward() rather than __call__(): nn.Module.__call__
    # dispatches to forward() and also runs registered hooks, which an
    # overridden __call__ would bypass. `model(x)` keeps working unchanged.
    def forward(self, x):
        """Apply the layers in order to ``x`` (last dim must be ``in_s``) and return the logits."""
        for layer in self.layers:
            x = layer(x)
        return x

In [321]:
# Two-class classifier over flattened 256x256 grayscale images.
model = CancerNN((256*256), 2)
# Use the public torch.nn.functional path; `torch.functional.F` only resolves
# because torch.functional happens to import it internally.
loss_fn = nn.functional.cross_entropy
opt = optim.SGD(model.parameters(), lr=0.2)

In [None]:
# Plain SGD training: 10 passes over the training loader.
for e in range(0, 10):
    for xb, yb in train_dl:
        # Flatten every sample to one feature vector. The batch size is taken
        # from the batch itself because the last batch of an epoch is smaller
        # whenever the dataset size is not a multiple of the loader's
        # batch_size; a hard-coded 128 would make reshape fail there.
        preds = model(xb.reshape(xb.shape[0], -1))
        loss = loss_fn(preds, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    # Report the loss of the epoch's final batch as a plain float.
    print(f'epoch {e}: last-batch loss {loss.item():.4f}')

