In [None]:
import numpy as np
import pandas as pd
import os
import shutil
import pickle
import glob
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from zipfile import ZipFile
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import ast

In [None]:
# Base folder holding the preprocessed dataset files (path looks like a Google
# Drive mount inside Colab -- adjust when running elsewhere).
# Note the trailing slash on this path.
root_path = '/content/drive/MyDrive/Breast Cancer Preprocessed Datasets/'

## CBIS

In [None]:
# Load the CBIS metadata table.
# `root_path` already ends with a separator, so build the path with
# os.path.join instead of concatenating another '/' (which produced '...//cbis-meta-data.csv').
cbis = pd.read_csv(os.path.join(root_path, 'cbis-meta-data.csv'))
cbis.head()

## 4-Class Labels

In [None]:
# Names of the four target classes; also used as the label column names below.
class_names = [
    'Benign calcification',
    'Malignant calcification',
    'Benign mass',
    'Malignant mass',
]

In [None]:
# One-hot target vector for each '<Pathology> <Type>' combination; the position
# of the 1 follows the order of `class_names`.
label_dict = {
    'Benign calcification': [1, 0, 0, 0],
    'Malignant calcification': [0, 1, 0, 0],
    'Benign mass': [0, 0, 1, 0],
    'Malignant mass': [0, 0, 0, 1],
}

# Look up every row's vector by joining its two metadata fields with a space.
# A combination missing from `label_dict` raises KeyError.
pathology_values = cbis['Pathology'].values
type_values = cbis['Type'].values
label = [
    label_dict[pathology + ' ' + lesion_type]
    for pathology, lesion_type in zip(pathology_values, type_values)
]

# Store the vectors as four indicator columns on the metadata frame.
cbis[class_names] = label

In [None]:
# `x` holds row *positions* (0..n-1), not index labels: the cells below select
# rows with `cbis.iloc[...]`, which is positional. For a default RangeIndex this
# is the same list as `list(cbis.index)`, but it stays correct if `cbis` is ever
# filtered or re-indexed before this cell.
x = list(range(len(cbis)))
# One-hot label matrix, one row per entry of `x`.
y = cbis[class_names].values

# 80/20 split; the fixed random_state keeps the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
print(len(x_train))

### Multi View Training

In [None]:
# Columns kept for multi-view training: the two view columns, then the label columns.
columns = ['CC', 'MLO', *class_names]
columns

In [None]:
# Training rows (selected by position), restricted to the columns chosen above.
cbis_train = cbis.iloc[x_train][columns]
cbis_train

In [None]:
# Held-out rows (selected by position), restricted to the columns chosen above.
cbis_validation = cbis.iloc[x_test][columns]
cbis_validation

### Single View Training

In [None]:
# Columns for single-view training: one image-file column, then the label columns.
# This rebinds `columns`, replacing the multi-view list.
columns = ['imgfile', *class_names]
columns

In [None]:
# Flatten the training split to one row per image: every selected row of `cbis`
# contributes the value of its `CC` column and the value of its `MLO` column,
# and both resulting rows carry that row's class-label columns.
# NOTE: this rebinds `cbis_train`, replacing the multi-view frame built earlier.
x_names = []
labels = []

# Iterate over the row positions directly. The per-row temporaries use their own
# names so the notebook-level `x` / `y` created by the split cell are not
# overwritten (the original loop reused `x` and `y`).
for row_pos in tqdm(x_train):
    case_row = cbis.iloc[row_pos]

    # Order within a case is CC first, then MLO.
    x_names.extend(case_row[['CC', 'MLO']])
    # Same label once per view.
    labels.extend([case_row[class_names]] * 2)

cbis_train = pd.DataFrame(index=np.arange(len(x_names)), columns=columns)
cbis_train['imgfile'] = x_names
cbis_train[class_names] = labels
cbis_train

In [None]:
# Flatten the held-out split to one row per image: every selected row of `cbis`
# contributes the value of its `CC` column and the value of its `MLO` column,
# and both resulting rows carry that row's class-label columns.
# NOTE: this rebinds `cbis_validation`, replacing the multi-view frame built earlier.
x_names = []
labels = []

# Iterate over the row positions directly. The per-row temporaries use their own
# names so the notebook-level `x` / `y` created by the split cell are not
# overwritten (the original loop reused `x` and `y`).
for row_pos in tqdm(x_test):
    case_row = cbis.iloc[row_pos]

    # Order within a case is CC first, then MLO.
    x_names.extend(case_row[['CC', 'MLO']])
    # Same label once per view.
    labels.extend([case_row[class_names]] * 2)

cbis_validation = pd.DataFrame(index=np.arange(len(x_names)), columns=columns)
cbis_validation['imgfile'] = x_names
cbis_validation[class_names] = labels
cbis_validation