# Notebook Initialization

In [1]:
# %load nb_init.py

from pathlib import Path
import pandas as pd

base_dir = Path.cwd().parent
config_dir = base_dir / "config"
data_dir = base_dir / "data"
docs_dir = base_dir / "docs"
figures_dir = docs_dir / "figures"
models_dir = base_dir / "models"
logs_dir = base_dir / "logs"
images_input_dir = data_dir / "COVID19"
preprocessed_dir = data_dir / "preprocessed"
output_dir = data_dir / "output"

# Directories used to train the CNN (image by image) 
cnn_data_dir = data_dir / "modelling" / "cnn"
cnn_train_dir = cnn_data_dir / "train"
cnn_test_dir = cnn_data_dir / "test"

metadata_file = images_input_dir / "metadata.csv"
labels_file = images_input_dir / "unzip_filenames.csv"
preprocessed_labels_file = preprocessed_dir / "labels.parquet"

feature_extractor_model_file = models_dir / "feature_extractor.tf"

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()

config_file = config_dir / "tfg.conf"

from pyhocon import ConfigFactory
config = None

def load_config():
    return ConfigFactory.parse_file(config_file)

config = load_config()
    
import sys

if str(base_dir / "src") not in sys.path:
    sys.path.append(str(base_dir / "src"))

%load_ext autoreload

%autoreload 2

In [2]:
spark

In [3]:
from tfg import DataRepository

repo = DataRepository(config=config, base_data_path=data_dir)

# Load datasets

In [8]:
import numpy as np

In [4]:
class_idx = {'CP': 0, 'NCP': 1, 'Normal': 2}
idx_to_class = { v: k for k, v in class_idx.items() }

In [5]:
train_clips = repo.load("train_clips")
train_clips.head(5)

Unnamed: 0,patient_id,scan_id,label,n_slice,clip_num,clip_start_file,clip_end_file,seq_features
0,0,3131,CP,285,0,CP/0/3131/0000.png,CP/0/3131/0069.png,"[[0.0, 0.0, 2.9882235527038574, 0.0, 0.0, 0.0,..."
1,0,3131,CP,285,1,CP/0/3131/0070.png,CP/0/3131/0139.png,"[[0.0, 0.015588469803333282, 2.548190832138061..."
2,0,3131,CP,285,2,CP/0/3131/0140.png,CP/0/3131/0209.png,"[[0.3391307592391968, 0.016530275344848633, 2...."
3,0,3131,CP,285,3,CP/0/3131/0210.png,CP/0/3131/0279.png,"[[0.29484760761260986, 0.0, 1.7729129791259766..."
4,0,3131,CP,285,4,CP/0/3131/0280.png,CP/0/3131/0284.png,"[[2.112011671066284, 0.0, 3.815093755722046, 0..."


In [61]:
def to_single_nparr(arrs):
    return np.array([el.tolist() for el  in arrs.tolist()])

def ohe_label(label):
    ohe_labels = [0, 0, 0]
    ohe_labels[class_idx[label]] = 1
    
    return np.array(ohe_labels)

In [62]:
train_targets = train_clips["label"].apply(ohe_label).values
train_targets = to_single_nparr(train_targets)
train_targets[:5], train_targets[-5:]

(array([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0]]),
 array([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0]]))

In [18]:
train_data = train_clips["seq_features"].apply(to_single_nparr).values
train_data[:1], train_data[-1:]

(array([array([[0.        , 0.        , 2.98822355, ..., 0.72927612, 0.        ,
         0.44937783],
        [0.36093032, 0.        , 4.37772894, ..., 1.30455339, 0.        ,
         1.11693501],
        [0.98390299, 0.        , 3.60125709, ..., 1.25560212, 0.        ,
         1.22812939],
        ...,
        [0.        , 0.06825965, 1.86592245, ..., 0.05172634, 0.        ,
         0.        ],
        [0.81235242, 0.2658385 , 4.02565336, ..., 0.        , 0.56523615,
         0.        ],
        [2.29294705, 0.        , 4.27155209, ..., 1.46142101, 0.        ,
         1.49820328]])], dtype=object),
 array([array([[0.        , 1.02570331, 0.        , ..., 0.        , 0.77238965,
         0.        ],
        [0.        , 1.11100841, 0.        , ..., 0.        , 0.84015405,
         0.        ],
        [0.        , 2.30372715, 0.60316122, ..., 0.        , 1.95132911,
         0.        ],
        ...,
        [0.        , 2.52368426, 0.        , ..., 0.        , 2.12280846,
    

In [19]:
train_data.shape

(6117,)

In [20]:
train_data[0].shape

(70, 32)

In [37]:
num_obs, seq_length, num_feats = train_data.shape[0], train_data[0].shape[0], train_data[0].shape[1]
new_shape = (num_obs, seq_length, num_feats)
new_training_data = np.zeros(new_shape)
new_training_data.shape

(6117, 70, 32)

In [38]:
# With this we make sure that the final data is a single numpy array
for idx, obs in enumerate(train_data):
    if len(obs) < seq_length:
        for feat_idx, feats in enumerate(obs):
            new_training_data[idx][feat_idx] = feats
    else:
        new_training_data[idx] = obs

In [41]:
val_clips = repo.load("val_clips")
val_clips.head(5)

Unnamed: 0,patient_id,scan_id,label,n_slice,clip_num,clip_start_file,clip_end_file,seq_features
0,4,3505,CP,298,0,CP/4/3505/0000.png,CP/4/3505/0069.png,"[[2.613647699356079, 0.0, 0.9890813827514648, ..."
1,4,3505,CP,298,1,CP/4/3505/0070.png,CP/4/3505/0139.png,"[[3.2934000492095947, 0.0, 5.15690279006958, 0..."
2,4,3505,CP,298,2,CP/4/3505/0140.png,CP/4/3505/0209.png,"[[3.833109140396118, 0.0, 5.286610126495361, 0..."
3,4,3505,CP,298,3,CP/4/3505/0210.png,CP/4/3505/0279.png,"[[3.43273663520813, 0.0, 0.621675968170166, 0...."
4,4,3505,CP,298,4,CP/4/3505/0280.png,CP/4/3505/0297.png,"[[4.024286270141602, 0.0, 1.0903337001800537, ..."


In [63]:
val_targets = val_clips["label"].apply(ohe_label).values
val_targets = to_single_nparr(val_targets)
val_targets[:5], val_targets[-5:]

(array([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [1, 0, 0]]),
 array([[0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0]]))

In [43]:
val_data = val_clips["seq_features"].apply(to_single_nparr).values
val_data[:1], val_data[-1:]

(array([array([[2.6136477 , 0.        , 0.98908138, ..., 0.80635095, 0.        ,
         0.83522969],
        [3.08556819, 0.        , 1.16019964, ..., 0.82768691, 0.        ,
         0.94938755],
        [3.13986325, 0.        , 2.20502305, ..., 1.3187505 , 0.        ,
         1.50978446],
        ...,
        [3.3429215 , 0.        , 4.32882261, ..., 1.8979671 , 0.        ,
         2.183213  ],
        [3.63964081, 0.        , 4.41138649, ..., 1.97474813, 0.        ,
         2.31390309],
        [3.54425406, 0.        , 5.61092615, ..., 2.367167  , 0.        ,
         2.77212691]])], dtype=object),
 array([array([[0.        , 0.40822747, 0.        , ..., 0.        , 0.46930605,
         0.        ],
        [0.44518673, 0.14363712, 0.        , ..., 0.        , 0.18813239,
         0.        ],
        [0.74548346, 0.14900705, 0.        , ..., 0.        , 0.20418029,
         0.        ],
        ...,
        [0.22963272, 0.        , 1.32374883, ..., 0.81885922, 0.        ,
    

In [45]:
num_val_obs = val_data.shape[0]
new_val_shape = (num_val_obs, seq_length, num_feats)
new_val_data = np.zeros(new_val_shape)
new_val_data.shape

(744, 70, 32)

In [46]:
# With this we make sure that the final data is a single numpy array
for idx, obs in enumerate(val_data):
    if len(obs) < seq_length:
        for feat_idx, feats in enumerate(obs):
            new_val_data[idx][feat_idx] = feats
    else:
        new_val_data[idx] = obs

# Create model

In [48]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [88]:
model = keras.Sequential()

num_recurrent_units = 16
seq_length = 70

model.add(
    layers.Bidirectional(layers.GRU(num_recurrent_units, return_sequences=True), input_shape=(seq_length, num_features), name="gru_bidirect_1")
)
model.add(layers.Bidirectional(layers.GRU(num_recurrent_units // 2), name="gru_bidirect_2"))
# model.add(
#     layers.GRU(64, return_sequences=True, input_shape=(seq_length, num_features))
# )
# model.add(layers.GRU(num_features // 2))
model.add(layers.Dense(min(10, num_recurrent_units // 2)))
model.add(layers.Dense(3))

model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_bidirect_1 (Bidirectiona (None, 70, 32)            4800      
_________________________________________________________________
gru_bidirect_2 (Bidirectiona (None, 16)                2016      
_________________________________________________________________
dense_13 (Dense)             (None, 8)                 136       
_________________________________________________________________
dense_14 (Dense)             (None, 3)                 27        
Total params: 6,979
Trainable params: 6,979
Non-trainable params: 0
_________________________________________________________________


In [89]:
model.compile(optimizer='adam',
              loss="categorical_crossentropy",
              metrics=['categorical_accuracy'])

In [None]:
fit_history = model.fit(
    x=new_training_data, y=train_targets,
    batch_size=32, epochs=20,
    verbose=1,
    validation_data=(new_val_data, val_targets)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

In [85]:
fit_history2 = model.fit(
    x=new_training_data, y=train_targets,
    batch_size=32, epochs=15,
    verbose=1,
    validation_data=(new_val_data, val_targets),
    initial_epoch=5
)

Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [54]:
model.fit?

[0;31mSignature:[0m
[0mmodel[0m[0;34m.[0m[0mfit[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mx[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0my[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_size[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mepochs[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallbacks[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mvalidation_split[0m[0;34m=[0m[0;36m0.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mvalidation_data[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshuffle[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclass_weight[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msample_weight[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m   

# Load model

In [5]:
from tensorflow.keras.models import load_model
cnn_model_suffix = "06"
model_file = models_dir / f"feature_extractor_{cnn_model_suffix}.tf"
model = load_model(str(model_file))

model.summary()

Model: "covid_classifier"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_images (InputLayer)    [(None, 128, 128, 3)]     0         
_________________________________________________________________
conv2d_01 (Conv2D)           (None, 128, 128, 6)       168       
_________________________________________________________________
maxpool2d_01 (MaxPooling2D)  (None, 64, 64, 6)         0         
_________________________________________________________________
conv2d_02 (Conv2D)           (None, 64, 64, 12)        660       
_________________________________________________________________
maxpool2d_02 (MaxPooling2D)  (None, 32, 32, 12)        0         
_________________________________________________________________
conv2d_03 (Conv2D)           (None, 32, 32, 24)        2616      
_________________________________________________________________
maxpool2d_03 (MaxPooling2D)  (None, 16, 16, 24)   

In [19]:
import tensorflow as tf

new_model = tf.keras.Sequential()

In [16]:
model.layers[:-2]

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f5be0305460>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f5be0305c70>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f5be02d09a0>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f5be02d0eb0>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f5be026faf0>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f5be026ffd0>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f5be0273c40>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f5be0277190>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f5be0277d90>,
 <tensorflow.python.keras.layers.convolutional.Conv2D at 0x7f5be027d2e0>,
 <tensorflow.python.keras.layers.core.Dropout at 0x7f5be027de20>,
 <tensorflow.python.keras.layers.pooling.MaxPooling2D at 0x7f5be02893a0>,
 <tensorflow.python.keras.layers.core.Flatten at 0x7f5be02897c0>,
 <tensorflow.python.keras.layers.core.Dense at 0x7f5

In [20]:
for l in model.layers[:-1]:
    if isinstance(l, tf.python.keras.layers.core.Dropout):
        continue
    new_model.add(l)

In [21]:
new_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_01 (Conv2D)           (None, 128, 128, 6)       168       
_________________________________________________________________
maxpool2d_01 (MaxPooling2D)  (None, 64, 64, 6)         0         
_________________________________________________________________
conv2d_02 (Conv2D)           (None, 64, 64, 12)        660       
_________________________________________________________________
maxpool2d_02 (MaxPooling2D)  (None, 32, 32, 12)        0         
_________________________________________________________________
conv2d_03 (Conv2D)           (None, 32, 32, 24)        2616      
_________________________________________________________________
maxpool2d_03 (MaxPooling2D)  (None, 16, 16, 24)        0         
_________________________________________________________________
conv2d_04 (Conv2D)           (None, 16, 16, 48)       

# Load datasets

In [22]:
train_df = repo.load("train_df")
train_df.head(5)

Unnamed: 0,label,file,patient_id,scan_id,n_slice,num_clips,seq_num,clip_num
0,CP,CP/0/3131/0275.png,0,3131,285,0,0,0
1,CP,CP/0/3131/0064.png,0,3131,285,0,1,0
2,CP,CP/0/3131/0083.png,0,3131,285,0,2,0
3,CP,CP/0/3131/0160.png,0,3131,285,0,3,0
4,CP,CP/0/3131/0127.png,0,3131,285,0,4,0


It seems like the num_clips, seq_num and clip_num are not correct, we'll fix them..

In [142]:
from pyspark.sql import Window

images_per_clip = config.get_int("tfg.training.images_per_clip")

scan_window = Window\
    .partitionBy("patient_id", "scan_id")\
    .orderBy("file")

In [143]:
def fix_df(pandas_df):
    df = spark.createDataFrame(pandas_df)

    new_df = df\
        .withColumn("seq_num", F.row_number().over(scan_window) - 1)\
        .withColumn("num_clips", F.ceil(F.col("n_slice") / images_per_clip))\
        .withColumn("clip_num", F.floor(F.col("seq_num") / images_per_clip))\
        .toPandas()

    return new_df

In [101]:
train_df = repo.load("train_df")
train_df = fix_df(train_df)
repo.save("train_df", train_df)
train_df.head()

Unnamed: 0,label,file,patient_id,scan_id,n_slice,num_clips,seq_num,clip_num
0,NCP,NCP/880/2415/0000.jpg,880,2415,312,5,0,0
1,NCP,NCP/880/2415/0001.jpg,880,2415,312,5,1,0
2,NCP,NCP/880/2415/0002.jpg,880,2415,312,5,2,0
3,NCP,NCP/880/2415/0003.jpg,880,2415,312,5,3,0
4,NCP,NCP/880/2415/0004.jpg,880,2415,312,5,4,0


In [None]:
val_df = repo.load("val_df")
val_df = fix_df(val_df)
repo.save("val_df", val_df)
val_df.head()

In [115]:
test_df = repo.load("test_df")
test_df = fix_df(test_df)
repo.save("test_df", test_df)
test_df.head()

True

# Convert images into features

In [23]:
img_size = config.get_int("tfg.training.img_size")
image_target_size = (img_size, img_size)
seed = config.get_int("tfg.seed")
batch_size = config.get_int("tfg.training.batch_size")

In [None]:
from keras_preprocessing.image import ImageDataGenerator

test_datagen = ImageDataGenerator(
    rescale=1./255.,
)

## Train

In [30]:
train_df = repo.load("train_df")
train_generator = test_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=str(images_input_dir),
    x_col="file",
    y_col="label",
    batch_size=batch_size,
    seed=seed,
    shuffle=False,
    class_mode="categorical",
    target_size=image_target_size)

Found 331286 validated image filenames belonging to 3 classes.


In [50]:
train_generator.class_indices

{'CP': 0, 'NCP': 1, 'Normal': 2}

In [51]:
class_to_idx = train_generator.class_indices
idx_to_class = { v: k for k, v in class_to_idx.items() }

In [46]:
NUM_TRAIN_BATCHES = train_generator.n // train_generator.batch_size + 1

In [47]:
raw_train_feats = new_model.predict(train_generator, steps=NUM_TRAIN_BATCHES, verbose=1)
raw_train_feats.shape



(331286, 32)

In [74]:
train_df["img_features"] = raw_train_feats.tolist()

In [75]:
repo.save("train_features", train_df)

True

In [166]:
train_feats = repo.load("train_features")
train_feats.head(5)

Unnamed: 0,file,label,patient_id,scan_id,n_slice,num_clips,seq_num,clip_num,img_features
0,NCP/880/2415/0000.jpg,NCP,880,2415,312,5,0,0,"[0.0, 5.270750045776367, 0.0, 5.07415866851806..."
1,NCP/880/2415/0001.jpg,NCP,880,2415,312,5,1,0,"[0.0, 2.8091320991516113, 0.0, 2.9504299163818..."
2,NCP/880/2415/0002.jpg,NCP,880,2415,312,5,2,0,"[0.0, 3.616379737854004, 0.19390609860420227, ..."
3,NCP/880/2415/0003.jpg,NCP,880,2415,312,5,3,0,"[0.0, 2.734466314315796, 0.0, 2.81957268714904..."
4,NCP/880/2415/0004.jpg,NCP,880,2415,312,5,4,0,"[0.0, 9.06471061706543, 0.0, 7.898210525512695..."


## Validation

In [48]:
val_df = repo.load("val_df")

val_generator = test_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=str(images_input_dir),
    x_col="file",
    y_col="label",
    batch_size=batch_size,
    seed=seed,
    shuffle=False,
    class_mode="categorical",
    target_size=image_target_size)

NUM_VAL_BATCHES = val_generator.n // val_generator.batch_size + 1

raw_val_feats = new_model.predict(val_generator, steps=NUM_VAL_BATCHES, verbose=1)
raw_val_feats.shape

Found 39828 validated image filenames belonging to 3 classes.


(39828, 32)

In [132]:
val_df["img_features"] = raw_val_feats.tolist()
val_df.head()

Unnamed: 0,file,img_features
0,CP/1075/3118/0543.jpg,"[0.5893998742103577, 0.0, 3.105539083480835, 0..."
1,CP/1075/3118/0174.jpg,"[2.171123504638672, 0.0, 5.15280818939209, 0.0..."
2,CP/1075/3118/0130.jpg,"[2.2265334129333496, 0.0, 6.776073455810547, 0..."
3,CP/1075/3118/0525.jpg,"[1.0836143493652344, 0.0, 3.375173330307007, 0..."
4,CP/1075/3118/0152.jpg,"[2.058112621307373, 0.0, 5.975271224975586, 0...."


In [134]:
repo.save("val_features", val_df)

True

## Test

In [49]:
test_df = repo.load("test_df")

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=str(images_input_dir),
    x_col="file",
    y_col="label",
    batch_size=batch_size,
    seed=seed,
    shuffle=False,
    class_mode="categorical",
    target_size=image_target_size)

NUM_TEST_BATCHES = test_generator.n // test_generator.batch_size + 1

raw_test_feats = new_model.predict(test_generator, steps=NUM_TEST_BATCHES, verbose=1)
raw_test_feats.shape

Found 40415 validated image filenames belonging to 3 classes.


(40415, 32)

In [71]:
test_df["img_features"] = raw_test_feats.tolist()
repo.save("test_features", test_df)

test_feats = repo.load("test_features")
test_feats.head(5)

Unnamed: 0,label,file,patient_id,scan_id,n_slice,num_clips,seq_num,clip_num,img_features
0,CP,CP/1/3143/0275.png,1,3143,300,1,0,0,"[0.5805813670158386, 0.0, 1.381737232208252, 0..."
1,CP,CP/1/3143/0064.png,1,3143,300,1,1,0,"[0.4474046230316162, 0.0, 1.0005760192871094, ..."
2,CP,CP/1/3143/0083.png,1,3143,300,1,2,0,"[0.157572939991951, 0.0, 1.0315451622009277, 0..."
3,CP,CP/1/3143/0160.png,1,3143,300,1,3,0,"[0.4971274733543396, 0.0, 1.757103681564331, 0..."
4,CP,CP/1/3143/0286.png,1,3143,300,1,4,0,"[0.0, 0.35277533531188965, 2.1408023834228516,..."


# Group features into clips

In [148]:
def agg_features(series):
    series = sorted(series, key=lambda t: t[0])
    
    return [seq_feats[1] for seq_feats in series]

In [167]:
def make_clips(df):
    df["seq_features"] = list(zip(df.seq_num, df.img_features))
    df_clips = df\
        .groupby(by=["patient_id", "scan_id", "label", "n_slice", "clip_num"])\
        .agg({
            'file': ["min", "max"],
            'seq_features': [agg_features]
        })\
        .reset_index()

    # Takes care of the multi-index after the groupby
    df_clips.columns = df_clips.columns.map('_'.join)

    df_clips = df_clips.rename(columns={
        "seq_features_agg_features": "seq_features",
        "file_min": "clip_start_file",
        "file_max": "clip_end_file",
    })
    
    return df_clips

In [172]:
train_feats = repo.load("train_features")
train_clips = make_clips(train_feats)
train_clips.head()

Unnamed: 0,patient_id_,scan_id_,label_,n_slice_,clip_num_,clip_start_file,clip_end_file,seq_features
0,0,3131,CP,285,0,CP/0/3131/0000.png,CP/0/3131/0069.png,"[[0.0, 0.0, 2.9882235527038574, 0.0, 0.0, 0.0,..."
1,0,3131,CP,285,1,CP/0/3131/0070.png,CP/0/3131/0139.png,"[[0.0, 0.015588469803333282, 2.548190832138061..."
2,0,3131,CP,285,2,CP/0/3131/0140.png,CP/0/3131/0209.png,"[[0.3391307592391968, 0.016530275344848633, 2...."
3,0,3131,CP,285,3,CP/0/3131/0210.png,CP/0/3131/0279.png,"[[0.29484760761260986, 0.0, 1.7729129791259766..."
4,0,3131,CP,285,4,CP/0/3131/0280.png,CP/0/3131/0284.png,"[[2.112011671066284, 0.0, 3.815093755722046, 0..."


In [173]:
repo.save("train_clips", train_clips)

True

In [174]:
val_feats = repo.load("val_features")
val_clips = make_clips(val_feats)
val_clips.head()

Unnamed: 0,patient_id_,scan_id_,label_,n_slice_,clip_num_,clip_start_file,clip_end_file,seq_features
0,4,3505,CP,298,0,CP/4/3505/0000.png,CP/4/3505/0069.png,"[[2.613647699356079, 0.0, 0.9890813827514648, ..."
1,4,3505,CP,298,1,CP/4/3505/0070.png,CP/4/3505/0139.png,"[[3.2934000492095947, 0.0, 5.15690279006958, 0..."
2,4,3505,CP,298,2,CP/4/3505/0140.png,CP/4/3505/0209.png,"[[3.833109140396118, 0.0, 5.286610126495361, 0..."
3,4,3505,CP,298,3,CP/4/3505/0210.png,CP/4/3505/0279.png,"[[3.43273663520813, 0.0, 0.621675968170166, 0...."
4,4,3505,CP,298,4,CP/4/3505/0280.png,CP/4/3505/0297.png,"[[4.024286270141602, 0.0, 1.0903337001800537, ..."


In [175]:
repo.save("val_clips", val_clips)

True

In [176]:
test_feats = repo.load("test_features")
test_clips = make_clips(test_feats)
test_clips.head()

Unnamed: 0,patient_id_,scan_id_,label_,n_slice_,clip_num_,clip_start_file,clip_end_file,seq_features
0,1,3143,CP,300,0,CP/1/3143/0000.png,CP/1/3143/0069.png,"[[0.7901433110237122, 0.0, 0.4899705648422241,..."
1,1,3143,CP,300,1,CP/1/3143/0070.png,CP/1/3143/0139.png,"[[0.39713576436042786, 0.0, 0.7319959402084351..."
2,1,3143,CP,300,2,CP/1/3143/0140.png,CP/1/3143/0209.png,"[[0.655151903629303, 0.0, 1.253217101097107, 0..."
3,1,3143,CP,300,3,CP/1/3143/0210.png,CP/1/3143/0279.png,"[[1.5254734754562378, 0.0, 3.0512397289276123,..."
4,1,3143,CP,300,4,CP/1/3143/0280.png,CP/1/3143/0299.png,"[[0.0, 0.09542667120695114, 1.2658686637878418..."


In [177]:
repo.save("test_clips", test_clips)

True