Copyright by Arjun Sarkar Research Group Applied Systems Biology - Head: Prof. Dr. Marc Thilo Figge https://www.leibniz-hki.de/en/applied-systems-biology.html HKI-Center for Systems Biology of Infection Leibniz Institute for Natural Product Research and Infection Biology - Hans Knöll Institute (HKI) Adolf-Reichwein-Straße 23, 07745 Jena, Germany

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import cv2 as cv
import shutil

In [2]:
# Restrict this process to physical GPU 3. The variable has to be in place
# before TensorFlow initialises its GPU devices, so keep this line ahead of
# any code that touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"]="3"

# Distribution strategy pinned to the single visible device, which is
# addressed as "GPU:0" once the mask above is applied.
strategy = tf.distribute.MirroredStrategy(["GPU:0"])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [3]:
# Video index table: one row per video file, with the columns `folder_num`,
# `video_name`, `labels` (text) and `class_label` (integer).
# NOTE(review): the "Unnamed: 0" column looks like a saved index — confirm.
df = pd.read_csv('Video_list_split_cells.csv')
df

Unnamed: 0,folder_num,video_name,labels,class_label
0,19,19_57_57_184.avi,cg,1
1,19,19_158_158_289.avi,cg,1
2,19,19_49_49_157.avi,cg,1
3,19,19_10_10_22.avi,cg,1
4,19,19_241_241_351.avi,cg,1
...,...,...,...,...
10669,31,31_739_739_442.avi,cg,1
10670,31,31_678_678_430.avi,cg,1
10671,31,31_322_322_267.avi,cg,1
10672,31,31_592_592_388.avi,cg,1


In [4]:
IMG_SIZE = 224  # frames are resized to IMG_SIZE x IMG_SIZE; also the extractor's input size
MAX_SEQ_LENGTH = 40  # maximum number of frames per video stored in the feature array
NUM_FEATURES = 1280  # length of the feature vector stored for each frame

In [5]:
def load_video(path):
    """Read every frame of a video file into one array.

    Each decoded frame is resized to ``IMG_SIZE`` x ``IMG_SIZE`` and its
    channel axis is reordered with the index list ``[2, 1, 0]`` (OpenCV
    decodes to BGR, so this yields RGB).

    Args:
        path: Location of the video file handed to ``cv.VideoCapture``.

    Returns:
        ``np.array`` built from the list of processed frames, in read order.
        When no frame can be read the list is empty and so is the array.
    """
    capture = cv.VideoCapture(path)
    rgb_frames = []

    try:
        ok, bgr = capture.read()
        while ok:
            resized = cv.resize(bgr, (IMG_SIZE, IMG_SIZE))
            # Swap the first and last channel.
            rgb_frames.append(resized[:, :, [2, 1, 0]])
            ok, bgr = capture.read()
    finally:
        # Always hand the capture back, even if resizing raised.
        capture.release()

    return np.array(rgb_frames)

In [6]:
with strategy.scope():

    def build_feature_extractor():
        """Assemble the per-frame feature extractor.

        An ImageNet-initialised EfficientNetB0 without its classification
        head and with average pooling is placed behind the EfficientNet
        ``preprocess_input`` step.

        Returns:
            ``tf.keras.Model`` named "feature_extractor" that takes images of
            shape ``(IMG_SIZE, IMG_SIZE, 3)`` and returns the pooled backbone
            output.
        """
        efficientnet = tf.keras.applications.efficientnet

        # Create the backbone before the outer Input layer: Keras numbers
        # auto-generated layer names in creation order.
        backbone = efficientnet.EfficientNetB0(
            include_top=False,
            weights="imagenet",
            input_shape=(IMG_SIZE, IMG_SIZE, 3),
            pooling="avg",
        )

        image_in = tf.keras.Input((IMG_SIZE, IMG_SIZE, 3))
        pooled = backbone(efficientnet.preprocess_input(image_in))
        return tf.keras.Model(image_in, pooled, name="feature_extractor")


    # Built inside the strategy scope so its variables are created there.
    feature_extractor = build_feature_extractor()

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [7]:
feature_extractor.summary()

Model: "feature_extractor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb0 (Functional)  (None, 1280)             4049571   
                                                                 
Total params: 4,049,571
Trainable params: 4,007,548
Non-trainable params: 42,023
_________________________________________________________________


In [8]:
def prepare_all_videos(df, root_dir):
    """Extract per-frame features for every video listed in ``df``.

    Each video is loaded with ``load_video``, cut to its first
    ``MAX_SEQ_LENGTH`` frames and sent through ``feature_extractor`` in a
    single batched ``predict`` call. Slots belonging to frames that do not
    exist (short or unreadable videos) or whose pixel mean is not above zero
    keep an all-zero feature vector.

    Args:
        df: DataFrame with a "video_name" column (file names relative to
            ``root_dir``) and a "class_label" column.
        root_dir: Directory containing the video files.

    Returns:
        Tuple ``(frame_features, labels)``. ``frame_features`` is a float32
        array of shape ``(len(df), MAX_SEQ_LENGTH, NUM_FEATURES)``;
        ``labels`` is ``df["class_label"].values``.
    """
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["class_label"].values

    # `frame_features` are what we will feed to our sequence model.
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, path in enumerate(video_paths):
        # Only the first MAX_SEQ_LENGTH frames are ever stored, so drop the
        # rest before doing any further work on them.
        frames = load_video(os.path.join(root_dir, path))[:MAX_SEQ_LENGTH]
        if len(frames) == 0:
            # Nothing could be read: this video's row stays all zeros.
            continue

        # Same per-frame test as `np.mean(frame) > 0.0`, evaluated for all
        # frames at once. Frames failing it keep their zero vector.
        has_signal = frames.reshape(len(frames), -1).mean(axis=1) > 0.0
        if not has_signal.any():
            continue

        # One batched call per video instead of one `predict` call per frame.
        # `predict` has a noticeable fixed cost per invocation and prints a
        # progress bar each time unless `verbose=0`.
        # Assumes the extractor returns one NUM_FEATURES-long row per frame.
        frame_features[idx, np.flatnonzero(has_signal)] = feature_extractor.predict(
            frames[has_signal], verbose=0
        )

    return frame_features, labels

In [9]:
# Keep only the videos that come from folders 29 through 35.
df = df.loc[df['folder_num'].isin(list(range(29, 36)))]
df

Unnamed: 0,folder_num,video_name,labels,class_label
834,35,35_535_535_472.avi,mock,2
835,35,35_501_501_451.avi,mock,2
836,35,35_584_584_507.avi,mock,2
837,35,35_31_31_32.avi,mock,2
838,35,35_190_190_287.avi,mock,2
...,...,...,...,...
10669,31,31_739_739_442.avi,cg,1
10670,31,31_678_678_430.avi,cg,1
10671,31,31_322_322_267.avi,cg,1
10672,31,31_592_592_388.avi,cg,1


In [10]:
# Where the split videos live, and where the per-folder arrays are written.
dest_dir = '/asbdata/Arjun/Bloodi/all_split_videos/'
features_dir = '/asbdata/Arjun/Bloodi/Split_numpy/videos/'
labels_dir = '/asbdata/Arjun/Bloodi/Split_numpy/labels/'

# `np.save` does not create missing parent directories. Make sure they exist
# up front so a long extraction run cannot fail at the final write.
os.makedirs(features_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

grouped = df.groupby(df.folder_num)
df_list = grouped.groups.keys()

with strategy.scope():

    # One pass per source folder: extract the features of all its videos and
    # store them next to the matching labels.
    for j, df_sequence in grouped:

        features, labels = prepare_all_videos(df_sequence, dest_dir)
        # Turn the 1-D label vector into a column, shape (num_videos, 1).
        labels = np.expand_dims(labels, axis=1)

        print(features.shape)
        print(labels.shape)

        np.save(os.path.join(features_dir, f'{j}_features.npy'), features)
        np.save(os.path.join(labels_dir, f'{j}_labels.npy'), labels)
        print(f'Completed for video = {j}')

































































































































































































(457, 40, 1280)
(457, 1)
Completed for video = 29


















































































































































































































































































(607, 40, 1280)
(607, 1)
Completed for video = 30
























































































































































































































(483, 40, 1280)
(483, 1)
Completed for video = 31








































































































































































































(452, 40, 1280)
(452, 1)
Completed for video = 32
































































































































































(363, 40, 1280)
(363, 1)
Completed for video = 33






































































































































































(535, 40, 1280)
(535, 1)
Completed for video = 34


































































































































































(561, 40, 1280)
(561, 1)
Completed for video = 35
