Copyright by Arjun Sarkar Research Group Applied Systems Biology - Head: Prof. Dr. Marc Thilo Figge https://www.leibniz-hki.de/en/applied-systems-biology.html HKI-Center for Systems Biology of Infection Leibniz Institute for Natural Product Research and Infection Biology - Hans Knöll Institute (HKI) Adolf-Reichwein-Straße 23, 07745 Jena, Germany

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import cv2 as cv
import shutil

In [2]:
# Expose only GPU 0 to this process.
# NOTE(review): this is set after `import tensorflow` in the first cell —
# confirm it still takes effect before TensorFlow initialises its devices.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Distribution strategy pinned to a single device ("GPU:0"); its scope is
# entered later when the feature extractor is built and when features are computed.
strategy = tf.distribute.MirroredStrategy(["GPU:0"])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [3]:
# Load the video index. The displayed output shows the columns
# folder_num, video_name, labels and class_label (plus an unnamed index column).
df = pd.read_csv('Video_list.csv')
df

Unnamed: 0,folder_num,video_name,labels,class_label
0,19,19_485.avi,cg,1
1,19,19_480.avi,cg,1
2,19,19_491.avi,cg,1
3,19,19_520.avi,cg,1
4,19,19_522.avi,cg,1
...,...,...,...,...
1291,31,31_1059.avi,cg,1
1292,31,31_1069.avi,cg,1
1293,31,31_1063.avi,cg,1
1294,31,31_1088.avi,cg,1


In [4]:
# Keep only the videos whose folder_num is in the listed set.
# NOTE(review): the folder numbers are hard-coded and `df` is rebound in place,
# so the full table from the previous cell is no longer available afterwards.
df = df[df['folder_num'].isin([1,2,3,4,5,6,13])]
df

Unnamed: 0,folder_num,video_name,labels,class_label
144,6,6_249.avi,mock,2
145,6,6_272.avi,mock,2
146,6,6_281.avi,mock,2
147,6,6_274.avi,mock,2
148,6,6_269.avi,mock,2
...,...,...,...,...
1195,1,1_38.avi,ca,0
1196,1,1_4.avi,ca,0
1197,1,1_39.avi,ca,0
1198,1,1_25.avi,ca,0


In [5]:
# Side length (pixels) every frame is resized to; also the input size of the feature extractor.
IMG_SIZE = 456
# Maximum number of frames per video for which features are stored.
MAX_SEQ_LENGTH = 40
# Length of the feature vector per frame (matches the (None, 2048) output in the model summary).
NUM_FEATURES = 2048

In [6]:
def load_video(path, max_frames=None):
    """Read a video file into an array of resized RGB frames.

    Args:
        path: Path of the video file, passed to ``cv.VideoCapture``.
        max_frames: Optional upper bound on the number of frames to decode.
            ``None`` (the default) reads the video to its end, which is the
            original behaviour. Passing a limit avoids decoding and holding
            frames the caller will never use.

    Returns:
        ``np.array`` built from the list of frames, each resized to
        ``(IMG_SIZE, IMG_SIZE)`` with the channel axis reordered from
        OpenCV's BGR to RGB. If no frame could be read the result is the
        empty array ``np.array([])``.

    Warns:
        UserWarning: if the capture could not be opened. An empty array is
        still returned so existing callers keep working.
    """
    import warnings

    cap = cv.VideoCapture(path)
    frames = []

    try:
        if not cap.isOpened():
            # Previously this case was indistinguishable from a valid but
            # empty video; make it visible without changing the return value.
            warnings.warn(f"Could not open video file: {path!r}")

        while max_frames is None or len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure): stop decoding.
                break

            frame = cv.resize(frame, (IMG_SIZE, IMG_SIZE))
            # Reverse the channel order: BGR (OpenCV) -> RGB.
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()
    return np.array(frames)

In [7]:
def build_feature_extractor():
    """Assemble the per-frame feature extractor.

    An ImageNet-pretrained EfficientNetB5 without its classification head and
    with global average pooling is wrapped behind the Keras EfficientNet
    ``preprocess_input`` function.

    Returns:
        A ``tf.keras.Model`` named "feature_extractor" that takes images of
        shape ``(IMG_SIZE, IMG_SIZE, 3)`` and returns the pooled backbone output.
    """
    effnet = tf.keras.applications.efficientnet
    image_shape = (IMG_SIZE, IMG_SIZE, 3)

    # Backbone first, then the outer input layer (same creation order as before).
    backbone = effnet.EfficientNetB5(
        include_top=False,
        weights="imagenet",
        pooling="avg",
        input_shape=image_shape,
    )

    frames_in = tf.keras.Input(image_shape)
    pooled = backbone(effnet.preprocess_input(frames_in))
    return tf.keras.Model(frames_in, pooled, name="feature_extractor")


# The model is created inside the strategy scope.
with strategy.scope():
    feature_extractor = build_feature_extractor()

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [8]:
# Print the layer/parameter overview of the feature extractor built above.
feature_extractor.summary()

Model: "feature_extractor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 456, 456, 3)]     0         
                                                                 
 efficientnetb5 (Functional)  (None, 2048)             28513527  
                                                                 
Total params: 28,513,527
Trainable params: 28,340,784
Non-trainable params: 172,743
_________________________________________________________________


In [9]:
def prepare_all_videos(df, root_dir, batch_size=8):
    """Extract per-frame features for every video listed in ``df``.

    Args:
        df: DataFrame with a "video_name" column (file names, joined onto
            ``root_dir``) and a "class_label" column.
        root_dir: Directory the video files are read from.
        batch_size: Number of frames sent through ``feature_extractor`` per
            forward pass. Only affects speed and memory use.

    Returns:
        Tuple ``(frame_features, labels)``:
        ``frame_features`` is a float32 array of shape
        ``(len(df), MAX_SEQ_LENGTH, NUM_FEATURES)``. Only the first
        ``MAX_SEQ_LENGTH`` frames of each video are used; shorter videos are
        zero-padded and frames without any non-zero pixel keep a zero vector.
        ``labels`` is ``df["class_label"].values``.

    Warns:
        UserWarning: if a video yields no frames; its features stay all-zero.
    """
    import warnings

    video_names = df["video_name"].values.tolist()
    labels = df["class_label"].values

    # `frame_features` are what we will feed to our sequence model.
    frame_features = np.zeros(
        shape=(len(video_names), MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, name in enumerate(video_names):
        video_path = os.path.join(root_dir, name)
        # Only the first MAX_SEQ_LENGTH frames contribute features.
        frames = load_video(video_path)[:MAX_SEQ_LENGTH]

        if frames.size == 0:
            # Unreadable or empty video: keep the zero features but say so,
            # otherwise the gap in the dataset goes unnoticed.
            warnings.warn(f"No frames read from {video_path!r}; features left as zeros")
            continue

        # A frame is skipped when every pixel is zero. For the non-negative
        # pixel values returned by load_video this equals the former
        # `np.mean(frame) > 0.0` test.
        keep = frames.reshape(len(frames), -1).any(axis=1)
        if not keep.any():
            continue

        # One batched call per video instead of one `predict` per frame;
        # verbose=0 suppresses the per-call progress bar.
        frame_features[idx, np.flatnonzero(keep)] = feature_extractor.predict(
            frames[keep], batch_size=batch_size, verbose=0
        )

    return frame_features, labels

In [10]:
# Directory the videos are READ from: it is passed as `root_dir` to
# prepare_all_videos below. NOTE(review): the name `dest_dir` is misleading,
# and the absolute paths in this cell are machine-specific.
dest_dir = '/asbdata/Arjun/Bloodi/all_vids_feature_extraction/'

# Process the videos one folder_num at a time.
grouped = df.groupby(df.folder_num)
df_list = grouped.groups.keys()

# NOTE(review): only inference and file writes happen inside this scope —
# confirm whether entering the strategy scope is needed here.
with strategy.scope():
    
    for j in df_list:
        
        # Rows of `df` belonging to folder_num == j.
        df_sequence = grouped.get_group(j)
        
        features, labels = prepare_all_videos(df_sequence,dest_dir)
        # Turn the 1-D label vector into a column: (n,) -> (n, 1).
        labels = np.expand_dims(labels,axis=1)
    
        print(features.shape)
        print(labels.shape)
    
        # One features file and one labels file per folder_num.
        # NOTE(review): np.save does not create directories; the two output
        # folders are assumed to exist already.
        np.save(f'/asbdata/Arjun/Bloodi/Patch_videos_numpy/videos/{j}_features.npy', features)
        np.save(f'/asbdata/Arjun/Bloodi/Patch_videos_numpy/labels/{j}_labels.npy', labels)
        # `j` is the folder number, not an individual video.
        print(f'Completed for video = {j}')













(48, 40, 2048)
(48, 1)
Completed for video = 1












(48, 40, 2048)
(48, 1)
Completed for video = 2














(48, 40, 2048)
(48, 1)
Completed for video = 3














(48, 40, 2048)
(48, 1)
Completed for video = 4














(48, 40, 2048)
(48, 1)
Completed for video = 5














(48, 40, 2048)
(48, 1)
Completed for video = 6












(48, 40, 2048)
(48, 1)
Completed for video = 13
