In [None]:
import json
import os
import pickle

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedKFold as skf
from sklearn.model_selection import train_test_split

## Make split

In [None]:
vid_path = "../../data/pocus_videos/convex"
class_short = ["cov", "pne", "reg"]

In [None]:
vid_files = [v for v in os.listdir(vid_path) if v[:3].lower() in class_short]
labels = [vid[:3].lower() for vid in vid_files]

In [None]:
train_files,test_files, train_labels, test_labels = train_test_split(vid_files, labels, stratify=labels)

In [None]:
np.unique(train_labels, return_counts=True)

In [None]:
np.unique(test_labels, return_counts=True)

In [None]:
MY_FR = 5  # target sampling rate (frames per second) after subsampling
DATA_SIZE = 5  # number of frames that form one 3D sample

# Cut every test video into consecutive clips of DATA_SIZE subsampled,
# gray-scale 224x224 frames. One entry per clip in each of the three lists.
data_3d = []
labels_3d = []
files_3d = []
for test_vid, test_lab in zip(test_files, test_labels):
    cap = cv2.VideoCapture(os.path.join(vid_path, test_vid))
    if not cap.isOpened():
        # An unreadable file reports 0 fps / 0 frames; skip it instead of
        # failing on a division by zero below.
        print("could not open", test_vid)
        cap.release()
        continue
    fr = cap.get(cv2.CAP_PROP_FPS)
    n_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    # Keep every `show_every`-th frame; never 0, even for very low frame rates.
    show_every = max(1, round(fr / MY_FR))
    frames_available = n_frames / show_every
    print(test_vid, fr, n_frames, "available frames:", frames_available)
    # If only one frame is missing to fill the last clip, the final frame of
    # the video is added as well (see the selection condition below).
    end_is_close = frames_available % DATA_SIZE >= DATA_SIZE - 1
    number_selected = int(end_is_close) + int(frames_available // DATA_SIZE)
    print(number_selected, n_frames, "show every", show_every)
    last_frame_id = int(n_frames - 1)
    current_data = []
    while cap.isOpened():
        # index of the frame that the next read() returns
        frame_id = cap.get(cv2.CAP_PROP_POS_FRAMES)
        ret, frame = cap.read()
        if not ret:
            break

        if frame_id % show_every == 0 or (end_is_close and frame_id == last_frame_id):
            # Convert and resize only the frames that are actually kept.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            image = cv2.resize(image, (224, 224))
            current_data.append(image)
        if len(current_data) == DATA_SIZE:
            data_3d.append(current_data)
            labels_3d.append(test_lab)
            files_3d.append(test_vid)
            current_data = []
    cap.release()

In [None]:
np.asarray(data_3d).shape

In [None]:
import pickle
with open("../../data/vid_class_test.dat", "wb") as outfile:
    pickle.dump((data_3d, labels_3d, files_3d), outfile)

## 5 fold - DO NOT USE

In [None]:
vid_files = [v for v in os.listdir(vid_path) if v[:3].lower() in class_short]
labels = [vid[:3].lower() for vid in vid_files]

In [None]:
X = np.array(vid_files)
y = np.array(labels)

skf = StratifiedKFold(n_splits=5, shuffle=True)
skf.get_n_splits(X, y)

video_cross_val = {}

for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print("TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print(np.unique(y_train, return_counts=True))
    train_test_dict = {}
    train_test_dict["train"] = (X_train.tolist(), y_train.tolist())
    train_test_dict["test"] = (X_test.tolist(), y_test.tolist())
    video_cross_val[fold] = train_test_dict

In [None]:
a = [video_cross_val[i]["test"][0] for i in range(5)]
a = [e for b in a for e in b ]
assert len(a)==len(np.unique(a))

### Cross val from cross-validation folder

In [None]:
check = "../../data/cross_validation"
videos_dir = "../../data/pocus_videos/convex"

file_list = []

# Scan the frame folders ONCE (the result does not depend on the split that is
# being assembled) and remember, per class directory, which videos the frames
# belong to. Entries keep the os.listdir traversal order.
class_dir_videos = []  # (folder name, unique video file names, labels)
for folder in os.listdir(check):
    if folder[0] == ".":
        continue
    for classe in os.listdir(os.path.join(check, folder)):
        # Skip hidden entries and class directories starting with "u"
        # (presumably "uninformative" -- confirm against the data folder).
        if classe[0] == "." or classe[0] == "u":
            continue
        uni = []
        for file in os.listdir(os.path.join(check, folder, classe)):
            parts = file.split(".")
            # Frame files look like "<video>.<ext>_frame<N>.<img ext>", i.e. at
            # least three dot-separated parts; anything shorter is ignored.
            if file[0] == "." or len(parts) < 3:
                continue
            video_name = parts[0] + "." + parts[1].split("_")[0]
            if not os.path.exists(os.path.join(videos_dir, video_name)):
                # Fall back to the naming scheme of the converted butterfly videos.
                video_name = parts[0][:3] + "_Butterfly_" + parts[0][4:] + ".avi"
                if not os.path.exists(os.path.join(videos_dir, video_name)):
                    print("green dots in video or aibronch", file)
                    continue
            uni.append(video_name)
        uni_files_in_split = np.unique(uni)
        uni_labels = [vid[:3].lower() for vid in uni_files_in_split]
        class_dir_videos.append((folder, uni_files_in_split, uni_labels))

# split index -> {"test": [files, labels], "train": [files, labels]}
video_cross_val = {}
for split in range(5):
    train_test_dict = {"test": [[], []], "train": [[], []]}
    for folder, uni_files_in_split, uni_labels in class_dir_videos:
        # The folder whose name ends with the split index is the test fold.
        part = "test" if folder[-1] == str(split) else "train"
        train_test_dict[part][0].extend(uni_files_in_split)
        train_test_dict[part][1].extend(uni_labels)
    video_cross_val[split] = train_test_dict

In [None]:
this_class = {"cov": "covid", "pne": "pneumonia", "reg": "regular"}
# Sanity check: print every test video of a split whose first extracted frame
# is not present in the class folder of that split.
for i in range(5):
    files, labs = video_cross_val[i]["test"]
    for vid_file, vid_lab in zip(files, labs):
        first_frame = os.path.join(
            "../../data/cross_validation/split" + str(i),
            this_class[vid_lab], vid_file + "_frame0.jpg"
        )
        if not os.path.exists(first_frame):
            print(vid_file + "  in  " + str(i))

In [None]:
for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print("TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print(np.unique(y_train, return_counts=True))
    train_test_dict = {}
    train_test_dict["train"] = (X_train.tolist(), y_train.tolist())
    train_test_dict["test"] = (X_test.tolist(), y_test.tolist())
    video_cross_val[fold] = train_test_dict

In [None]:
with open("../../data/cross_val_new.json", "w") as outfile:
    json.dump(video_cross_val, outfile)

### Script for butterfly data to videos

In [None]:
from skvideo import io

In [None]:
from pocovidnet.utils_butterfly_data import *

In [None]:
butterfly_dir = "../../data/butterfly"
out_dir = "../../data/butterfly_test"

In [None]:
actual_names, labels = get_paths("../../data/pocovid_data.csv")
# manually add the ones which I know are in the data
files_to_process, labs_to_process = get_processing_info(
    butterfly_dir, actual_names, labels
)

In [None]:
del_upper = 100
for i in range(1, len(files_to_process)):
    vid_arr = []
    fp = files_to_process[i]
    fn = fp.split(os.sep)[-1]
    cap = cv2.VideoCapture(fp)  # capturing the video from the given path
    # frame rate
    n_frames = cap.get(7)
    frameRate = cap.get(5)
    out_path = os.path.join(out_dir, label_to_dir(labs_to_process[i]).split(os.sep)[1][:3])
    print(out_path)
    print(
        "PROCESS", fn, labs_to_process[i], "framerate", int(cap.get(5)),
        "width", cap.get(3), "height", cap.get(4), "number frames:",
        cap.get(7)
    )
    if os.path.exists(out_path+"_"+fn.split(".")[0]+".mpeg"):
        print("already done, ", out_path+"_"+fn.split(".")[0]+".mpeg")
        continue

    nr_selected = 0
    while cap.isOpened():
        frameId = cap.get(1)  # current frame number
        ret, frame = cap.read()
        if not ret:
            break

        frame = np.asarray(frame).astype(int)
        # width_box = np.min(frame.shape[:2])
        # crop
        width_border = int(cap.get(3) * 0.15)
        width_box = int(cap.get(3)) - 2 * width_border
        if width_box + del_upper > cap.get(4):
            width_box = int(cap.get(4)-del_upper)
            width_border = int(cap.get(3)/2-width_box/2)
        # print(del_upper, width_box, width_border)
        frame = frame[del_upper:width_box +
                      del_upper, width_border:width_box + width_border]
        
        # print(frame.shape)
        # frame = frame[width_border:width_box+width_border]
        # detect green point
        green_point = frame[:, :, 1] - frame[:, :, 0]
        # get first frame for green point deletion:
        if frameId == 0:
            frame_start = green_point
        # skip the green moving points
        if np.any((green_point - frame_start) > 100):
            plt.imshow(green_point)
            plt.show()
            print("VID WITH GREEN DOT")
            break
        # delete blue symbol
        blue_symbol = np.where(green_point < -50)
        frame[blue_symbol] = frame[0, 0]
        # delete green symbol
        if np.any(green_point > 220):
            green_symbol = np.where(green_point > 50)
            frame[green_symbol] = frame[0, 0]
        # resize
        # print(frame.shape)
        frame = np.asarray(frame).astype(np.uint8)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = cv2.resize(frame, (240, 240))
        if frameId==0:
            plt.imshow(frame)
            plt.show()
        vid_arr.append(frame)
        # SAVE
        # if (frameId % every_x_image == 0):
        #     # storing the frames in a new folder named test_1
        #     filename = out_path + fn + "_frame%d.jpg" % frameId
        #     cv2.imwrite(filename, frame)
        #     nr_selected += 1
    cap.release()
    vid_arr = np.asarray(vid_arr)
    # print(out_path, fp, fn)
    if len(vid_arr)>5:
        io.vwrite(out_path+"_Butterfly_"+fn.split(".")[0]+".mpeg", vid_arr, outputdict={"-vcodec":"mpeg2video"})
        print("DONE", vid_arr.shape)
    else:
        print("GREEN DOT:", fn)

## Double check data

In [None]:
data_path = "/Users/ninawiedemann/Desktop/Projects/covid19_pocus_ultrasound.nosync/data/video_input_data/conv3d_train_fold_1.dat"


In [None]:
with open(
    data_path, "rb"
) as infile:
    X_train, train_labels_text, train_files = pickle.load(infile)

In [None]:
X_train.shape

In [None]:
np.unique(train_labels_text, return_counts=True)

In [None]:
for i, vid in enumerate(X_train):
    print(train_files[i])
    plt.imshow(vid[0, :, :, 0])
    plt.show()

### Find out framerate

In [None]:
# Print the capture properties with ids 0..6 (id 5 is the frame rate) and, on a
# second line, property 4 (frame height) for every non-hidden video.
convex_dir = "../../data/pocus_videos/convex"
for vid in os.listdir(convex_dir):
    if vid.startswith("."):
        continue
    cap = cv2.VideoCapture(convex_dir + "/" + vid)
    first_props = [cap.get(prop_id) for prop_id in range(7)]
    print(vid, first_props)
    print(cap.get(4))
    cap.release()

## Evaluation

In [None]:
from pocovidnet.evaluate_video import VideoEvaluator
from pocovidnet import VIDEO_MODEL_FACTORY
from pocovidnet.videoto3d import Videoto3D
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import (
    Dense, GlobalAveragePooling3D
)

In [None]:
NUM_FOLDS = 5


class GenesisEvaluator():
    """Video classifier built from a Genesis base model plus a dense head.

    One model is created per weight path (all folds for the ensemble, a single
    fold otherwise). Calling the instance with a video path returns the
    predictions of every model for that video.
    """

    def __init__(self, weights_dir="video_genesis_lr1e4", ensemble=True, split=None, model_id="genesis"):
        """
        Build the model(s) and restore their weights.

        Arguments:
            weights_dir {str} -- Directory that contains one checkpoint per
                fold at `fold_<k>/variables/variables`.
            ensemble {bool} -- If True, the models of all NUM_FOLDS folds are
                loaded; otherwise only the model of fold `split`.
            split {int} -- Fold to load when `ensemble` is False
                (0 <= split < NUM_FOLDS). Ignored for the ensemble.
            model_id {str} -- Key into VIDEO_MODEL_FACTORY.

        Raises:
            ValueError -- Unknown `model_id`, or missing / out-of-range `split`
                when `ensemble` is False.
            Exception -- Restoring the weights failed (the original error is
                chained as the cause).
        """
        # self.root = os.path.join('/', *DIR_PATH.split('/')[:-1])
        self.split = split
        self.ensemble = ensemble

        if model_id not in VIDEO_MODEL_FACTORY.keys():
            raise ValueError(
                f'Wrong model {model_id}. Options are:{VIDEO_MODEL_FACTORY.keys()}'
            )
        self.model_id = model_id

        if ensemble:
            # restores one weight path per fold
            folds = range(NUM_FOLDS)
        else:
            if split is None or split < 0 or split >= NUM_FOLDS:
                raise ValueError(
                    f'Provide split between 0 and {NUM_FOLDS - 1}, not {split}'
                )
            folds = [self.split]
        self.weights_paths = [
            os.path.join(
                weights_dir, 'fold_' + str(fold), "variables", "variables"
            ) for fold in folds
        ]

        self.class_mappings = ['covid', 'pneumonia', 'regular']
        # Get Genesis base model
        base_models = [
            VIDEO_MODEL_FACTORY[self.model_id]((1, 64, 64, 32), batch_normalization=True)
            for _ in range(len(self.weights_paths))
        ]
        # Get model head: pooled 'depth_7_relu' features -> dense -> softmax
        self.models = []
        for mod in base_models:
            x = mod.get_layer('depth_7_relu').output
            x = GlobalAveragePooling3D()(x)
            x = Dense(1024, activation='relu')(x)
            output = Dense(len(self.class_mappings), activation='softmax')(x)
            head_model = Model(inputs=mod.input, outputs=output)
            self.models.append(head_model)

        # restore weights
        for model, path in zip(self.models, self.weights_paths):
            try:
                model.load_weights(path)
            except Exception as err:
                # keep the original failure visible as __cause__
                raise Exception(
                    f'Error in model restoring (weights path: {path}).'
                ) from err

        print(f'Model restored. Class mappings are {self.class_mappings}')

    def __call__(self, video_path, width=64, depth=5, fr=5):
        """
        Predict the class probabilities for one video.

        Arguments:
            video_path {str} -- Path of the video file.
            width {int} -- Frame width and height handed to Videoto3D. The
                models are built for input shape (1, 64, 64, 32).
            depth {int} -- Frames per clip handed to Videoto3D. The repeat
                pattern below has five entries, so the frame axis must have
                length 5.
            fr {int} -- Frame rate handed to Videoto3D.

        Returns:
            np.ndarray -- Stacked `predict` outputs, one entry per loaded model
                (presumably of shape (n_models, n_clips, n_classes) -- confirm).
        """
        # read in video
        vid3d = Videoto3D("", width, width, depth, fr)
        vid3d.max_vid = {"cov": 20, "pne": 20, "reg": 20}
        X_test, _, fn = vid3d.video3d([video_path], ["cov"])  # cov as dummy label
        print(X_test.shape)
        assert len(np.unique(fn)) == 1

        # prepare for genesis: swap axes 1 and 4, then repeat the five entries
        # of the new last axis 6+7+7+6+6 = 32 times to match the input depth
        X_test = np.transpose(X_test, [0, 4, 2, 3, 1])
        X_test = np.repeat(X_test, [6, 7, 7, 6, 6], axis=-1)
        res = [model.predict(X_test) for model in self.models]
        return np.array(res)

In [None]:
gen = GenesisEvaluator(ensemble=False, split=0)
gen("../../data/pocus_videos/convex/Pneu-Atlas-pneumonia.gif")
# prep_vid_snippets("../../data/pocus_videos/convex/"+"Pneu-Atlas-pneumonia.gif")

In [None]:
with open("../../data/video_input_data/cross_val.json", "r") as infile:
    cross_val_split = json.load(infile)

In [None]:
WEIGHTS_DIR = "../video_genesis_lr1e4"
VIDEO_DIR = "../../data/pocus_videos/convex"
all_genesis_preds, all_frame_preds = [], []
# With the evaluators commented out, this loop only checks that every test
# video of every fold can be turned into clips; both prediction lists stay empty.
for i in range(5):
    # gen_eval = GenesisEvaluator(weights_dir = WEIGHTS_DIR, ensemble=False, split=i)
    # normal_eval = VideoEvaluator(ensemble=False, split=i, model_id="vgg_cam", num_classes=4)
    files = cross_val_split[str(i)]["test"][0]
    for f in files:
        print("evaluate", f)
        video_file = os.path.join(VIDEO_DIR, f)
        # run genesis model
        vid3d = Videoto3D("", 64, 64, 5, 5)
        vid3d.max_vid = {"cov": 20, "pne": 20, "reg": 20}
        # "cov" is only a dummy label
        X_test, _, fn = vid3d.video3d([video_file], ["cov"])
        print(X_test.shape)
        assert len(np.unique(fn)) == 1

        # preds = gen_eval(os.path.join(VIDEO_DIR, f))
        # vid_pred_genesis = np.argmax(np.mean(preds, axis=(0,1)))
        # all_genesis_preds.append(preds)
        # # run cam model
        # preds_framebased = normal_eval(os.path.join(VIDEO_DIR, f))
        # frame_pred = np.argmax(np.mean(preds_framebased, axis=(0,1)),1)
        # all_frame_preds.append(preds_framebased)
        # print("genesis pred", vid_pred_genesis, "frame based pred", frame_pred)

In [None]:
from tensorflow.keras.applications import VGG16
baseModel = VGG16(
    include_top=True, weights='imagenet', input_tensor=None, input_shape=None,
    pooling=None, classes=1000
    )

In [None]:
vid3d = Videoto3D("",64, 64, 5, 5)
vid3d.max_vid = {"cov": 20, "pne": 20, "reg": 20}
X_test, _, fn = vid3d.video3d(["../../data/pocus_videos/convex/Reg-NormalLungs.mp4"], ["cov"])

In [None]:
X_test.shape

In [None]:
print(len(np.unique(fn)))

## Evaluation of video classification

In [None]:
lab_dict = {"cov": 0, "pne": 1, "reg": 2}
this_class = {"cov": "covid", "pne": "pneumonia", "reg": "regular"}
# Videos that do not contribute a ground-truth label.
# NOTE(review): presumably because the stored evaluation outputs contain no
# predictions for them -- confirm.
EXCLUDED_VIDEOS = {"Reg-Youtube.mp4", "Reg-NormalLungs.mp4"}
saved_gt = []  # one list of integer labels per cross-validation fold

for i in range(5):
    all_labels = []
    files, labs = cross_val_split[str(i)]["test"]
    for vid_file, vid_lab in zip(files, labs):
        if "Butterfly" not in vid_file:
            # every non-butterfly test video must have its first frame in the
            # class folder of this split
            first_frame = os.path.join(
                "../../data/cross_validation/split" + str(i),
                this_class[vid_lab], vid_file + "_frame0.jpg"
            )
            assert os.path.exists(first_frame), vid_file + "_" + str(i)
        if vid_file not in EXCLUDED_VIDEOS:
            all_labels.append(lab_dict[vid_lab])
    saved_gt.append(all_labels)

In [None]:
import cv2
cap = cv2.VideoCapture()

In [None]:
cap.get(5)

In [None]:
import pickle
eval_path = "evaluation_outputs.dat"
with open(eval_path, "rb") as infile:
    vidbased, frame_based = pickle.load(infile)

In [None]:
from sklearn.metrics import recall_score, precision_score, classification_report, matthews_corrcoef, balanced_accuracy_score
import pandas as pd

In [None]:
def mcc_multiclass(y_true, y_pred):
    """One-vs-rest Matthews correlation coefficient for every class.

    Arguments:
        y_true -- array-like of ground-truth labels
        y_pred -- array-like of predicted labels

    Returns:
        list with one coefficient per distinct label in `y_true`, ordered like
        `np.unique(y_true)`. Labels that occur only in `y_pred` get no entry.
    """
    truth = np.asarray(y_true)
    guess = np.asarray(y_pred)
    return [
        matthews_corrcoef(
            (truth == label).astype(int), (guess == label).astype(int)
        )
        for label in np.unique(truth)
    ]
def specificity(y_true, y_pred):
    """One-vs-rest specificity (true negatives / negatives) for every class.

    Arguments:
        y_true -- array-like of ground-truth labels
        y_pred -- array-like of predicted labels

    Returns:
        list with one value per distinct label in `y_true`, ordered like
        `np.unique(y_true)`.
    """
    truth = np.asarray(y_true)
    guess = np.asarray(y_pred)
    per_class = []
    for label in np.unique(truth):
        # samples that do NOT belong to `label` ...
        is_negative = truth != label
        n_negative = np.sum(is_negative.astype(int))
        # ... and were also not predicted as `label`
        n_true_negative = np.sum((guess[is_negative] != label).astype(int))
        per_class.append(n_true_negative / n_negative)
    return per_class

In [None]:
# NOTE(review): CLASSES was not defined anywhere in this notebook. The order
# below follows lab_dict (cov=0, pne=1, reg=2) -- confirm the names.
CLASSES = ["covid", "pneumonia", "regular"]
METRIC_COLUMNS = [
    "Precision", "Recall", "F1-score", "MCC", "Specificity", "Accuracy",
    "Balanced"
]

classifier = frame_based  # per-video model outputs to evaluate

# The stored predictions are one flat sequence over all folds, in the order of
# the ground truth; cut it into per-fold chunks of matching length.
n_gt_videos = sum(len(gt_split) for gt_split in saved_gt)
assert len(classifier) == n_gt_videos, (
    f"{len(classifier)} predictions but {n_gt_videos} ground-truth labels"
)
saved_logits = []  # per fold: predicted class index of every video
offset = 0
for gt_split in saved_gt:
    split_outputs = classifier[offset:offset + len(gt_split)]
    # average over the first axis of a video's output, then take the best class
    # (outputs with one more leading axis would need axis=(0, 1))
    saved_logits.append(
        [np.argmax(np.mean(video_output, axis=0)) for video_output in split_outputs]
    )
    offset += len(gt_split)

all_reports = []
accs = []
bal_accs = []
# vid_accs, _, vid_accs_bal, _ = video_accuracy(saved_logits, saved_gt, saved_files)
for gt_s, pred_idx_s in zip(saved_gt, saved_logits):
    print(len(gt_s), pred_idx_s)
    report = classification_report(
        gt_s, pred_idx_s, target_names=CLASSES, output_dict=True
    )
    # mcc_multiclass / specificity return one score per label PRESENT in the
    # ground truth; align them by label instead of by position.
    present_labels = np.unique(gt_s)
    mcc_by_label = dict(zip(present_labels, mcc_multiclass(gt_s, pred_idx_s)))
    spec_by_label = dict(zip(present_labels, specificity(gt_s, pred_idx_s)))
    for class_idx, cl in enumerate(CLASSES):
        report[cl]["mcc"] = mcc_by_label.get(class_idx, np.nan)
        report[cl]["specificity"] = spec_by_label.get(class_idx, np.nan)
    df = pd.DataFrame(report).transpose()
    df = df.drop(columns="support")
    df["accuracy"] = [report["accuracy"] for _ in range(len(df))]
    bal = balanced_accuracy_score(gt_s, pred_idx_s)
    df["balanced"] = [bal for _ in range(len(df))]
    # df["video"] = vid_accs[s]
    # df["video_balanced"] = vid_accs_bal[s]
    accs.append(report["accuracy"])
    bal_accs.append(bal)
    # keep only the per-class rows (drops the accuracy / average rows)
    all_reports.append(np.array(df)[:len(CLASSES)])

# mean and standard deviation over the folds
df_arr = np.around(np.mean(all_reports, axis=0), 2)
df_classes = pd.DataFrame(df_arr, columns=METRIC_COLUMNS, index=CLASSES)
print(df_classes)
df_std = np.around(np.std(all_reports, axis=0), 2)
df_std = pd.DataFrame(df_std, columns=METRIC_COLUMNS, index=CLASSES)

column_order = ["Accuracy", "Balanced", "Precision", "Recall", "Specificity", "F1-score", "MCC"]
df_classes = df_classes[column_order]
df_std = df_std[column_order]

# df_classes.to_csv("model_comparison/vid_cam_3_mean.csv")
# df_std.to_csv("model_comparison/vid_cam_3_std.csv")
