In [7]:
import re
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from preprocess import get_records, preprocess_for_logistic, read_records


# read and preprocess training and validation dataset for average pool, validation dataset

In [2]:
r = re.compile("^train.+\\.tfrecord$")
train_dir  = "/Users/shufanxia/Documents/frame-level/"
val_dir = "/Users/shufanxia/Documents/validate-frame/"
test_dir = "/Users/shufanxia/Documents/test-frame/"
frames_train = get_records(train_dir,"train")
frames_val = get_records(val_dir,"validate")
frames_test = get_records(test_dir,"validate") # we reserve one validation frame record for test

n_labels = 1000
feat_rgb,feat_audio,feat_pseudoid,feat_labels = read_records(frames_train)
X_rgb_train, X_audio_train,y_train = preprocess_for_logistic(feat_rgb,feat_audio,feat_labels,n_labels)

feat_rgb_val,feat_audio_val,feat_pseudoid_val,feat_labels_val = read_records(frames_val)
X_rgb_val, X_audio_val,y_val = preprocess_for_logistic(feat_rgb_val,feat_audio_val,feat_labels_val,n_labels)

feat_rgb_test,feat_audio_test,feat_pseudoid_test,feat_labels_test = read_records(frames_test)
X_rgb_test, X_audio_test,y_test = preprocess_for_logistic(feat_rgb_test,feat_audio_test,feat_labels_test,n_labels)

2022-04-09 23:37:45.837740: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Baseline logsitic model

In [3]:

def prepare_logistic(input_type = "rgb",X_rgb_train=None,X_audio_train = None, y_train=None,
                     X_rgb_val=None,X_audio_val= None,y_val=None):
    # build and train a one vs all multiclass classifier
    # choose from three types of inputs
    if input_type == "rgb":
        X_train = X_rgb_train
        X_val = X_rgb_val
    elif input_type == "audio":
        X_train = X_audio_train
        X_val = X_audio_val
    elif input_type == "both":
        X_train = tf.concat([X_rgb_train, X_audio_train],1)
        X_val = tf.concat([X_rgb_val, X_audio_val],1)
    else:
        print("invalid input type")
        raise ValueError
    return X_train,X_val,y_train,y_val

def build_logistic(l2= 1e-8):
    logistic_reg = tf.keras.models.Sequential([
        tf.keras.layers.Dense(n_labels, activation='sigmoid',
        kernel_regularizer=tf.keras.regularizers.L2(l2))])

    optimizer = tf.keras.optimizers.SGD()
    logistic_reg.compile(optimizer,
                loss=tf.keras.losses.BinaryCrossentropy())
    return logistic_reg


##### With just rgb

In [4]:
X_train,X_val,y_train,y_val = prepare_logistic(input_type = "rgb",
                                                X_rgb_train = X_rgb_train,y_train =y_train, 
                                                X_rgb_val=X_rgb_val, y_val=y_val)
                                                
# one vs all multiclass classifier, print binary loss along the way     
logistic_reg = build_logistic(l2= 1e-8)                                      
logistic_reg.fit(X_train,y_train,epochs=800,
                    batch_size=500,
                    validation_data = (X_val,y_val))

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/800
Epoch 74/800
Epoch 75/800
Epoch 76/800
Epoch 77/800
Epoch 78

<keras.callbacks.History at 0x1d794c2e0>

##### With just audio

##### With video + audio

#### Evaluate model performance
consider tuning learning rate, regularization strength, by looking at F1 score 

In [8]:
import eval_util as eval
from report import report_performance,make_top_n_pred_df,get_label

print("training")
y_predproba_train = logistic_reg.predict(X_train)
gAP_train,PERR_train, HIT1_train,F1_optimal_train,thresh_optimal_train = report_performance(y_predproba_train,y_train,verbose=True, thresh_step=0.01,thresh=None)

print("\nvalidation")
y_predproba_val = logistic_reg.predict(X_val)
gAP_val,PERR_val, HIT1_val,F1_optimal_val,thresh_optimal_val = report_performance(y_predproba_val,y_val,verbose=True, thresh_step=0.01)
gAP_val,PERR_val, HIT1_val,F1_val= report_performance(y_predproba_val,y_val,verbose=True, thresh=thresh_optimal_train)

print("\ntest")
X_test = X_rgb_test
y_predproba_test = logistic_reg.predict(X_test)
y_predproba_test = logistic_reg.predict(X_test)
gAP_val,PERR_test, HIT1_val,F1_optimal_test,thresh_optimal_test= report_performance(y_predproba_test,y_test)
gAP_val,PERR_test, HIT1_test,F1_test= report_performance(y_predproba_test,y_test,verbose=True, thresh=thresh_optimal_train)

training
gAP = 0.3456, PERR = 0.4241, HIT1 = 0.5939
Optimal weigthed F1 score 0.4215 when treshold = 0.7800

validation
gAP = 0.1102, PERR = 0.3283, HIT1 = 0.4769
Optimal weigthed F1 score 0.2909 when treshold = 0.0200
gAP = 0.1102, PERR = 0.3283, HIT1 = 0.4769
Weigthed F1 score 0.2850 when treshold = 0.7800

test
gAP = 0.0952, PERR = 0.2996, HIT1 = 0.4783
Optimal weigthed F1 score 0.2834 when treshold = 0.4300
gAP = 0.0952, PERR = 0.2996, HIT1 = 0.4783
Weigthed F1 score 0.2749 when treshold = 0.7800


#### Export prediction result

In [25]:
### raw version
pred_df_raw = pd.DataFrame({"pseudo_id": feat_pseudoid_test,
                            "y_true":tf.cast(y_test,tf.int32).numpy().tolist(),
                            "y_predproba":y_predproba_test.tolist()})
pred_df_raw.to_pickle('logistic_video_rgb_raw.pkl')
pred_df_raw.head()

Unnamed: 0,pseudo_id,y_true,y_predproba
0,PA8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[3.8626872916723435e-17, 2.948607131434357e-23..."
1,fx8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1.9990340850016678e-32, 3.1517587747564163e-1..."
2,8s8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2.9847021778550697e-06, 1.2619893906197933e-1..."
3,x48l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[5.172878659166595e-14, 1.6292141247209457e-12..."
4,HP8l,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ...","[6.187164929699618e-12, 2.0612565473920874e-12..."


In [22]:
pred_df = make_top_n_pred_df(feat_pseudoid_test,y_predproba_test,feat_labels_test,top_n_pred =5,get_names=False)
pred_df.to_pickle('logistic_video_rgb.pkl')
pred_df.head(10)

Unnamed: 0,pseudo_id,label_true,label_pred,predict_proba
0,PA8l,"[89, 201]","[3, 26, 13, 463, 8]","[1.0, 1.0, 0.9999810457229614, 0.0274487733840..."
1,fx8l,"[15, 277, 400]","[6, 11, 25, 16, 33]","[1.5098697403459482e-08, 1.4216440469283498e-0..."
2,8s8l,[25],"[14, 0, 2, 25, 1]","[0.0004627108573913574, 2.9847021778550697e-06..."
3,x48l,[305],"[2, 55, 387, 79, 17]","[1.0, 1.0, 0.9999998211860657, 0.9994552135467..."
4,HP8l,"[4, 10, 13]","[4, 3, 9, 13, 71]","[4.525904660113156e-05, 3.049834504054161e-06,..."
5,tO8l,"[0, 12]","[0, 12, 9, 96, 34]","[1.0, 1.0, 1.3159846275812015e-05, 1.481507894..."
6,HL8l,"[21, 23, 24, 73, 504]","[24, 73, 23, 21, 956]","[1.0, 1.0, 1.0, 1.0, 0.99940025806427]"
7,er8l,"[0, 1, 139]","[0, 36, 12, 6, 112]","[1.0, 1.0, 1.0, 0.9998304843902588, 0.03641775..."
8,LH8l,[6],"[964, 6, 8, 25, 3]","[1.0, 1.0, 0.9999704360961914, 0.1540841460227..."
9,h88l,"[48, 258, 2162]","[278, 35, 41, 43, 44]","[1.0, 1.0, 1.0, 1.0, 1.0]"


In [23]:
pred_df_name = make_top_n_pred_df(feat_pseudoid_test,y_predproba_test,feat_labels_test,top_n_pred =5,get_names=True)
pred_df_name.head(10)
pred_df


Unnamed: 0,pseudo_id,label_true,label_pred,predict_proba
0,PA8l,"[89, 201]","[3, 26, 13, 463, 8]","[1.0, 1.0, 0.9999810457229614, 0.0274487733840..."
1,fx8l,"[15, 277, 400]","[6, 11, 25, 16, 33]","[1.5098697403459482e-08, 1.4216440469283498e-0..."
2,8s8l,[25],"[14, 0, 2, 25, 1]","[0.0004627108573913574, 2.9847021778550697e-06..."
3,x48l,[305],"[2, 55, 387, 79, 17]","[1.0, 1.0, 0.9999998211860657, 0.9994552135467..."
4,HP8l,"[4, 10, 13]","[4, 3, 9, 13, 71]","[4.525904660113156e-05, 3.049834504054161e-06,..."
...,...,...,...,...
271,2O8l,"[3, 4]","[4, 3, 13, 9, 6]","[1.0, 0.9999977946281433, 2.437684997858014e-0..."
272,LG8l,"[2, 18, 43, 59, 60, 76]","[6, 18, 43, 19, 2]","[0.0035650432109832764, 6.222427600732772e-06,..."
273,8n8l,"[39, 121, 156, 338]","[39, 202, 31, 35, 121]","[1.0, 0.9999967813491821, 0.909619152545929, 0..."
274,if8l,"[2, 30]","[0, 8, 14, 6, 2]","[1.010332373319045e-10, 2.0133398420663084e-11..."
