# Average pooling + Logistic regression classifier
#### read and preprocess training and validation dataset for average pool, validation dataset

In [3]:
import re
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from preprocess import get_records, preprocess_for_logistic, read_records
import eval_util as eval
from report import report_performance,make_top_n_pred_df

In [4]:

# get and read each records from the desired directory 
r = re.compile("^train.+\\.tfrecord$")
train_dir  = "/Users/shufanxia/Documents/frame-level/" #change 
val_dir = "/Users/shufanxia/Documents/validate-frame/" # change
test_dir = "/Users/shufanxia/Documents/test-frame/" #change
frames_train = get_records(train_dir,"train")
frames_val = get_records(val_dir,"validate")
frames_test = get_records(test_dir,"validate") # we reserve one validation frame record for test

n_labels = 1000
feat_rgb,feat_audio,feat_pseudoid,feat_labels = read_records(frames_train)
feat_rgb_val,feat_audio_val,feat_pseudoid_val,feat_labels_val = read_records(frames_val)
feat_rgb_test,feat_audio_test,feat_pseudoid_test,feat_labels_test = read_records(frames_test)

# call 
X_rgb_train, X_audio_train,y_train = preprocess_for_logistic(feat_rgb,feat_audio,feat_labels,n_labels)
X_rgb_val, X_audio_val,y_val = preprocess_for_logistic(feat_rgb_val,feat_audio_val,feat_labels_val,n_labels)
X_rgb_test, X_audio_test,y_test = preprocess_for_logistic(feat_rgb_test,feat_audio_test,feat_labels_test,n_labels)

2022-04-14 14:46:34.117486: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Baseline logsitic model

In [3]:

def prepare_logistic(input_type = "rgb",X_rgb_train=None,X_audio_train = None, y_train=None,
                     X_rgb_val=None,X_audio_val= None,y_val=None):
    # build and train a one vs all multiclass classifier
    # choose from three types of inputs
    if input_type == "rgb":
        X_train = X_rgb_train
        X_val = X_rgb_val
    elif input_type == "audio":
        X_train = X_audio_train
        X_val = X_audio_val
    elif input_type == "both":
        # if using both, simply concatenate audio after rgb
        X_train = tf.concat([X_rgb_train, X_audio_train],1)
        X_val = tf.concat([X_rgb_val, X_audio_val],1)
    else:
        print("invalid input type")
        raise ValueError
    return X_train,X_val,y_train,y_val


# define a logistic regression using one tf Dense layer 
# input (batch_size, 1024), output (batch_size, 1000)
def build_logistic(l2= 1e-8):
    logistic_reg = tf.keras.models.Sequential([
        tf.keras.layers.Dense(n_labels, activation='sigmoid',
        kernel_regularizer=tf.keras.regularizers.L2(l2))])

    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
    logistic_reg.compile(optimizer,
                loss=tf.keras.losses.BinaryCrossentropy())
    return logistic_reg


#### With RGB inputs only

In [None]:
X_train_rgb,X_val_rgb,y_train_rgb,y_val = prepare_logistic(input_type = "rgb",
                                                X_rgb_train = X_rgb_train,y_train =y_train, 
                                                X_rgb_val=X_rgb_val, y_val=y_val)
# define gAP by importing the function                                                 
gAP = eval.calculate_gap                               
# one vs all multiclass classifier, print binary loss along the way  
callback = tf.keras.callbacks.EarlyStopping(monitor='gAP', patience=10)   
logistic_reg_rgb = build_logistic(l2= 1e-6)                                      
logistic_reg_rgb.fit(X_train_rgb,y_train_rgb,epochs=800,
                    batch_size=500,
                    validation_data = (X_val_rgb,y_val),callbacks = [callback])


##### Evaluate model performance

In [8]:

print("training")
y_predproba_train = logistic_reg_rgb.predict(X_train_rgb)
gAP_train,PERR_train, HIT1_train,F1_optimal_train,thresh_optimal_train = report_performance(y_predproba_train,y_train,verbose=True, thresh = 0.5)

print("\nvalidation")
y_predproba_val = logistic_reg_rgb.predict(X_val_rgb)
gAP_val,PERR_val, HIT1_val,F1_optimal_val,thresh_optimal_val = report_performance(y_predproba_val,y_val,verbose=True, thresh_step=0.01)
gAP_val,PERR_val, HIT1_val,F1_val= report_performance(y_predproba_val,y_val,verbose=True, thresh = 0.5)

print("\ntest")
X_test = X_rgb_test
y_predproba_test = logistic_reg_rgb.predict(X_test)
y_predproba_test = logistic_reg_rgb.predict(X_test)
gAP_test,PERR_test, HIT1_val,F1_optimal_test,thresh_optimal_test= report_performance(y_predproba_test,y_test)
gAP_test,PERR_test, HIT1_test,F1_test= report_performance(y_predproba_test,y_test,verbose=True, thresh = 0.5)

training
gAP = 0.3410, PERR = 0.4231, HIT1 = 0.5860
Optimal weigthed F1 score 0.4075 when treshold = 0.9100

validation
gAP = 0.1060, PERR = 0.3130, HIT1 = 0.4593
Optimal weigthed F1 score 0.2815 when treshold = 0.0100
gAP = 0.1060, PERR = 0.3130, HIT1 = 0.4593
Weigthed F1 score 0.2736 when treshold = 0.9100

test
gAP = 0.0989, PERR = 0.3015, HIT1 = 0.4493
Optimal weigthed F1 score 0.2774 when treshold = 0.0100
gAP = 0.0989, PERR = 0.3015, HIT1 = 0.4493
Weigthed F1 score 0.2669 when treshold = 0.9100


#### Export prediction result

In [9]:
### raw version
pred_df_raw = pd.DataFrame({"pseudo_id": feat_pseudoid_test,
                            "y_true":tf.cast(y_test,tf.int32).numpy().tolist(),
                            "y_predproba":y_predproba_test.tolist()})
pred_df_raw.to_pickle('logistic_video_rgb_raw.pkl')
pred_df_raw.head()

Unnamed: 0,pseudo_id,y_true,y_predproba
0,PA8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[5.833747113853649e-13, 0.9992051720619202, 1...."
1,fx8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[6.890864799012679e-19, 2.6637591438172237e-11..."
2,8s8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[2.662541576103905e-10, 3.4842759788489275e-08..."
3,x48l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1.3633198302914762e-12, 1.2680714979589713e-1..."
4,HP8l,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ...","[0.014418601989746094, 1.9882767578627975e-13,..."


In [10]:
pred_df = make_top_n_pred_df(feat_pseudoid_test,y_predproba_test,feat_labels_test,top_n_pred =5,get_names=False)
pred_df.to_pickle('logistic_video_rgb.pkl')
pred_df.head(10)

Unnamed: 0,pseudo_id,label_true,label_pred,predict_proba
0,PA8l,"[89, 201]","[1, 163, 8, 243, 357]","[0.9992051720619202, 0.995537519454956, 0.9845..."
1,fx8l,"[15, 277, 400]","[7, 2, 6, 11, 17]","[0.17491665482521057, 3.835577899735654e-08, 1..."
2,8s8l,[25],"[1, 14, 0, 11, 7]","[3.4842759788489275e-08, 5.7413984677623375e-0..."
3,x48l,[305],"[2, 68, 55, 30, 11]","[0.999945342540741, 0.9989584684371948, 0.2179..."
4,HP8l,"[4, 10, 13]","[4, 9, 10, 0, 13]","[1.0, 0.033388108015060425, 0.0297219753265380..."
5,tO8l,"[0, 12]","[0, 12, 1, 3, 15]","[1.0, 0.9995517730712891, 9.77603207275024e-08..."
6,HL8l,"[21, 23, 24, 73, 504]","[24, 23, 21, 17, 96]","[1.0, 1.0, 1.0, 0.9999964237213135, 0.99810808..."
7,er8l,"[0, 1, 139]","[0, 126, 139, 6, 5]","[1.0, 0.9999950528144836, 0.002332746982574463..."
8,LH8l,[6],"[6, 7, 42, 8, 149]","[1.0, 0.9857913255691528, 0.002655029296875, 5..."
9,h88l,"[48, 258, 2162]","[499, 132, 566, 533, 67]","[1.0, 1.0, 1.0, 1.0, 1.0]"


### With just audio

In [None]:
X_train_audio,X_val_audio,y_train_audio,y_val_audio = prepare_logistic(input_type = "audio",
                                                X_audio_train = X_audio_train,y_train =y_train, 
                                                X_audio_val=X_audio_val, y_val=y_val)
gAP = eval.calculate_gap    
                           
# one vs all multiclass classifier, print binary loss along the way  
callback = tf.keras.callbacks.EarlyStopping(monitor='gAP', patience=10)   
logistic_reg_audio = build_logistic(l2= 1e-6)                                      
logistic_reg_audio.fit(X_train_audio,y_train_audio,epochs=800,
                    batch_size=500,
                    validation_data = (X_val_audio,y_val_audio),callbacks = [callback])


##### Evaluate model performance


In [14]:

print("training")
y_predproba_train = logistic_reg_audio.predict(X_train_audio)
gAP_train,PERR_train, HIT1_train,F1_optimal_train,thresh_optimal_train = report_performance(y_predproba_train,y_train_audio,verbose=True, thresh=0.5)

print("\nvalidation")
y_predproba_val = logistic_reg_audio.predict(X_val_audio)
gAP_val,PERR_val, HIT1_val,F1_optimal_val,thresh_optimal_val = report_performance(y_predproba_val,y_val_audio,verbose=True, thresh = 0.5)
gAP_val,PERR_val, HIT1_val,F1_val= report_performance(y_predproba_val,y_val,verbose=True, thresh=thresh_optimal_train)

print("\ntest")
y_predproba_test = logistic_reg_audio.predict(X_audio_test)
gAP_val,PERR_test, HIT1_val,F1_optimal_test,thresh_optimal_test= report_performance(y_predproba_test,y_test)
gAP_val,PERR_test, HIT1_test,F1_test= report_performance(y_predproba_test,y_test,verbose=True, thresh=0.5)


training
gAP = 0.1778, PERR = 0.3078, HIT1 = 0.4144
Optimal weigthed F1 score 0.2858 when treshold = 0.4800

validation
gAP = 0.0991, PERR = 0.2526, HIT1 = 0.3440
Optimal weigthed F1 score 0.2416 when treshold = 0.6300
gAP = 0.0991, PERR = 0.2526, HIT1 = 0.3440
Weigthed F1 score 0.2382 when treshold = 0.4800

test
gAP = 0.1015, PERR = 0.2718, HIT1 = 0.3696
Optimal weigthed F1 score 0.2769 when treshold = 0.5900
gAP = 0.1015, PERR = 0.2718, HIT1 = 0.3696
Weigthed F1 score 0.2744 when treshold = 0.4800


#### Export prediction result for model using audio only


In [None]:
pred_df_raw_audio = pd.DataFrame({"pseudo_id": feat_pseudoid_test,
                            "y_true":tf.cast(y_test,tf.int32).numpy().tolist(),
                            "y_predproba":y_predproba_test.tolist()})
pred_df_raw_audio.to_pickle('logistic_video_audio_raw.pkl')
pred_df_raw_audio.head()
pred_df_audio = make_top_n_pred_df(feat_pseudoid_test,y_predproba_test,feat_labels_test,top_n_pred =5,get_names=False)
pred_df_audio.to_pickle('logistic_video_audio.pkl')
pred_df_audio.head(10)

### With video + audio

In [17]:
X_train_both,X_val_both,y_train_both,y_val_both = prepare_logistic(input_type = "both",
                                                X_rgb_train = X_rgb_train, X_audio_train = X_audio_train,y_train =y_train, 
                                                X_rgb_val = X_rgb_val,X_audio_val=X_audio_val, y_val=y_val)
gAP = eval.calculate_gap                               
# one vs all multiclass classifier, print binary loss along the way  
callback = tf.keras.callbacks.EarlyStopping(monitor='gAP', patience=10)   
logistic_reg_both = build_logistic(l2= 1e-6)                                      
logistic_reg_both.fit(X_train_both,y_train_both,epochs=800,
                    batch_size=500,
                    validation_data = (X_val_both,y_val_both),callbacks = [callback])


Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/800
Epoch 74/800
Epoch 75/800
Epoch 76/800
Epoch 77/800
Epoch 78

<keras.callbacks.History at 0x1d68b13f0>

##### Evaluate model performance

In [20]:
print("training")
y_predproba_train = logistic_reg_both.predict(X_train_both)
gAP_train,PERR_train, HIT1_train,F1_optimal_train,thresh_optimal_train = report_performance(y_predproba_train,y_train,verbose=True, thresh=0.5)

print("\nvalidation")
y_predproba_val = logistic_reg_both.predict(X_val_both)
gAP_val,PERR_val, HIT1_val,F1_val= report_performance(y_predproba_val,y_val,verbose=True, thresh=0.5)

print("\ntest")
X_test_both = tf.concat([X_rgb_test, X_audio_test],1)
y_predproba_test = logistic_reg_both.predict(X_test_both)
gAP_test,PERR_test, HIT1_test,F1_test= report_performance(y_predproba_test,y_test,verbose=True, thresh=0.5)


training
gAP = 0.4136, PERR = 0.4634, HIT1 = 0.6361
Optimal weigthed F1 score 0.4637 when treshold = 0.8700

validation
gAP = 0.1394, PERR = 0.3419, HIT1 = 0.4967
Optimal weigthed F1 score 0.3223 when treshold = 0.8200
gAP = 0.1394, PERR = 0.3419, HIT1 = 0.4967
Weigthed F1 score 0.3202 when treshold = 0.8700

test
gAP = 0.1214, PERR = 0.3055, HIT1 = 0.4891
Optimal weigthed F1 score 0.3182 when treshold = 0.0500
gAP = 0.1214, PERR = 0.3055, HIT1 = 0.4891
Weigthed F1 score 0.3119 when treshold = 0.8700



#### Export prediction result for models using both audio and rgb


In [21]:

pred_df_raw_both = pd.DataFrame({"pseudo_id": feat_pseudoid_test,
                            "y_true":tf.cast(y_test,tf.int32).numpy().tolist(),
                            "y_predproba":y_predproba_test.tolist()})
pred_df_raw_both.to_pickle('logistic_video_both_raw.pkl')
pred_df_raw_both.head()

pred_df_both = make_top_n_pred_df(feat_pseudoid_test,y_predproba_test,feat_labels_test,top_n_pred =5,get_names=False)
pred_df_both.to_pickle('logistic_video_both.pkl')
pred_df_both.head(10)

Unnamed: 0,pseudo_id,y_true,y_predproba
0,PA8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1.4457046862328298e-34, 1.0796940381470904e-1..."
1,fx8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1.0681371585830046e-11, 9.750609251614462e-20..."
2,8s8l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.7716702222824097, 5.3249266784405336e-05, 1..."
3,x48l,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[5.4065880089181496e-15, 5.4559885029448196e-0..."
4,HP8l,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, ...","[1.0204723929238453e-26, 5.918296519524802e-09..."
