In [1]:
import autopool
import numpy as np
import pandas as pd
import keras
import keras.backend as K
import json
import h5py, pytz, datetime
import os
from sklearn.metrics import f1_score

Using TensorFlow backend.


# Set up timezone

In [3]:
# US/Eastern timezone object; get_sensor_predictions (defined further down)
# uses it when deriving month / weekday / hour buckets from feature timestamps.
nytz = pytz.timezone('US/Eastern')
# Scratch check kept from development (not executed):
#ts = datetime.datetime.utcfromtimestamp(f['openl3'][0][0])
#nytz.localize(ts)
# NOTE(review): pytz's localize() attaches the zone to a naive datetime without
# shifting the clock time, so applying it to a utcfromtimestamp() result labels
# UTC wall-clock time as Eastern instead of converting it.

# Compute ideal class thresholds

In [4]:
# Classes of interest mapped to an integer index. The index is used later to
# pick columns out of the model output, and (index + 1) is the numeric prefix
# of the matching annotation / results column names.
label_to_idx = dict([
    ("engine", 0),
    ("machinery-impact", 1),
    ("alert-signal", 4),
    ("music", 5),
    ("human-voice", 6),
    ("dog", 7),
])

In [5]:
# Ground-truth annotations: read the CSV and keep only rows that belong to the
# 'validate' split and carry annotator_id 0.
# NOTE(review): annotator_id 0 is presumably the reference annotation - confirm.
ann_df = pd.read_csv('/beegfs/work/sonyc/ust/annotations/v1.0/annotations.csv')
in_validate_split = ann_df['split'] == 'validate'
from_annotator_zero = ann_df['annotator_id'] == 0
ann_df = ann_df[in_validate_split & from_annotator_zero]

# Filenames of the retained rows, in annotation row order.
file_list = ann_df['audio_filename'].tolist()

# Column names are "<idx + 1>_<label>"; the annotation CSV uses them with a
# "_presence" suffix, the results CSV without it.
label_keys = []
label_keys_wo_presence = []
for label, idx in label_to_idx.items():
    label_keys.append("{}_{}_presence".format(int(idx)+1, label))
    label_keys_wo_presence.append("{}_{}".format(int(idx)+1, label))

# From here on ann_df holds only the presence columns of the selected classes.
ann_df = ann_df[label_keys]

In [6]:
# CSV of model output that is compared against the annotations to tune thresholds.
results_path = "/beegfs/work/sonyc/ust/models/1.0.0/coarse/output.csv"

In [7]:
# Load the model output table; later cells read its 'audio_filename' column
# and one "<idx + 1>_<label>" column per class.
results = pd.read_csv(results_path)

In [8]:
# Set of filenames that have a row in the results table, for fast membership tests.
results_files = {scored_name for scored_name in results['audio_filename'].tolist()}

In [9]:
# Positions (within file_list) of annotated clips that also appear in the results.
idxs = [
    position
    for position, audio_name in enumerate(file_list)
    if str(audio_name) in results_files
]

In [10]:
# Ground-truth matrix: one row per selected annotation row (by position in
# ann_df), one column per entry of label_keys.
gt_arr = np.array([
    [ann_df.iloc[row_pos][presence_key] for presence_key in label_keys]
    for row_pos in idxs
])

In [11]:
# Prediction matrix: one row per matched clip, one column per entry of
# label_keys_wo_presence.
#
# Rows are looked up by audio_filename in the file order that idxs selects from
# file_list, instead of being taken in CSV row order. The ground-truth matrix
# is built from the same idxs, so row i of both matrices describes the same
# clip even if the results CSV is ordered differently or lists extra files.
results_by_file = results.set_index('audio_filename')
if not results_by_file.index.is_unique:
    # A duplicated filename would yield several rows for a single clip and
    # silently break the one-to-one pairing with the ground truth.
    raise ValueError("results contains duplicate audio_filename entries")

matched_files = [str(file_list[idx]) for idx in idxs]
results_arr = results_by_file.loc[matched_files, label_keys_wo_presence].values

In [12]:
# Per-class decision thresholds: for every class column, try each distinct
# predicted score as a cut-off and keep the one with the highest F1 against
# the ground truth.
thresholds = []
for cls_idx in range(results_arr.shape[1]):
    binary_gt = gt_arr[:, cls_idx]
    cls_scores = results_arr[:, cls_idx]
    best_f1 = 0.0
    best_thresh = None

    # np.unique yields the sorted distinct scores. Repeated scores produce the
    # same predictions and F1, and only a strictly better F1 replaces the
    # current best, so skipping duplicates selects the same threshold.
    for thresh in np.unique(cls_scores):
        binary_pred = (cls_scores >= thresh).astype(int)
        f1 = f1_score(binary_gt, binary_pred)
        if f1 > best_f1:
            best_f1 = f1
            best_thresh = thresh

    if best_thresh is None:
        # Without this guard a None would end up in the thresholds array and
        # only fail later, when predictions are compared against it.
        raise ValueError(
            "No threshold gives a positive F1 for class column {}; "
            "check that the ground truth contains positives for it".format(cls_idx)
        )
    thresholds.append(best_thresh)

thresholds = np.array(thresholds)

In [13]:
# Class indices in label_to_idx order; used to select columns of the model output.
cls_idxs = [class_index for class_index in label_to_idx.values()]

# Load model

In [14]:
# Restore the trained model; AutoPool1D is a custom layer and has to be passed
# in explicitly so Keras can deserialize it.
model_path = '/beegfs/work/sonyc/ust/models/1.0.0/coarse/full_model_best.h5'
custom_layers = {'AutoPool1D': autopool.AutoPool1D}
model = keras.models.load_model(model_path, custom_objects=custom_layers)

# Backend function mapping the first layer's input to the output of the
# second-to-last layer.
# NOTE(review): presumably these are the per-frame predictions just before the
# pooling layer - confirm against the model definition.
first_layer_input = model.layers[0].input
penultimate_output = model.layers[-2].output
frame_model = K.function([first_layer_input], [penultimate_output])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


# Compute class_priors

In [32]:
def _hour_block(hour):
    """Return the six-hour block label ("21-3", "3-9", "9-15" or "15-21") for an hour in 0-23."""
    if hour >= 21 or hour < 3:
        return "21-3"
    if hour < 9:
        return "3-9"
    if hour < 15:
        return "9-15"
    return "15-21"


def get_sensor_predictions(fpath):
    """Run the frame-level model over one feature file and group outputs by time.

    Relies on the module-level names ``nytz``, ``frame_model``, ``cls_idxs``
    and ``thresholds``.

    Parameters
    ----------
    fpath : str
        Path to an HDF5 file containing an 'openl3' dataset. For each record,
        element 0 is read as an epoch timestamp and element 2 is passed to
        ``frame_model``.

    Returns
    -------
    dict
        Maps ``(month, "weekday" | "weekend", hour_block)`` to a dict with
        ``'raw'`` (concatenated model outputs restricted to ``cls_idxs``) and
        ``'bin'`` (the same outputs compared against ``thresholds``).
    """
    pred_dict = {}
    with h5py.File(fpath, 'r') as h5:
        for ex in h5['openl3']:
            # Epoch timestamps are UTC-based: build a UTC-aware datetime and
            # convert it to local time. Calling nytz.localize() on a naive UTC
            # datetime would only attach the zone and leave the clock time in
            # UTC, shifting every month / weekday / hour bucket.
            ts = datetime.datetime.fromtimestamp(ex[0], tz=pytz.utc).astimezone(nytz)
            month = ts.month
            dayofweek = "weekday" if ts.weekday() < 5 else "weekend"
            hourblock = _hour_block(ts.hour)

            # NOTE(review): assumes the squeezed model output is 2-D with
            # classes on the last axis - TODO confirm (a one-frame record
            # would squeeze down to 1-D and fail on the column indexing).
            pred = frame_model([ex[2]])[0].squeeze()[:, cls_idxs]
            binary_pred = pred >= thresholds[None, :]

            time_tuple = (month, dayofweek, hourblock)
            bucket = pred_dict.setdefault(time_tuple, {'raw': [], 'bin': []})
            bucket['raw'].append(pred)
            bucket['bin'].append(binary_pred)

    # A bucket is only created when a record is appended to it, so every list
    # is non-empty here and can be concatenated directly.
    for bucket in pred_dict.values():
        bucket['raw'] = np.concatenate(bucket['raw'])
        bucket['bin'] = np.concatenate(bucket['bin'])

    return pred_dict

In [33]:
# Run get_sensor_predictions on every file in the feature directory and index
# the results by sensor id. The id is the second '-'-separated token of the
# filename with everything after the first '.' removed.
# NOTE(review): if several files share a sensor id, later files overwrite
# earlier ones - confirm there is one file per sensor.
features_dir = '/beegfs/work/sonyc/features/openl3/2017'
sensor_dict = {}
for feature_file in os.listdir(features_dir):
    file_stem = feature_file.split('.')[0]
    sensor_key = file_stem.split('-')[1]
    feature_path = os.path.join(features_dir, feature_file)
    sensor_dict[sensor_key] = get_sensor_predictions(feature_path)

In [None]:
import pickle as pk

# Persist the per-sensor prediction dictionary so the expensive model pass
# does not need to be repeated.
with open('sensor_predictions.pkl', 'wb') as out_file:
    pk.dump(sensor_dict, out_file, pk.HIGHEST_PROTOCOL)