### Demo: performing prediction using the meta-learner

#### import packages

In [None]:
import random

import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable

from ensemble_learners import ensemble_regressor_cnn

print(torch.cuda.is_available())

#### load data

In [None]:
# --- directories ------------------------------------------------------------
# CSV metadata of the testing sets and the per-model prediction arrays.
dirpath_csv = '/mnt/Data02/DataDL/Ensemble/noisy_INSTANCE_02/'
dirpath_pred = '/mnt/Data02/DataDL/Ensemble/INSTANCE_prediction_02/original/'
# Location the trained meta-learner is read from, and where results are saved.
dirpath_data = '/mnt/Data02/DataDL/Ensemble/INSTANCE_prediction_02/original/training/CNN_regress/'
dirpath_data2 = '/mnt/Data02/DataDL/Ensemble/INSTANCE_prediction_02/original/prediction/CNN_regress/'

# --- testing files ----------------------------------------------------------
# One CSV per SNR bin; each (low, high) pair below becomes one file name.
flnm_list = [
    f'Instance_events_counts_4pick_filt_snr_{snr_lo}_{snr_hi}_testing.csv'
    for snr_lo, snr_hi in (
        (-2, 2), (2, 4), (4, 6), (6, 8), (8, 10),
        (10, 12), (12, 14), (14, 16), (16, 18), (18, 20),
        (20, 25), (25, 30), (30, 35), (35, 40), (40, 100),
    )
]
nflnms = len(flnm_list)

# --- base models whose predictions feed the meta-learner ---------------------
mdlnm_list = [
    "eqt_original",
    "eqt_ethz",
    "eqt_instance",
    "eqt_scedc",
    "eqt_stead",
    "eqt_neic",
]
nmdls = len(mdlnm_list)

# --- array dimensions ---------------------------------------------------------
ntrace = 15000  # traces per testing file
nt = 6000       # samples per trace
t = 60
dt = 0.01
nc = 3
mc = 1          # size of the channel axis of the stacked prediction array

#### perform prediction

In [None]:
# Run the trained meta-learner over every testing file in flnm_list.
# For each file: stack the base-model predictions, cut one randomly shifted
# window of ntwin samples around the labelled P pick of every trace, predict
# the (normalised) position of the pick inside that window, and save the
# inputs, predictions, ground truth and summary statistics as .npy files.

# Choose the device at runtime so the cell also works on CPU-only machines.
CUDA = torch.cuda.is_available()
device = torch.device('cuda' if CUDA else 'cpu')

# load model -- done once, the same checkpoint is used for every file.
# NOTE(review): torch.load unpickles the checkpoint, so only load files from a
# trusted source. The result is used directly as a module, i.e. the checkpoint
# is a whole pickled model; recent PyTorch releases may need
# weights_only=False for this -- confirm against the installed version.
model_fnm = 'Instance_training_set_original_tp_L2k.pt'
model = torch.load(dirpath_data+model_fnm, map_location=device)
model = model.to(device)
model.eval()
print('model is loaded and configured!')

for ifl, flnm in enumerate(flnm_list[:]):
    # initializing
    pdata = np.zeros([ntrace,mc,nmdls,nt], dtype=np.float32)
    # not used in this cell; kept because notebook globals may be read elsewhere
    trace_name_train_list, trace_stt_train_list, tp_sample_train_list, ts_sample_train_list = [], [], [], []
    valid_fnm = flnm[:-4]+'_prediction_tp_L2k'
    print(f'working on valid_fnm={valid_fnm}')

    # load csv
    csv_reader = pd.read_csv(dirpath_csv+flnm)
    trace_name_list = csv_reader['trace_name'].values
    trace_stt_list = csv_reader['trace_start_time'].values
    tp_sample_list = csv_reader['p_arrival_sample'].values
    ts_sample_list = csv_reader['s_arrival_sample'].values

    # stack the predictions of all base models along axis 2
    for imdl, mdlnm in enumerate(mdlnm_list):
        print(f'working on imdl, ifl = {imdl}, {ifl}')
        pred_npy = np.load(dirpath_pred+flnm[:-4]+'_pred_'+mdlnm+'.npy')
        # channel 1 of the base-model output -- presumably the P-phase
        # probability trace; confirm against the code that wrote these files
        pdata[:, 0, imdl, :] = pred_npy[:,1,:]

    # prepare data and labels
    ntwin = 2000
    cdata = np.zeros([ntrace, 1, nmdls, ntwin], dtype=np.float32) # [N,C,H,W]
    clabels = np.zeros([ntrace, 1], dtype=np.float32) # labels
    for isamp in range(ntrace):
        # labelled P pick; cast to int because a pandas column parsed as float
        # would otherwise be rejected as a slice bound below
        itp = int(tp_sample_list[isamp])
        # random offset of the pick inside the window
        itind = random.randint(100, 1900)
        # window would run past the end of the trace: align it to the end
        if itp-itind+ntwin >= nt:
            itind = itp+ntwin-nt
        # window would start before the trace: align it to the start
        if itp-itind<0:
            itind = itp
        cdata[isamp] = pdata[isamp,0:1,:,itp-itind:itp-itind+ntwin]
        # make label between 0 and 1
        clabels[isamp] = itind/ntwin

    # del pdata and free up memory
    del pdata

    # prepare dataloader
    batch_size=500
    shuffle = False # must stay False: predictions are written back by batch index
    test_load=torch.utils.data.DataLoader(dataset=cdata, batch_size=batch_size, shuffle=shuffle)

    # run the model; no_grad avoids building an autograd graph during inference
    test_pred_arr = np.zeros(clabels.shape)
    with torch.no_grad():
        for i, batch in enumerate(test_load):
            if i%2 == 0:
                mtrace = (i+1)*batch_size
                print(f'Prediction: completing {mtrace/ntrace:>3f}')

            pred = model(batch.to(device)).cpu().numpy()
            test_pred_arr[i*batch_size:(i+1)*batch_size] = pred

    # inputs and ground truth are saved as float64 copies of the arrays built
    # above (same values the original per-batch copy produced)
    test_data_arr = cdata.astype(np.float64)
    test_truth_arr = clabels.astype(np.float64)

    # simple statistical calculation
    # *ntwin turns the normalised label back into samples; /100 matches
    # dt = 0.01, i.e. presumably converts samples to seconds
    misfit = test_pred_arr*ntwin/100 - test_truth_arr*ntwin/100
    # NOTE(review): the first 3000 traces are excluded from the statistics;
    # the reason is not visible in this notebook -- confirm
    misfit_ave = np.mean(misfit[3000:])
    misfit_mae = np.mean(np.abs(misfit[3000:]))
    misfit_rmse = np.sqrt(np.mean(misfit[3000:]**2))
    misfit_std = np.std(misfit[3000:])
    misfit_median = np.median(misfit[3000:])
    print(f'count, misfit_ave, misfit_mae, misfit_rmse, misfit_std = {len(misfit[3000:])}, {misfit_ave:>3f}, {misfit_mae:>3f}, {misfit_rmse:>3f}, {misfit_std:>3f}')

    np.save(dirpath_data2+valid_fnm+'_data.npy', test_data_arr)
    np.save(dirpath_data2+valid_fnm+'_pred.npy', test_pred_arr)
    np.save(dirpath_data2+valid_fnm+'_truth.npy', test_truth_arr)
    np.save(dirpath_data2+valid_fnm+'_stats.npy', np.array([misfit_ave,misfit_mae,misfit_rmse,misfit_std,misfit_median]))