# Import Skydrifter

In [None]:
import sys
sys.path.append(r"C:\Users\bonda\Documents\Bondan\TEWS Testing\skydrifter")
import skydrifter as sd
from skydrifter.Utils.partisan import *
from skydrifter.Utils.nonpartisan import *

In [None]:
from skydrifter.PlotModeCollection.plotting import *

# Import Library

In [None]:
import torch
from torch import nn,optim
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seisbench.models as sbm
import numpy as np
import pandas as pd
import copy
import time
import random

from obspy.clients.fdsn import Client
from obspy import UTCDateTime

In [None]:
import seisbench
seisbench.use_backup_repository()

# Domestic Function

In [None]:
def find_nan_with_data_index(tensor):
    """Return the unique first-axis indices of `tensor` that contain a NaN.

    Parameters
    ----------
    tensor : torch.Tensor
        Tensor to scan for NaN values.

    Returns
    -------
    list
        Indices along dimension 0 with at least one NaN entry, without
        duplicates (in the order produced by ``Tensor.unique``); an empty
        list when the tensor holds no NaN.
    """
    is_nan = torch.isnan(tensor)
    if not is_nan.any().item():
        return []
    # nonzero(as_tuple=True) gives one index tensor per dimension;
    # only the leading dimension is of interest here.
    leading_axis_hits = is_nan.nonzero(as_tuple=True)[0]
    return leading_axis_hits.unique().tolist()

def remove_indices(lst, indices_to_remove):
    """Return a new list with the items at the given positions left out.

    Parameters
    ----------
    lst : iterable
        Source items; it is not modified.
    indices_to_remove : iterable
        Zero-based positions to drop. Duplicates and positions beyond the
        end of `lst` are harmless.

    Returns
    -------
    list
        Items of `lst` whose position is not listed, in original order.
    """
    # A set gives constant-time membership tests for each position.
    dropped_positions = set(indices_to_remove)
    kept = []
    for position, element in enumerate(lst):
        if position in dropped_positions:
            continue
        kept.append(element)
    return kept

In [None]:
def residual(y_pred,y_true,series,label_position):
    """Compare the predicted and the reference P/S picks of a single sample.

    For both `y_pred` and `y_true`, the pick of a phase is the argmax of
    channel ``label_position[phase]`` in batch element 0. The pick index is
    turned into a time as
    ``op.UTCDateTime(series.Starttime) + index / series.sampling_rate``.

    Parameters
    ----------
    y_pred, y_true : torch.Tensor
        Indexed as ``[0, channel]`` and ``[0, channel, index]``.
    series : object
        Must expose ``Starttime`` and ``sampling_rate`` attributes.
    label_position : mapping
        Maps ``'P'`` and ``'S'`` to their channel index.

    Returns
    -------
    dict
        ``true_*`` and ``pred_*`` pick index, value at the pick and arrival
        time for both phases, plus ``residual_p`` / ``residual_s``: the
        absolute difference between true and predicted arrival times.
    """
    def _pick(curves, phase):
        # Locate the strongest sample of this phase and convert it to a time.
        channel = label_position[phase]
        index = torch.argmax(curves[0,channel]).numpy()
        peak_value = curves[0,channel,index].numpy()
        arrival = op.UTCDateTime(series.Starttime) + ((1/series.sampling_rate)*index)
        return index, peak_value, arrival

    pred_p_index, pred_prob_p, pred_arrival_p = _pick(y_pred, 'P')
    pred_s_index, pred_prob_s, pred_arrival_s = _pick(y_pred, 'S')
    true_p_index, true_prob_p, true_arrival_p = _pick(y_true, 'P')
    true_s_index, true_prob_s, true_arrival_s = _pick(y_true, 'S')

    # Key order is kept fixed because it becomes the column order downstream.
    output = {
        'true_p_index': true_p_index,
        'true_s_index': true_s_index,
        'true_prob_p': true_prob_p,
        'true_prob_s': true_prob_s,
        'true_arrival_p': true_arrival_p,
        'true_arrival_s': true_arrival_s,
        'pred_p_index': pred_p_index,
        'pred_s_index': pred_s_index,
        'pred_prob_p': pred_prob_p,
        'pred_prob_s': pred_prob_s,
        'pred_arrival_p': pred_arrival_p,
        'pred_arrival_s': pred_arrival_s,
        'residual_p': abs(true_arrival_p - pred_arrival_p),
        'residual_s': abs(true_arrival_s - pred_arrival_s),
    }
    return output

def confusion(y_pred,y_true,label_position,true_threshold=0.4,pred_threshold=0.6):
    """Count sample-wise TP/TN/FP/FN for the P and S channels of one sample.

    A sample counts as positive when its value is strictly greater than the
    threshold: `pred_threshold` for `y_pred`, `true_threshold` for `y_true`.
    Only batch element 0 is inspected.

    Parameters
    ----------
    y_pred, y_true : torch.Tensor
        Indexed as ``[0, channel]``.
    label_position : mapping
        Maps ``'P'`` and ``'S'`` to their channel index.
    true_threshold, pred_threshold : float
        Binarisation thresholds for the reference and the prediction.

    Returns
    -------
    dict
        Keys ``TP_p, TN_p, FP_p, FN_p, TP_s, TN_s, FP_s, FN_s`` (in that
        order) mapped to the corresponding sample counts.
    """
    output = {}
    for phase, tag in (('P', 'p'), ('S', 's')):
        channel = label_position[phase]
        detected = y_pred[0,channel].numpy() > pred_threshold
        expected = y_true[0,channel].numpy() > true_threshold
        output['TP_' + tag] = np.sum(expected & detected)
        output['TN_' + tag] = np.sum(~expected & ~detected)
        output['FP_' + tag] = np.sum(~expected & detected)
        output['FN_' + tag] = np.sum(expected & ~detected)
    return output

def bulk_assessment(X,y,metadata,used_model,label_position):
    """Run `used_model` on every sample and collect pick residuals and confusion counts.

    Parameters
    ----------
    X, y : torch.Tensor
        Inputs and reference labels; sample ``i`` is taken as ``X[i:i+1]`` /
        ``y[i:i+1]`` so the batch dimension is kept.
    metadata : pandas.DataFrame
        One row per sample, accessed by position. Row ``i`` is used together
        with sample ``i`` and must provide ``dataset_index`` plus whatever
        `residual` reads from it.
    used_model : torch.nn.Module
        Model to evaluate.
    label_position : mapping
        Forwarded to `residual` and `confusion`.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_residual, df_confusion)`` with one row per sample.
    """
    df_residual = []
    df_confusion = []
    n = len(metadata)
    # Switch the model that is actually being assessed to inference mode.
    # The previous code called eval() on the global `model`, which left
    # `used_model` in whatever mode it happened to be in.
    used_model.eval()
    for i in range(0,n):
        X_pred = X[i:i+1]
        y_true = y[i:i+1]
        with torch.no_grad():
            y_pred = used_model(X_pred)
        series = metadata.iloc[i]
        residual_output = residual(y_pred=y_pred,y_true=y_true,series=series,label_position=label_position)
        confusion_output = confusion(y_pred=y_pred,y_true=y_true,label_position=label_position)
        # Keep both the positional index and the dataset identifier for traceability.
        residual_output['metadata_index'] = i
        residual_output['dataset_index'] = series.dataset_index
        df_residual.append(residual_output)
        df_confusion.append(confusion_output)
        print(f"\rData Number {i+1} Processed From {n} Total Event | {(((i+1)/(n))*100):.2f} %",end=' ')
    df_residual = pd.DataFrame(df_residual)
    df_confusion = pd.DataFrame(df_confusion)
    return df_residual,df_confusion

# Folder Path

- please fill pretrain, sampling_rate, and label_type carefully

In [None]:
pretrain = 'stead'
sampling_rate = 20
label_type = 'gaussian'

In [None]:
input_folder = r"C:\Users\bonda\Documents\Bondan\TEWS Testing\dataset\2025 Januray Picking Dataset 20 Hz"
output_path = r"C:\Users\bonda\Documents\Bondan\TEWS Testing\model_collection\model_collection\PhaseNet Seisbench Jan 2025"
foldername = join_by(['fine','tuning',pretrain,str(sampling_rate)+'Hz',label_type],separator='_')
output_folder = join_by([output_path,foldername],separator='\\')

In [None]:
os.makedirs(join_by([output_folder,'metadata'],separator='\\'), exist_ok=True)
os.makedirs(join_by([output_folder,'model'],separator='\\'), exist_ok=True)
os.makedirs(join_by([output_folder,'performance'],separator='\\'), exist_ok=True)
os.makedirs(join_by([output_folder,'picture'],separator='\\'), exist_ok=True)

# Order Setting

### 1. First Testing

In [None]:
original_order = np.load(join_by([input_folder,'metadata','original_order.npy'],separator='\\'),allow_pickle=True).item()
original_order

In [None]:
client = Client("GFZ")
t = UTCDateTime("2007/01/02 05:48:50")
stream = client.get_waveforms(network="CX", station="PB01", location="*", channel="HH?", starttime=t-100, endtime=t+100)
inv = client.get_stations(level='response',network='CX',station='PB01')
stream.remove_response(inventory=inv)
stream.detrend(type='demean')
fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(111)
for i in range(3):
    ax.plot(stream[i].times(), stream[i].data, label=stream[i].stats.channel)
ax.legend()

In [None]:
stn = stream.copy()
waveform_duration = stn[0].stats.endtime-stn[0].stats.starttime
stn.resample(sampling_rate=(3001/waveform_duration))
print(stn)
print(stn.plot())

In [None]:
# Build a PhaseNet with default constructor arguments, then fetch the
# pretrained model named by `pretrain` and copy its weights across.
# NOTE(review): only the state dict is transferred; any other attribute set on
# the pretrained model object (sb_pt_model) is not copied — confirm this is intended.
model_original = sbm.PhaseNet()
sb_pt_model = model_original.from_pretrained(pretrain)
model_original.load_state_dict(sb_pt_model.state_dict())

In [None]:
(model_original.in_samples)

In [None]:
target_component = {0:model_original.component_order[0],1:model_original.component_order[1],2:model_original.component_order[2]}
target_component

In [None]:
import torch
X_trial = torch.zeros(1,3,3001)
X_trial[0,0] = torch.FloatTensor(stn.select(component=target_component[0])[0].data)
X_trial[0,1] = torch.FloatTensor(stn.select(component=target_component[1])[0].data)
X_trial[0,2] = torch.FloatTensor(stn.select(component=target_component[2])[0].data)
min_vals = X_trial.min(dim=-1, keepdim=True).values  
max_vals = X_trial.max(dim=-1, keepdim=True).values 
X_trial = 2 * (X_trial - min_vals) / (max_vals - min_vals) - 1

In [None]:
with torch.no_grad():
    model_original.eval()
    y_pred = model_original(X_trial)
fig, axs = plt.subplots(2,figsize=(17,10))
axs[0].plot(X_trial[0,0].numpy(),label='0')
axs[0].plot(X_trial[0,1].numpy(),label='1')
axs[0].plot(X_trial[0,2].numpy(),label='2')
axs[0].set_xlabel('Data Point/Time')
axs[0].set_ylabel('Amplitude')
axs[0].set_title('Waveform Data')
axs[0].legend()
axs[1].plot(y_pred[0,0].numpy(),label='0')
axs[1].plot(y_pred[0,1].numpy(),label='1')
axs[1].plot(y_pred[0,2].numpy(),label='2')
axs[1].set_xlabel('Data Point/Time')
axs[1].set_ylabel('Probability')
axs[1].set_title('Prediction')
axs[1].legend()
fig.tight_layout()

- Please determine the label order (`target_label` below) carefully.
- When the model is used for prediction, the order stored in `model_original.labels` is sometimes not the same as the order of the actual prediction channels.

In [None]:
model_original.labels

In [None]:
target_label = {0:'P',1:'S',2:'N'}

### 2. Second Testing

In [None]:
# Load the summary table stored next to the metadata.
# NOTE(review): the file name is spelled 'clenaed_...' here, while the metadata
# file read later in this notebook is 'cleaned_metadata.csv' — confirm the name on disk.
metadata_summary = pd.read_csv(join_by([input_folder,'metadata','clenaed_metadata_summary.csv'],separator='\\'))
# Relabel the rows; assumes the CSV holds exactly these eight rows in this order — TODO confirm.
metadata_summary.index = ['count','mean','std','min','25%','50%','75%','max']    
metadata_summary

In [None]:
syn_comp = {
    0: find_index_list(value=target_component[0],value_list=[original_order['data'][i] for i in range(0,3)]),
    1: find_index_list(value=target_component[1],value_list=[original_order['data'][i] for i in range(0,3)]),
    2: find_index_list(value=target_component[2],value_list=[original_order['data'][i] for i in range(0,3)])
}
syn_comp

In [None]:
syn_label = {
    0: find_index_list(value=target_label[0],value_list=[original_order['label'][i] for i in range(0,3)]),
    1: find_index_list(value=target_label[1],value_list=[original_order['label'][i] for i in range(0,3)]),
    2: find_index_list(value=target_label[2],value_list=[original_order['label'][i] for i in range(0,3)])
}
syn_label

In [None]:
#
used_model = model_original
index = 15139
X_trial = torch.zeros(1,3,3001)
y_trial = torch.zeros(1,3,3001)
#
path_in_loop = join_by([input_folder,'data',str(index)+'.pth'],separator='\\')
data_in_loop = torch.load(path_in_loop)
rearrange = torch.zeros(1,3,3001)
rearrange[0,0] = data_in_loop[0,syn_comp[0]]
rearrange[0,1] = data_in_loop[0,syn_comp[1]]
rearrange[0,2] = data_in_loop[0,syn_comp[2]]
X_trial[0] = rearrange[0]
#
min_vals = X_trial.min(dim=-1, keepdim=True).values  
max_vals = X_trial.max(dim=-1, keepdim=True).values 
X_trial = 2 * (X_trial - min_vals) / (max_vals - min_vals) - 1
#
path_in_loop = join_by([input_folder,'label',str(index)+'.pth'],separator='\\')
label_in_loop = torch.load(path_in_loop)
rearrange = torch.zeros(1,3,3001)
rearrange[0,0] = label_in_loop[0,syn_label[0]]
rearrange[0,1] = label_in_loop[0,syn_label[1]]
rearrange[0,2] = label_in_loop[0,syn_label[2]]
y_trial[0] = rearrange[0]
#
with torch.no_grad():
    used_model.eval()
    y_pred = used_model(X_trial)
fig, axs = plt.subplots(2,figsize=(17,10))
axs[0].plot(X_trial[0,0].numpy(),label='0')
axs[0].plot(X_trial[0,1].numpy(),label='1')
axs[0].plot(X_trial[0,2].numpy(),label='2')
axs[0].set_xlabel('Data Point/Time')
axs[0].set_ylabel('Amplitude')
axs[0].set_title('Waveform Data')
axs[0].legend()
axs[1].plot(y_pred[0,0].numpy(),label='0')
axs[1].plot(y_pred[0,1].numpy(),label='1')
axs[1].plot(y_pred[0,2].numpy(),label='2')
axs[1].set_xlabel('Data Point/Time')
axs[1].set_ylabel('Probability')
axs[1].set_title('Prediction')
axs[1].legend()
fig.tight_layout()

# Load Metadata

In [None]:
metadata = pd.read_csv(join_by([input_folder,'metadata','cleaned_metadata.csv'],separator='\\'))
metadata

In [None]:
train_index = np.int64(metadata[metadata['splitting'] == 'train']['dataset_index'].tolist())
test_index = np.int64(metadata[metadata['splitting'] == 'test']['dataset_index'].tolist())

# Load Train Data

In [None]:
real_train_index = copy.deepcopy(train_index)
real_train_index

In [None]:
# Pre-allocate the training arrays: one (3, 3001) waveform and label per entry.
X_train = torch.zeros(len(train_index),3,3001)
y_train = torch.zeros(len(train_index),3,3001)
for i, dataset_id in enumerate(train_index):
    # Waveform: read the stored tensor and copy its channels in the order given by syn_comp.
    path_in_loop = join_by([input_folder,'data',str(dataset_id)+'.pth'],separator='\\')
    data_in_loop = torch.load(path_in_loop)
    rearrange = torch.zeros(1,3,3001)
    for channel in range(3):
        rearrange[0,channel] = data_in_loop[0,syn_comp[channel]]
    X_train[i] = rearrange[0]
    # Label: same procedure, with the channel order given by syn_label.
    path_in_loop = join_by([input_folder,'label',str(dataset_id)+'.pth'],separator='\\')
    label_in_loop = torch.load(path_in_loop)
    rearrange = torch.zeros(1,3,3001)
    for channel in range(3):
        rearrange[0,channel] = label_in_loop[0,syn_label[channel]]
    y_train[i] = rearrange[0]
    # Progress indicator; the carriage return rewrites the same output line.
    print(f"\rData Number {i+1} Loaded From {len(train_index)} Total Data Available | {(((i+1)/(len(train_index)))*100):.2f} % {''*200}",end=' ')

In [None]:
data_indices = find_nan_with_data_index(X_train)
data_indices

In [None]:
X_train.shape,y_train.shape

# Load Test Data

In [None]:
real_test_index = copy.deepcopy(test_index)
real_test_index

In [None]:
# Pre-allocate the test arrays: one (3, 3001) waveform and label per entry.
X_test = torch.zeros(len(test_index),3,3001)
y_test = torch.zeros(len(test_index),3,3001)
for i, dataset_id in enumerate(test_index):
    # Waveform: read the stored tensor and copy its channels in the order given by syn_comp.
    path_in_loop = join_by([input_folder,'data',str(dataset_id)+'.pth'],separator='\\')
    data_in_loop = torch.load(path_in_loop)
    rearrange = torch.zeros(1,3,3001)
    for channel in range(3):
        rearrange[0,channel] = data_in_loop[0,syn_comp[channel]]
    X_test[i] = rearrange[0]
    # Label: same procedure, with the channel order given by syn_label.
    path_in_loop = join_by([input_folder,'label',str(dataset_id)+'.pth'],separator='\\')
    label_in_loop = torch.load(path_in_loop)
    rearrange = torch.zeros(1,3,3001)
    for channel in range(3):
        rearrange[0,channel] = label_in_loop[0,syn_label[channel]]
    y_test[i] = rearrange[0]
    # Progress indicator; the carriage return rewrites the same output line.
    print(f"\rData Number {i+1} Loaded From {len(test_index)} Total Data Available | {(((i+1)/(len(test_index)))*100):.2f} % {''*200}",end=' ')

In [None]:
data_indices = find_nan_with_data_index(X_test)
data_indices

In [None]:
X_test.shape,y_test.shape

# Data Scaling

In [None]:
# Min-max scale every trace along its last axis so its values span [-1, 1].
# keepdim=True keeps the reduced axis so the statistics broadcast against X_train.
# NOTE(review): a constant trace (max == min) gives 0/0 = NaN here, and the NaN
# check on X_train runs before this cell — consider re-checking after scaling.
min_vals = X_train.min(dim=-1, keepdim=True).values 
max_vals = X_train.max(dim=-1, keepdim=True).values
X_train = 2 * (X_train - min_vals) / (max_vals - min_vals) - 1

In [None]:
X_train.shape,y_train.shape

In [None]:
# Min-max scale every trace along its last axis so its values span [-1, 1],
# using each trace's own minimum and maximum.
# NOTE(review): a constant trace (max == min) gives 0/0 = NaN here, and the NaN
# check on X_test runs before this cell — consider re-checking after scaling.
min_vals = X_test.min(dim=-1, keepdim=True).values  
max_vals = X_test.max(dim=-1, keepdim=True).values 
X_test = 2 * (X_test - min_vals) / (max_vals - min_vals) - 1

In [None]:
X_test.shape,y_test.shape

# Tensor Dataset

In [None]:
# Mini-batch size shared by both loaders.
bs = 8
# Pair each waveform with its label so that they are batched together.
train_set = TensorDataset(X_train,y_train)
train_loader = DataLoader(train_set,batch_size=bs, shuffle=True)
test_set = TensorDataset(X_test,y_test)
# NOTE(review): the test loader is shuffled too; confirm that is intended.
test_loader = DataLoader(test_set,batch_size=bs, shuffle=True)

# Load Architecture

In [None]:
# Rebuild the reference model from scratch: default-constructed PhaseNet whose
# weights are overwritten with those of the pretrained model named by `pretrain`.
# NOTE(review): a freshly constructed module has not had eval() called on it;
# callers that use model_original for inference must switch it to eval mode.
model_original = sbm.PhaseNet()
sb_pt_model = model_original.from_pretrained(pretrain)
model_original.load_state_dict(sb_pt_model.state_dict())

In [None]:
model = copy.deepcopy(model_original)

# Set Optimizer

In [None]:
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# learning_rate = 1e-3
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Set Loss Function

In [None]:
def criterion(y_pred,y_true,eps=1e-5):
    """Cross-entropy style loss with NaN entries replaced before averaging.

    Computes ``y_true * log(y_pred + eps)`` element-wise. For every
    ``[i, j]`` slice that contains NaN, the NaN entries are overwritten
    in place with the minimum of that slice's non-NaN values. The result is
    averaged over the last axis, summed over axis 1, averaged over the
    batch and negated.

    Note: a later cell of this notebook defines another function that is
    also called `criterion`; when cells run in order, that one replaces
    this one.

    Parameters
    ----------
    y_pred, y_true : torch.Tensor
        Prediction and target, indexed as ``[i, j, k]``.
    eps : float
        Added to `y_pred` before taking the logarithm.

    Returns
    -------
    torch.Tensor
        Scalar loss.
    """
    h = y_true * torch.log(y_pred + eps)
    n_batch, n_channel = h.shape[0], h.shape[1]
    for b in range(n_batch):
        for c in range(n_channel):
            bad_positions = find_nan_with_data_index(h[b,c])
            if not bad_positions:
                continue
            row = h[b,c]
            # Smallest valid value of this slice stands in for every NaN.
            floor_value = row[~torch.isnan(row)].min()
            for k in bad_positions:
                h[b,c,k] = floor_value
    # Mean along the sample axis, sum over the pick axis, mean over the batch.
    return -h.mean(-1).sum(-1).mean()

In [None]:
def calculate_alpha_beta(data):
    # Flatten all dimensions except the batch
    flattened_data = data.view(-1)  # Shape: (N * C * D)    
    # Count positive and negative samples
    num_pos = (flattened_data == 1).sum().item()
    num_neg = (flattened_data == 0).sum().item()
    total = num_pos + num_neg
    if total == 0:
        raise ValueError("No valid samples in the input data.")
    alpha = num_neg / total
    beta = num_pos / total
    return alpha,beta

def criterion(predictions, targets, alpha=1.0, beta=1.0):
    """Weighted binary cross-entropy averaged over every element.

    ``loss = -mean(alpha * t * log(p + eps) + beta * (1 - t) * log(1 - p + eps))``

    Parameters
    ----------
    predictions : torch.Tensor
        Predicted probabilities.
    targets : torch.Tensor
        Target values, broadcastable against `predictions`.
    alpha : float, optional
        Weight of the positive (target) term. Defaults to 1.0.
    beta : float, optional
        Weight of the negative (1 - target) term. Defaults to 1.0.

    Returns
    -------
    torch.Tensor
        Scalar loss.

    Notes
    -----
    `alpha` and `beta` used to be required, but this notebook calls
    ``criterion(output, target)`` with two arguments only, which raised a
    TypeError. With the defaults the loss is unweighted; pass the values
    from `calculate_alpha_beta` explicitly to apply weighting.
    """
    # Small epsilon keeps both logarithms finite at probabilities of 0 and 1.
    epsilon = 1e-8
    loss = -(
        alpha * targets * torch.log(predictions + epsilon) +
        beta * (1 - targets) * torch.log(1 - predictions + epsilon)
    )
    return loss.mean()

alpha, beta = calculate_alpha_beta(y_train)
print(f"Alpha: {alpha}, Beta: {beta}")

In [None]:
# criterion = nn.BCELoss()
# criterion = nn.CrossEntropyLoss()
# criterion = nn.BCEWithLogitsLoss()

In [None]:
criterion(y_train[0:1],y_train[0:1])

# Checking Train and Test Shapes Again

In [None]:
X_train.shape,y_train.shape

In [None]:
X_test.shape,y_test.shape

In [None]:
index = 10
fig, axs = plt.subplots(2,figsize=(17,10))
axs[0].plot(X_train[index,0].numpy(),label='Z')
axs[0].plot(X_train[index,1].numpy(),label='N')
axs[0].plot(X_train[index,2].numpy(),label='E')
axs[0].set_xlabel('Data Point/Time')
axs[0].set_ylabel('Amplitude')
axs[0].set_title('Waveform Data')
axs[0].legend()
axs[1].plot(y_train[index,0].numpy(),label='0')
axs[1].plot(y_train[index,1].numpy(),label='1')
axs[1].plot(y_train[index,2].numpy(),label='2')
axs[1].set_xlabel('Data Point/Time')
axs[1].set_ylabel('Amplitude')
axs[1].set_title('Label Data')
axs[1].legend()

# Training

In [None]:
def str_zero(x):
    """Format a percentage as a left-padded string ending in ' %'.

    The value is truncated with ``int`` and padded with spaces so that
    values below 100 occupy three characters before the ' %' suffix.

    Parameters
    ----------
    x : float
        Percentage value.

    Returns
    -------
    str
        For example ``'  7 %'``, ``' 42 %'`` or ``'100 %'``.
    """
    if x < 10:
        padding = '  '
    elif x < 100:
        padding = ' '
    elif x >= 100:
        padding = ''
    return padding + str(int(x)) + ' %'

# Index used in the file name of the weights saved after each epoch (starts at 0).
weight_saving_index = 0
# Total number of batches processed per epoch (train + test); drives the progress percentage.
n = len(train_loader) + len(test_loader)
# NOTE(review): model_container is created but never used in this cell.
model_container = []
epochs = 100
# The cost histories start with a dummy 0 so that [-1] is valid during the first epoch;
# the cells that plot/export the losses skip that first element.
train_cost, test_cost, time_run = [0], [0], []
run_time = 0

for i in range(epochs):
    st = time.time()
    # Batches processed so far in this epoch (train and test together).
    count = 0
    
    # Training pass over the train loader
    model.train()
    cost = 0
    # NOTE(review): train_count is incremented below but not read in this cell.
    train_count = 0
    for feature,target in train_loader:
        
        output = model(feature)
        # NOTE(review): criterion is invoked with (output, target) only — make sure
        # the active `criterion` definition accepts a two-argument call.
        loss = criterion(output,target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Weight the batch loss by the batch size so the epoch cost is a per-sample average.
        cost += loss.item() * feature.shape[0]
        train_count += 1
        count += 1
        
        # train_cost[-1], test_cost[-1] and run_time shown here are the previous epoch's values;
        # the value in parentheses is the loss of the current batch.
        print(f"\rEpoch: {i+1:4}/{epochs} [{str_zero((count/n)*100)}] | train_cost {train_cost[-1]:.7f} ({loss.item()}) | test_cost {test_cost[-1]:.7f} () | time {run_time:.5f} s", end=' ')
    
    train_cost.append(cost / len(train_set))
    
    # Evaluation pass over the test loader (no gradients, eval mode)
    with torch.no_grad():
        model.eval()
        cost = 0
        # NOTE(review): test_count is incremented below but not read in this cell.
        test_count = 0
        for feature,target in test_loader:
            
            output = model(feature)
            loss = criterion(output,target)
            
            cost += loss.item() * feature.shape[0]
            test_count += 1
            count += 1
            
            print(f"\rEpoch: {i+1:4}/{epochs} [{str_zero((count/n)*100)}] | train_cost {train_cost[-1]:.7f} () | test_cost {test_cost[-1]:.7f} ({loss.item()}) | time {run_time:.5f} s", end=' ')
        
        test_cost.append(cost / len(test_set))
    
    # Save this epoch's weights and record how long the epoch took
    torch.save(model.state_dict(),join_by([output_folder,'model',"weights_epoch_" + str(weight_saving_index) + ".pth"],separator='\\'))
    weight_saving_index += 1
    et = time.time()
    run_time = et - st
    time_run.append(run_time)
    
    # NOTE(review): this increment has no effect; count is reset to 0 at the start of the next epoch.
    count += 1

# Loss Function Curve

In [None]:
plt.plot(train_cost[1:len(train_cost)],label='Data Train')
plt.plot(test_cost[1:len(test_cost)],label='Data Test')
plt.title('Loss Function Curve As Function of Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.legend()
plt.savefig(join_by([output_folder,'picture',"loss_curve.png"],separator='\\'), dpi=2000)
plt.show()

In [None]:
loss_metadata = pd.DataFrame()
loss_metadata['loss_train'] = train_cost[1:len(train_cost)]
loss_metadata['loss_test'] = test_cost[1:len(test_cost)]
loss_metadata['run_time'] = time_run
loss_metadata

In [None]:
loss_metadata.to_csv(join_by([output_folder,'metadata',"loss_metadata.csv"],separator='\\'),index=False)

# Residual Pick and Confusion Matrix Data Frame

In [None]:
label_position = {'P': 0,'S': 1}

In [None]:
train_metadata = metadata[metadata['splitting'] == 'train'].copy()
train_metadata.index = [i for i in range(0,len(train_metadata))]
train_metadata

In [None]:
test_metadata = metadata[metadata['splitting'] == 'test'].copy()
test_metadata.index = [i for i in range(0,len(test_metadata))]
test_metadata

### 1. After (Data Train)

In [None]:
df_residual_train,df_confusion_train = bulk_assessment(X=X_train,y=y_train,metadata=train_metadata,used_model=model,label_position=label_position)

In [None]:
df_residual_train_concat = pd.concat([df_residual_train,df_confusion_train,train_metadata],axis=1)
df_residual_train_concat

In [None]:
df_residual_train_concat.to_csv(join_by([output_folder,'performance',"train_performance_after.csv"],separator='\\'),index=False)

### 2. After (Data Test)

In [None]:
df_residual_test,df_confusion_test = bulk_assessment(X=X_test,y=y_test,metadata=test_metadata,used_model=model,label_position=label_position)

In [None]:
df_residual_test_concat = pd.concat([df_residual_test,df_confusion_test,test_metadata],axis=1)
df_residual_test_concat

In [None]:
df_residual_test_concat.to_csv(join_by([output_folder,'performance',"test_performance_after.csv"],separator='\\'),index=False)

### 3. Before (Data Train)

In [None]:
df_residual_train,df_confusion_train = bulk_assessment(X=X_train,y=y_train,metadata=train_metadata,used_model=model_original,label_position=label_position)

In [None]:
df_residual_train_concat = pd.concat([df_residual_train,df_confusion_train,train_metadata],axis=1)
df_residual_train_concat

In [None]:
df_residual_train_concat.to_csv(join_by([output_folder,'performance',"train_performance_before.csv"],separator='\\'),index=False)

### 4. Before (Data Test)

In [None]:
df_residual_test,df_confusion_test = bulk_assessment(X=X_test,y=y_test,metadata=test_metadata,used_model=model_original,label_position=label_position)

In [None]:
df_residual_test_concat = pd.concat([df_residual_test,df_confusion_test,test_metadata],axis=1)
df_residual_test_concat

In [None]:
df_residual_test_concat.to_csv(join_by([output_folder,'performance',"test_performance_before.csv"],separator='\\'),index=False)

# Model Metadata

- please fill carefully

In [None]:
# Single-row table describing the run.
# NOTE(review): model, data_order, label_order, window_duration, optimizer,
# weight_decay, best_weight, loss_function and scaling are typed in by hand
# rather than derived from earlier cells — update them whenever the setup changes.
model_metadata = pd.DataFrame({
    'model':['PhaseNet'],
    'pretrain':[pretrain],
    'sampling_rate':[sampling_rate],
    'data_order': ['ZNE'],
    'label_order': ['PSN'],
    'window_duration':[150],
    'optimizer':['Adam'],
    'learning_rate':[learning_rate],
    'weight_decay':[False],
    'best_weight': [12],
    'loss_function': ['custom'],
    'scaling': ['-1/1 scaling']
})
# Transpose so that every setting becomes one row, then expose the setting name
# and its value as the 'param' and 'value' columns.
model_metadata = model_metadata.T
model_metadata['param'] = model_metadata.index
model_metadata['value'] = model_metadata[0]
# Drop the original (integer-named) value column now that it is copied to 'value'.
model_metadata.drop(columns=[0],inplace=True)
# Replace the setting-name index with consecutive integers.
model_metadata.index = [i for i in range(0,len(model_metadata))]
model_metadata

In [None]:
model_metadata.to_csv(join_by([output_folder,'metadata',"model_metadata.csv"],separator='\\'),index=False)