# import libs

In [None]:
import os
import socket
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from FCNN import TwoInputFCNN as FCNN


# import data

In [None]:
# Choose the data roots from the machine's hostname: hosts whose name contains
# 'gput' or 'hpc' use the sxy841 home directory, every other host uses the
# ysy home directory.
hostname = socket.gethostname()
matches_hpc_host = any(tag in hostname for tag in ('gput', 'hpc'))

label_root, model_root = (
    ('/home/sxy841/ERIE/silicone/output',
     '/home/sxy841/ERIE/silicone/Track-Shuyuan-2023-06-13/videos')
    if matches_hpc_host else
    ('/home/ysy/ERIE/single_psm_manipulation_dataset/output',
     '/home/ysy/ERIE/videos')
)

# Suffix appended to every tracking-result file name (see the .h5 paths below)
model_name = 'DLC_resnet50_TrackJun13shuffle1_50000'



In [None]:
# Build the training arrays (left-camera inputs, right-camera inputs, targets)
# from the recordings listed below, then scale them to the MinMaxScaler range.
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler

# Recordings used for training
training_sets = ['C_M1_T1_1', 'C_M1_T1_3', 'C_M1_T1_5', 'C_M1_T1_7',
                 'R2_M1_T1_1', 'R2_M1_T1_3', 'R2_M1_T1_5', 'R2_M1_T1_7',
                 'L2_M1_T1_1', 'L2_M1_T1_3', 'L2_M1_T1_5', 'L2_M1_T1_7',
                 'Z2_M1_T1_1', 'Z2_M1_T1_3', 'Z2_M1_T1_5']

# Per-recording chunks are collected and stacked once after the loop; calling
# np.vstack inside the loop would re-copy all previous data on every iteration.
# Each list is seeded with an empty (0, n) array so the stacked result keeps the
# expected column count even if no recording is loaded.
X_train_L_parts = [np.zeros((0, 16))]
X_train_R_parts = [np.zeros((0, 16))]
y_train_parts = [np.zeros((0, 3))]

# `set_name` (not `set`) so the builtin `set` is not shadowed.
for set_name in tqdm(training_sets):
    # load the label file and the left/right tracking tables for this recording
    labels = np.genfromtxt(os.path.join(
        label_root, set_name, 'labels_30hz.txt'), delimiter=',')
    coordinates_L = pd.read_hdf(os.path.join(
        model_root, f'{set_name}_L_h264{model_name}.h5'))
    coordinates_R = pd.read_hdf(os.path.join(
        model_root, f'{set_name}_R_h264{model_name}.h5'))

    # the three sources can differ in length; keep only the common prefix
    frames = min(len(labels), len(coordinates_L), len(coordinates_R))

    # drop every column whose name contains 'likelihood' and convert to numpy
    # NOTE(review): the remaining 16 columns are presumably 8 keypoints x (x, y) - confirm
    X_train_L_parts.append(
        coordinates_L.filter(
            regex='^(?!.*likelihood).*$', axis=1).to_numpy()[:frames])
    # FIX: the right-camera features must come from coordinates_R; the previous
    # code filtered coordinates_L here, making X_train_R a copy of X_train_L.
    X_train_R_parts.append(
        coordinates_R.filter(
            regex='^(?!.*likelihood).*$', axis=1).to_numpy()[:frames])
    # label columns 6..8 are the 3-column regression target
    y_train_parts.append(labels[:frames, 6:9])

X_train_L = np.vstack(X_train_L_parts)
X_train_R = np.vstack(X_train_R_parts)
y_train = np.vstack(y_train_parts)

# One scaler is fitted on the left-camera features and reused for the right ones.
# NOTE(review): right-camera values may therefore fall outside [0, 1]; confirm
# that sharing the left-fitted scaler is intended.
X_scaler = MinMaxScaler()
X_train_L = X_scaler.fit_transform(X_train_L)
X_train_R = X_scaler.transform(X_train_R)

y_scaler = MinMaxScaler()
y_train = y_scaler.fit_transform(y_train)



In [None]:
# Show the shapes of the three training arrays as the cell output (sanity check)
X_train_L.shape, X_train_R.shape, y_train.shape

In [None]:
# Build the validation arrays from the held-out recordings listed below and
# scale them with the scalers that were fitted on the training data.

# Recordings used for validation/testing
test_sets = ['C_M1_T1_2', 'C_M1_T1_4',
             'R2_M1_T1_2', 'R2_M1_T1_4',
             'L2_M1_T1_2', 'L2_M1_T1_4',
             'Z2_M1_T1_2',]

# Per-recording chunks are collected and stacked once after the loop; calling
# np.vstack inside the loop would re-copy all previous data on every iteration.
# Each list is seeded with an empty (0, n) array so the stacked result keeps the
# expected column count even if no recording is loaded.
X_val_L_parts = [np.zeros((0, 16))]
X_val_R_parts = [np.zeros((0, 16))]
y_val_parts = [np.zeros((0, 3))]

# `set_name` (not `set`) so the builtin `set` is not shadowed.
for set_name in tqdm(test_sets):
    # load the label file and the left/right tracking tables for this recording
    labels = np.genfromtxt(os.path.join(
        label_root, set_name, 'labels_30hz.txt'), delimiter=',')
    coordinates_L = pd.read_hdf(os.path.join(
        model_root, f'{set_name}_L_h264{model_name}.h5'))
    coordinates_R = pd.read_hdf(os.path.join(
        model_root, f'{set_name}_R_h264{model_name}.h5'))

    # the three sources can differ in length; keep only the common prefix
    frames = min(len(labels), len(coordinates_L), len(coordinates_R))

    # drop every column whose name contains 'likelihood' and convert to numpy
    X_val_L_parts.append(
        coordinates_L.filter(
            regex='^(?!.*likelihood).*$', axis=1).to_numpy()[:frames])
    # FIX: the right-camera features must come from coordinates_R; the previous
    # code filtered coordinates_L here, making X_val_R a copy of X_val_L.
    X_val_R_parts.append(
        coordinates_R.filter(
            regex='^(?!.*likelihood).*$', axis=1).to_numpy()[:frames])
    # label columns 6..8 are the 3-column regression target
    y_val_parts.append(labels[:frames, 6:9])

X_val_L = np.vstack(X_val_L_parts)
X_val_R = np.vstack(X_val_R_parts)
y_val = np.vstack(y_val_parts)

# Only transform here (no fitting) so validation data uses the training scale
X_val_L = X_scaler.transform(X_val_L)
X_val_R = X_scaler.transform(X_val_R)
y_val = y_scaler.transform(y_val)


In [None]:
# Show the shapes of the three validation arrays as the cell output (sanity check)
X_val_L.shape, X_val_R.shape, y_val.shape

# Training and validation

In [None]:
# Train one two-input network per depth (2, 3, 4 and 5 hidden layers), each for
# 100 epochs, on the GPU when one is available and on the CPU otherwise.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

for depth in range(2, 6):
    net = FCNN(
        input_dim=X_train_L.shape[1],
        hidden_dim=64,
        output_dim=y_train.shape[1],
        device=device,
        num_hidden_layers=depth,
    )
    # NOTE(review): inputs are passed right-camera first, then left-camera;
    # confirm this matches the argument order TwoInputFCNN.train expects.
    net.train(
        X_train_R, X_train_L, y_train,
        X_val_R, X_val_L, y_val,
        epochs=100, use_tqdm=True, save_loss=True,
    )


# Demos

In [None]:
import json
import matplotlib.pyplot as plt
import glob
import numpy as np

# Plot the loss curves stored in every 'params_and_losses*.json' file found in
# the current directory: one subplot per loss component, one line per file.

# Sorted so the line/legend order is the same on every run (glob order is arbitrary)
files = sorted(glob.glob('params_and_losses*.json'))
if not files:
    # Fail with a clear message instead of an IndexError on files[0]
    raise FileNotFoundError(
        "No 'params_and_losses*.json' files found in the current directory")

# The first loss record of the first file fixes the number of subplots.
# The last entry of each record is plotted as the mean loss, the entries
# before it as the per-dimension losses.
with open(files[0], 'r') as f:
    data = json.load(f)
losses = data['losses'][0]
num_subplots = len(losses)

fig, axs = plt.subplots(num_subplots, figsize=(12, 6 * num_subplots))

# plt.subplots returns a bare Axes (not an array) when there is a single subplot
if num_subplots == 1:
    axs = [axs]

for filename in files:
    with open(filename, 'r') as f:
        data = json.load(f)

    # Every key except 'losses' is a training parameter; use them as the line label
    params = {k: v for k, v in data.items() if k != 'losses'}
    label = ', '.join(f'{key}: {value}' for key, value in params.items())

    losses = data['losses']

    # Per-dimension losses: every component except the last one
    for i in range(num_subplots-1):
        axs[i].plot([loss[i] for loss in losses], label=label)

    # Mean loss: the last component of each record
    axs[-1].plot([loss[-1] for loss in losses], label=label)

# Decorate each axis once, after all lines are drawn, so the legend lists every file
for i in range(num_subplots-1):
    axs[i].set_title(f'Loss for dimension {i+1} over iterations')
    axs[i].set_xlabel('Iteration')
    axs[i].set_ylabel('Loss')
    axs[i].grid(True)
    axs[i].legend()

axs[-1].set_title('Mean Loss over iterations')
axs[-1].set_xlabel('Iteration')
axs[-1].set_ylabel('Mean Loss')
axs[-1].grid(True)
axs[-1].legend()

plt.tight_layout()
plt.show()


In [None]:
# Print the 25th and 75th percentile of each of the three target columns
for column_index in range(3):
    column = y_train[:, column_index]
    lower_quartile = np.percentile(column, 25)
    upper_quartile = np.percentile(column, 75)
    print(lower_quartile, upper_quartile)

In [None]:
# Draw a 30-bin histogram of y_train, one labelled series per column
series_names = ['x', 'y', 'z']
plt.hist(y_train, bins=30, edgecolor='black', label=series_names)

# Title and axis captions
plt.title("Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")

# Legend built from the series labels passed to plt.hist
plt.legend()

# Render the figure
plt.show()

In [None]:
# Draw a box plot of y_train with outlier markers hidden
hide_outliers = False
plt.boxplot(y_train, showfliers=hide_outliers)

# Title and x-axis caption
plt.title("Box Plot")
plt.xlabel("Data")

# Render the figure
plt.show()
