In [1]:
import numpy as np
import pandas as pd
import os
import glob
from scipy.spatial.distance import pdist, squareform
from sklearn.model_selection import train_test_split

# Data Process

In [3]:
def load_raw(csv_file):
    df = pd.read_csv(csv_file, header=None)
    coords = df.apply(lambda row: np.array([[int(y) for y in x.strip('[]').split(', ')] for x in row[1:]]), axis=1)
    coords_list = coords.tolist()
    return coords_list

def calculate_jcd(coords):
    distances = squareform(pdist(coords, 'euclidean'))
    tril_indices = np.tril_indices_from(distances, k=-1)
    jcd_features = distances[tril_indices]
    return jcd_features

def raw_process(file_path):
    raw_data = []
    coords_list = load_raw(file_path)
    for coords in coords_list:
        jcd_features = calculate_jcd(np.array(coords))
        raw_data.append(jcd_features)    
    raw_data_array = np.array(raw_data)
    raw_data_df = pd.DataFrame(raw_data_array)
    return raw_data_df

def save_jcd(raw_data_df, csv_file, save_dir):
    base_name = os.path.basename(csv_file)
    save_name = base_name.replace('keypoint', 'jcd')
    save_path = os.path.join(save_dir, save_name)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    raw_data_df.to_csv(save_path, index=False, header=False)
    print(f"JCD features saved to {save_path}")

def process_jcd_files(data_dir, save_dir):
    csv_files = glob.glob(os.path.join(data_dir, '*.csv'))
    for csv_file in csv_files:
        raw_data_df = raw_process(csv_file)
        save_jcd(raw_data_df, csv_file, save_dir)

raw_dir = '../data/raw'
jcd_dir = '../data/jcd'
process_jcd_files(raw_dir, jcd_dir)

JCD features saved to ../data/jcd/jcd_0.csv
JCD features saved to ../data/jcd/jcd_1.csv
JCD features saved to ../data/jcd/jcd_2.csv
JCD features saved to ../data/jcd/jcd_3.csv
JCD features saved to ../data/jcd/jcd_4.csv
JCD features saved to ../data/jcd/jcd_5.csv
JCD features saved to ../data/jcd/jcd_6.csv
JCD features saved to ../data/jcd/jcd_7.csv
JCD features saved to ../data/jcd/jcd_8.csv
JCD features saved to ../data/jcd/jcd_9.csv
JCD features saved to ../data/jcd/jcd_10.csv
JCD features saved to ../data/jcd/jcd_11.csv
JCD features saved to ../data/jcd/jcd_12.csv
JCD features saved to ../data/jcd/jcd_13.csv
JCD features saved to ../data/jcd/jcd_14.csv
JCD features saved to ../data/jcd/jcd_15.csv


In [4]:
def merge_label_jcd(jcd_dir, save_path):
    all_jcd_data = []
    for jcd_file in glob.glob(os.path.join(jcd_dir, 'jcd_*.csv')):
        label = int(os.path.basename(jcd_file).split('_')[1].split('.')[0])
        df = pd.read_csv(jcd_file, header=None)
        df['label'] = label
        all_jcd_data.append(df)

    merged_df = pd.concat(all_jcd_data, ignore_index=True)
    merged_df = merged_df.sample(frac=1).reset_index(drop=True)
    merged_df.to_csv(save_path, index=False, header=False)
    print(f"Merged JCD data with labels saved to: {save_path}")

jcd_dir = '../data/jcd'
merge_path = '../data/merge_jcd.csv'
merge_label_jcd(jcd_dir, merge_path)

Merged JCD data with labels saved to: ../data/merge_jcd.csv


# Train

In [5]:
from tensorflow.keras.layers import Input, Dense, BatchNormalization, LeakyReLU, Dropout
from tensorflow.keras.models import Model


def build_dd_net(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    x = Dense(256, use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.01)(x)
    x = Dropout(0.5)(x)

    x = Dense(128, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.01)(x)
    x = Dropout(0.5)(x)

    x = Dense(64, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.01)(x)
    x = Dropout(0.5)(x)

    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    return model


# # Example usage:
# input_shape = (231,)  # for example, 231 JCD features
# num_classes = 5  # assuming 5 classes for classification
#
# model = build_simplified_dd_net(input_shape, num_classes)
# model.summary()

2024-04-01 14:06:26.662548: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-01 14:06:26.706937: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-01 14:06:26.707502: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

def load_data(csv_file):
    df = pd.read_csv(csv_file, header=None)
    X = df.iloc[:, :-1].apply(pd.to_numeric).values
    y = df.iloc[:, -1].apply(pd.to_numeric).values
    return X, y

RANDOM_SEED = 42
dataset = '../data/merge_jcd.csv'
model_save_path = '../models/ddnet_model.h5'

X, y = load_data('../data/merge_jcd.csv')
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.75, random_state=RANDOM_SEED)

input_shape = X_train.shape[1:] # (210,)
num_classes = len(set(y_train)) # 5
cp_callback = ModelCheckpoint(
    model_save_path, verbose=1, save_weights_only=False)
es_callback = EarlyStopping(patience=20, verbose=1)

model = build_dd_net(input_shape, num_classes)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    X_train, 
    y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=128,
    callbacks=[cp_callback, es_callback]
)

Epoch 1/1000


2024-04-01 14:06:58.059353: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-01 14:06:58.083135: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1: saving model to ../models/ddnet_model.h5
Epoch 2/1000
Epoch 2: saving model to ../models/ddnet_model.h5
Epoch 3/1000
Epoch 3: saving model to ../models/ddnet_model.h5
Epoch 4/1000
Epoch 4: saving model to ../models/ddnet_model.h5
Epoch 5/1000


  saving_api.save_model(


Epoch 5: saving model to ../models/ddnet_model.h5
Epoch 6/1000
Epoch 6: saving model to ../models/ddnet_model.h5
Epoch 7/1000
Epoch 7: saving model to ../models/ddnet_model.h5
Epoch 8/1000
Epoch 8: saving model to ../models/ddnet_model.h5
Epoch 9/1000
Epoch 9: saving model to ../models/ddnet_model.h5
Epoch 10/1000
Epoch 10: saving model to ../models/ddnet_model.h5
Epoch 11/1000
Epoch 11: saving model to ../models/ddnet_model.h5
Epoch 12/1000
Epoch 12: saving model to ../models/ddnet_model.h5
Epoch 13/1000
Epoch 13: saving model to ../models/ddnet_model.h5
Epoch 14/1000
Epoch 14: saving model to ../models/ddnet_model.h5
Epoch 15/1000
Epoch 15: saving model to ../models/ddnet_model.h5
Epoch 16/1000
Epoch 16: saving model to ../models/ddnet_model.h5
Epoch 17/1000
Epoch 17: saving model to ../models/ddnet_model.h5
Epoch 18/1000
Epoch 18: saving model to ../models/ddnet_model.h5
Epoch 19/1000
Epoch 19: saving model to ../models/ddnet_model.h5
Epoch 20/1000
Epoch 20: saving model to ../model