In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import glob

# Load your dataset
# Assume the dataset is a CSV file with columns: 'x', 'y', 'z', 'amplitude', 'phase'
# data = pd.read_csv('particle_data.csv')


# Load your dataset
# data = pd.read_csv(r"C:\Users\weicheng\Desktop\formal_dataset\p4_stacking_r1\4a-240702-gspat-stacking-0.01v-1_001_pre_process_df2.csv")
# data_label = pd.read_csv(r"C:\Users\weicheng\Desktop\formal_dataset\p4_stacking_r1\4a-240702-gspat-stacking-0.01v-1_001_pre_process_df_labels.csv")

# # Concatenate both datasets if they have the same structure
# data_combined = pd.concat([data, data_label], axis=1)

# # Drop rows with any NaN values from combined dataset
# data_combined.dropna(axis=0, how='any', inplace=True)


# Columns read from every CSV, in the order they are passed to the scaler and
# the model: 'time', four x/y/z triples (suffixes 0-3) and four amp/phase
# pairs (suffixes 0-3).
features = [
    'time',
    'x0', 'y0', 'z0',
    'x1', 'y1', 'z1',
    'x2', 'y2', 'z2',
    'x3', 'y3', 'z3',
    'amp0', 'phase0',
    'amp1', 'phase1',
    'amp2', 'phase2',
    'amp3', 'phase3',
]

# Load all CSV files
file_pattern = 'C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813/4a-240702-gspat-crossing-0.50v-1*_pre_process_df2.csv'  # Adjust the pattern to match your files
# glob.glob returns matches in a filesystem-dependent order; sort them so the
# positional train/test split over `files` is the same on every run/machine.
files = sorted(glob.glob(file_pattern))

# Initialize the scaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Function to preprocess and create sequences
def preprocess_and_create_sequences(file, time_steps=10, *, fit=True):
    """Load one CSV file, scale its feature columns and window them.

    Rows that contain a NaN in any column are dropped (not interpolated)
    before scaling. The remaining rows of the module-level ``features``
    columns are scaled with the module-level ``scaler`` and handed to
    ``create_dataset``.

    NOTE(review): rows are windowed purely by position after the drop, so a
    window may span a gap left by removed rows.

    Args:
        file: Path of the CSV file to read. It must contain every column
            named in ``features``.
        time_steps: Number of consecutive rows per input window.
        fit: When True (the default, and the historical behaviour) the shared
            ``scaler`` is re-fitted on this file before transforming it, which
            overwrites whatever it learned from earlier files. Pass False to
            apply an already fitted scaler unchanged, e.g. to scale held-out
            files with the training-set ranges.

    Returns:
        The ``(X, y)`` pair produced by ``create_dataset(scaled_data,
        time_steps)``.
    """
    print("using:", file)
    data = pd.read_csv(file)

    # Drop incomplete rows; the scaler cannot produce meaningful ranges
    # from rows with missing values.
    data = data.dropna(axis=0, how='any')

    feature_values = data[features]
    if fit:
        scaled_data = scaler.fit_transform(feature_values)
    else:
        scaled_data = scaler.transform(feature_values)

    return create_dataset(scaled_data, time_steps)

# Convert the data to a supervised learning problem
def create_dataset(data, time_steps=1, n_targets=13):
    """Slice a 2-D series into sliding input windows and next-row targets.

    Sample ``i`` uses rows ``i .. i + time_steps - 1`` (all columns) as input
    and the first ``n_targets`` columns of row ``i + time_steps`` as target.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_columns)``.
        time_steps: Number of consecutive rows in each input window.
        n_targets: Number of leading columns to predict. Defaults to 13,
            the width previously hard-coded here.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape
        ``(n_samples, time_steps, n_columns)`` and ``y`` of shape
        ``(n_samples, min(n_targets, n_columns))``, where
        ``n_samples = max(n_rows - time_steps, 0)``. When the input is too
        short to form a single sample both arrays are empty but keep their
        trailing dimensions, so they can still be concatenated with the
        results from other files.
    """
    data = np.asarray(data)
    n_samples = max(len(data) - time_steps, 0)

    # Row indices of every window, shape (n_samples, time_steps); fancy
    # indexing with it yields a correctly shaped result even for 0 samples.
    window_rows = np.arange(n_samples)[:, None] + np.arange(time_steps)
    X = data[window_rows]

    # The target of window i is the row right after it; copy so the result
    # does not alias the caller's array.
    y = data[time_steps:time_steps + n_samples, :n_targets].copy()
    return X, y

# Define the number of time steps (rows per input window)
time_steps = 10

# Split files into training and testing sets (80% training, 20% testing)
split_index = int(len(files) * 0.8)
train_files = files[:split_index]
test_files = files[split_index:]

# np.concatenate raises an opaque "need at least one array to concatenate"
# when either list is empty (no file matched the pattern, or a single file
# so the 80% cut leaves nothing to train on). Fail early and say why.
if not train_files or not test_files:
    raise ValueError(
        f"Need at least one training and one testing file, got "
        f"{len(train_files)} training and {len(test_files)} testing "
        f"from {len(files)} matched file(s)"
    )

# Create a list to store training and testing datasets
X_train_list, y_train_list = [], []
X_test_list, y_test_list = [], []

# Process each training file
for file in train_files:
    X, y = preprocess_and_create_sequences(file, time_steps)
    X_train_list.append(X)
    y_train_list.append(y)

# Process each testing file
for file in test_files:
    X, y = preprocess_and_create_sequences(file, time_steps)
    X_test_list.append(X)
    y_test_list.append(y)

# Concatenate the per-file samples along the sample axis
X_train = np.concatenate(X_train_list, axis=0)
y_train = np.concatenate(y_train_list, axis=0)
X_test = np.concatenate(X_test_list, axis=0)
y_test = np.concatenate(y_test_list, axis=0)

# Build the LSTM model: two stacked 50-unit LSTM layers followed by a dense
# head with 13 outputs (one per target column: time plus the x/y/z columns).
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_steps, len(features))),
    LSTM(50),
    Dense(13),
])

model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model; the test split doubles as the validation set.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
)

# Make predictions
predictions = model.predict(X_test)

# The scaler works on all feature columns, so pad the 13 predicted columns
# with zeros up to the full feature width before inverting, then keep only
# the predicted columns.
# NOTE(review): `scaler` is re-fitted inside preprocess_and_create_sequences
# for each file it processes, so the ranges used here are those of the most
# recently processed file - confirm this is intended.
padding = np.zeros((predictions.shape[0], len(features) - 13))
scaled_predictions = np.hstack((predictions, padding))
predictions = scaler.inverse_transform(scaled_predictions)[:, :13]

# Print every prediction row and the number of rows
print(predictions)
print(len(predictions))

# Round the first column ('time') to 2 decimal places
predictions[:, 0] = predictions[:, 0].round(2)

# Save predictions to CSV
output_columns = [
    'time',
    'x0', 'y0', 'z0',
    'x1', 'y1', 'z1',
    'x2', 'y2', 'z2',
    'x3', 'y3', 'z3',
]
output_df = pd.DataFrame(predictions, columns=output_columns)
output_df.to_csv('predictions.csv', index=False)


using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_001_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_002_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_003_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_004_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_005_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_006_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_007_c_pre_process_df2.csv
using: C:/Users/weicheng/Desktop/formal_dataset/p4_x_r1_240813\4a-240702-gspat-crossing-0.50v-1_008_c_pre_process_df2.csv
using: C:/Users/weichen