In [1]:
import ast
import pickle

import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw dataset from the CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer="mon_standard_dataset.csv")

In [3]:
# Keep only the raw sequence column and the label column; everything else is dropped.
data = data.loc[:, ['Direction_Size_Sequence', 'Label']]

In [4]:
# Parse each stored sequence string and keep only the sign of every entry:
# +1 for strictly positive values, -1 for everything else (zero and negatives).
# SECURITY: the strings come from a CSV file, so they are parsed with
# ast.literal_eval (Python literals only) instead of eval, which would execute
# any expression embedded in the file.
# NOTE(review): assumes every cell is a plain literal such as "[-512, 512, ...]";
# literal_eval raises ValueError on anything else — confirm against the dataset.
data['Direction_Only_Sequence'] = data['Direction_Size_Sequence'].apply(
    lambda x: [1 if i > 0 else -1 for i in ast.literal_eval(x)]
)

In [5]:
# Keep only the rows whose label lies in the range 0..94 (both ends included).
data = data[(data['Label'] >= 0) & (data['Label'] <= 94)]

In [6]:
def adjust_sequence_length(sequence, target_length=10000, padding_value=-1):
    """Pad or truncate ``sequence`` to exactly ``target_length`` elements.

    Parameters
    ----------
    sequence : list, tuple, numpy.ndarray, other sized iterable, or a number
        The values to adjust. A bare number (Python ``int``/``float`` or any
        NumPy numeric scalar) is treated as a one-element sequence.
    target_length : int, default 10000
        Length of the returned list.
    padding_value : default -1
        Value appended when ``sequence`` is shorter than ``target_length``.

    Returns
    -------
    list
        A new list of ``target_length`` elements: the leading elements of
        ``sequence``, followed by ``padding_value`` repeated as needed.
        The input is never modified.

    Notes
    -----
    NOTE(review): this notebook encodes packet direction as +1 / -1, so the
    default ``padding_value=-1`` cannot be told apart from a real -1 entry.
    Confirm that this is intended before relying on the padded tail.
    """
    # np.number covers NumPy integer scalars (e.g. np.int64), which are not
    # instances of the built-in int and would otherwise fail on len().
    if isinstance(sequence, (int, float, np.number)):
        items = [sequence]
    else:
        # Normalize to a list so that `+` below always means concatenation;
        # with an ndarray it would be element-wise addition, and with a tuple
        # it would raise TypeError.
        items = list(sequence)

    missing = target_length - len(items)
    if missing > 0:
        return items + [padding_value] * missing
    return items[:target_length]


# Convert DataFrame column to NumPy array (an object array holding one list per row)
sequence_array = data['Direction_Only_Sequence'].to_numpy()

# Pad/truncate every sequence to a common length and build the float32 matrix
# in a single step. Passing dtype here avoids first materializing a
# default-integer array of the full (n_samples, length) shape and then copying
# it again with astype; the resulting values are the same.
sequence_array = np.array(
    [adjust_sequence_length(seq) for seq in sequence_array],
    dtype='float32',
)

# Add a trailing axis of size 1: (n_samples, length) -> (n_samples, length, 1)
sequence_array = sequence_array[:, :, np.newaxis]

# sequence_array is now a 3D NumPy array; it is kept as a standalone array
# and is not written back into the DataFrame.

# Verify the shape
print("Shape of sequence_array:", sequence_array.shape)

# If you need to keep other columns from the original DataFrame, you can do:
# other_columns = data.drop('Direction_Only_Sequence', axis=1)

# And if you need to use the sequence_array later with other DataFrame operations:
# data = pd.DataFrame({'Direction_Only_Sequence': list(sequence_array)})
# data = pd.concat([data, other_columns], axis=1)


Shape of sequence_array: (19000, 10000, 1)


In [7]:
# Bind a second name to the processed array. This is an alias, not a copy:
# mon_data and sequence_array refer to the same NumPy array object.
mon_data = sequence_array

In [8]:
# Save the processed data
# The array is serialized with pickle (default protocol) to "mon_data.pkl" in
# the current working directory; "wb" mode overwrites any existing file.
with open("mon_data.pkl", "wb") as f:
    pickle.dump(mon_data, f)

# Reached only if opening the file and dumping the array did not raise.
print("Data processed and saved successfully.")

Data processed and saved successfully.


In [9]:
# Sanity check: show the dimensions of the array that was just saved.
print(mon_data.shape)


(19000, 10000, 1)
