In [2]:
import pandas as pd
import numpy as np
import os
from datetime import datetime

# ---------------------------------------------------------------
# 1. Configuration
# ---------------------------------------------------------------
# Where the test CSV is read from and where the sampled sequence
# files are written.
TEST_CSV_PATH = "../data/test_set.csv"
OUTPUT_FOLDER = "../data/test_sequences/"

# Sampling parameters: rows per sequence and how many sequences to draw.
SEQUENCE_LENGTH = 10
NUM_SEQUENCES = 10

# Seed NumPy's global RNG so the randomly drawn start positions are
# repeatable from run to run.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# ---------------------------------------------------------------
# 2. Load Test Data
# ---------------------------------------------------------------
# Read the test CSV into `df`. The 'DateTime' column is parsed as
# dates and used as the frame's index.
print("\u2705 Loading test data...")
df = pd.read_csv(
    TEST_CSV_PATH,
    index_col='DateTime',
    parse_dates=['DateTime'],
)
print(f"Loaded {len(df)} rows from {TEST_CSV_PATH}")

# =====================
# 3. Create Fixed-Length Sequences
# =====================
# Draw NUM_SEQUENCES random windows of SEQUENCE_LENGTH consecutive rows from
# `df` and write each window to its own CSV file inside OUTPUT_FOLDER.
# Start positions are drawn independently, so windows may overlap or repeat.

# Largest valid start position: a window starting here ends exactly on the
# last row of `df`.
max_start = len(df) - SEQUENCE_LENGTH
if max_start < 0:
    # Fail early with a clear message instead of an opaque "low >= high"
    # error from the random draw below.
    raise ValueError(
        f"Test data has {len(df)} rows; at least {SEQUENCE_LENGTH} are "
        "required to build a sequence."
    )

# exist_ok=True creates the folder when missing and is a no-op otherwise,
# replacing the separate existence check.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

for i in range(NUM_SEQUENCES):
    # randint's upper bound is exclusive, so pass max_start + 1 to keep the
    # final window (the one ending on the last row) reachable.
    start_idx = np.random.randint(0, max_start + 1)
    end_idx = start_idx + SEQUENCE_LENGTH

    seq = df.iloc[start_idx:end_idx]

    # Files are numbered from 1 and carry the sequence length in their name.
    file_name = f"sequence_{i+1}_len{SEQUENCE_LENGTH}.csv"
    file_path = os.path.join(OUTPUT_FOLDER, file_name)
    seq.to_csv(file_path)
    print(f"\u2705 Saved sequence {i+1}: {file_name} ({SEQUENCE_LENGTH} rows)")

print("\n\u2705 All sequences generated and saved.")

✅ Loading test data...
Loaded 40436 rows from ../data/test_set.csv
✅ Saved sequence 1: sequence_1_len10.csv (10 rows)
✅ Saved sequence 2: sequence_2_len10.csv (10 rows)
✅ Saved sequence 3: sequence_3_len10.csv (10 rows)
✅ Saved sequence 4: sequence_4_len10.csv (10 rows)
✅ Saved sequence 5: sequence_5_len10.csv (10 rows)
✅ Saved sequence 6: sequence_6_len10.csv (10 rows)
✅ Saved sequence 7: sequence_7_len10.csv (10 rows)
✅ Saved sequence 8: sequence_8_len10.csv (10 rows)
✅ Saved sequence 9: sequence_9_len10.csv (10 rows)
✅ Saved sequence 10: sequence_10_len10.csv (10 rows)

✅ All sequences generated and saved.
