# Convert Trading Strategy Results to True Label for LSTM Training

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('data/processed_price_turning_BTCUSDT.csv')
df.head()

Unnamed: 0,entry_date,entry_price,exit_date,exit_price,PnL,Balance,Cum_PnL
0,2021-01-01 07:00:00,28949.15,2021-01-01 09:45:00,29229.6,-280.45,99719.55,-280.45
1,2021-01-01 10:00:00,29306.43,2021-01-01 15:15:00,29318.62,12.19,99731.74,-268.26
2,2021-01-01 15:30:00,29149.98,2021-01-01 19:45:00,29048.47,101.51,99833.25,-166.75
3,2021-01-01 20:00:00,29183.83,2021-01-02 15:30:00,31635.92,2452.09,102285.34,2285.34
4,2021-01-02 20:30:00,32180.51,2021-01-02 22:00:00,31701.09,479.42,102764.76,2764.76


## Processing data

In [3]:
df['datetime'] = pd.to_datetime(df['entry_date'])

# Set entry_time as the index
df.set_index('datetime', inplace=True)

# Resample to hourly intervals and sum profits
hourly_profit = df['PnL'].resample('H').sum()

# Reset index to make entry_time a column again
hourly_profit = hourly_profit.reset_index()

hourly_profit

Unnamed: 0,datetime,PnL
0,2021-01-01 07:00:00,-280.45
1,2021-01-01 08:00:00,0.00
2,2021-01-01 09:00:00,0.00
3,2021-01-01 10:00:00,12.19
4,2021-01-01 11:00:00,0.00
...,...,...
30113,2024-06-09 00:00:00,0.00
30114,2024-06-09 01:00:00,0.00
30115,2024-06-09 02:00:00,0.00
30116,2024-06-09 03:00:00,0.00


## Creare a new dataset for true label

In [4]:
# Create the label column based on the profit
hourly_profit['label'] = np.where(hourly_profit['PnL'] > 0, 1, np.where(hourly_profit['PnL'] < 0, 0, np.nan))

# Select only the date and profit_flag columns
new_dataset = hourly_profit[['datetime', 'label']]

new_dataset

Unnamed: 0,datetime,label
0,2021-01-01 07:00:00,0.0
1,2021-01-01 08:00:00,
2,2021-01-01 09:00:00,
3,2021-01-01 10:00:00,1.0
4,2021-01-01 11:00:00,
...,...,...
30113,2024-06-09 00:00:00,
30114,2024-06-09 01:00:00,
30115,2024-06-09 02:00:00,
30116,2024-06-09 03:00:00,


In [5]:
# Save the DataFrame to a CSV file
new_dataset.to_csv('data/true_label.csv', index=False)

print("DataFrame created and saved to 'data/true_label.csv'.")

DataFrame created and saved to 'data/true_label.csv'.


## True label in periods

In [6]:
# Sample DataFrame with all hourly times
times = pd.date_range(start='2021-01-01 00:00:00', end='2024-06-09 00:00:00', freq='H')
df_times = pd.DataFrame(times, columns=['time'])
df_times['label'] = np.nan  # Initialize with NaN

In [7]:
# Create DataFrame for trading strategy
df = pd.DataFrame(df)
df['entry_time'] = pd.to_datetime(df['entry_date'], errors='coerce')
df['exit_time'] = pd.to_datetime(df['exit_date'], errors='coerce')

# Check for missing values and handle them
df.dropna(subset=['entry_time', 'exit_time'], inplace=True)

In [8]:
# Iterate through each trade and update labels
for i, row in df.iterrows():
    entry_hour = row['entry_time'].floor('H')
    exit_hour = row['exit_time'].floor('H')
    period_hours = pd.date_range(start=entry_hour, end=exit_hour, freq='H')

    if row['PnL'] < 0:
        df_times.loc[df_times['time'].isin(period_hours), 'label'] = 0
    elif row['PnL'] > 0:
        df_times.loc[df_times['time'].isin(period_hours), 'label'] = 1

In [9]:
df_times

Unnamed: 0,time,label
0,2021-01-01 00:00:00,
1,2021-01-01 01:00:00,
2,2021-01-01 02:00:00,
3,2021-01-01 03:00:00,
4,2021-01-01 04:00:00,
...,...,...
30116,2024-06-08 20:00:00,0.0
30117,2024-06-08 21:00:00,0.0
30118,2024-06-08 22:00:00,1.0
30119,2024-06-08 23:00:00,1.0


In [10]:
# Save to CSV
df_times.to_csv("data/period_labels.csv", index=False)
print("Data saved to data/period_labels.csv")

Data saved to data/period_labels.csv
