# Convert Trading Strategy Results to True Label for LSTM Training

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the strategy's trade log and preview the first rows
trades_csv = 'data/processed_price_turning_BTCUSDT.csv'
df = pd.read_csv(trades_csv)
df.head()

Unnamed: 0,entry_date,entry_price,exit_date,exit_price,PnL,Balance,Cum_PnL
0,2021-01-01 07:00:00,28949.15,2021-01-01 09:45:00,29229.6,-280.45,99719.55,-280.45
1,2021-01-01 10:00:00,29306.43,2021-01-01 15:15:00,29318.62,12.19,99731.74,-268.26
2,2021-01-01 15:30:00,29149.98,2021-01-01 19:45:00,29048.47,101.51,99833.25,-166.75
3,2021-01-01 20:00:00,29183.83,2021-01-02 15:30:00,31635.92,2452.09,102285.34,2285.34
4,2021-01-02 20:30:00,32180.51,2021-01-02 22:00:00,31701.09,479.42,102764.76,2764.76


## Processing data

In [3]:
# Parse each trade's entry timestamp into a real datetime value
entry_stamp = pd.to_datetime(df['entry_date'])

# Place 'datetime' as the first column, keep every other column in its
# existing order, and renumber the rows 0..n-1
other_columns = [name for name in df.columns if name != 'datetime']
df = (
    df.assign(datetime=entry_stamp)
    .loc[:, ['datetime', *other_columns]]
    .reset_index(drop=True)
)

df

Unnamed: 0,datetime,entry_date,entry_price,exit_date,exit_price,PnL,Balance,Cum_PnL
0,2021-01-01 07:00:00,2021-01-01 07:00:00,28949.15,2021-01-01 09:45:00,29229.60,-280.45,99719.55,-280.45
1,2021-01-01 10:00:00,2021-01-01 10:00:00,29306.43,2021-01-01 15:15:00,29318.62,12.19,99731.74,-268.26
2,2021-01-01 15:30:00,2021-01-01 15:30:00,29149.98,2021-01-01 19:45:00,29048.47,101.51,99833.25,-166.75
3,2021-01-01 20:00:00,2021-01-01 20:00:00,29183.83,2021-01-02 15:30:00,31635.92,2452.09,102285.34,2285.34
4,2021-01-02 20:30:00,2021-01-02 20:30:00,32180.51,2021-01-02 22:00:00,31701.09,479.42,102764.76,2764.76
...,...,...,...,...,...,...,...,...
4825,2024-06-08 07:15:00,2024-06-08 07:15:00,69525.60,2024-06-08 12:15:00,69391.60,-134.00,711884.14,611884.14
4826,2024-06-08 12:30:00,2024-06-08 12:30:00,69342.30,2024-06-08 14:30:00,69430.90,-88.60,711795.54,611795.54
4827,2024-06-08 14:45:00,2024-06-08 14:45:00,69499.90,2024-06-08 22:00:00,69425.50,-74.40,711721.14,611721.14
4828,2024-06-08 22:15:00,2024-06-08 22:15:00,69331.00,2024-06-09 04:00:00,69299.70,31.30,711752.44,611752.44


## Create a new dataset for true label

In [4]:
# Label each trade by the sign of its PnL:
#   profit (PnL > 0) -> 1, loss (PnL < 0) -> 0, anything else -> NaN
pnl = df['PnL']
df['label'] = np.select([pnl > 0, pnl < 0], [1, 0], default=np.nan)

# Keep only the entry timestamp and its label
new_dataset = df.loc[:, ['datetime', 'label']]

new_dataset

Unnamed: 0,datetime,label
0,2021-01-01 07:00:00,0.0
1,2021-01-01 10:00:00,1.0
2,2021-01-01 15:30:00,1.0
3,2021-01-01 20:00:00,1.0
4,2021-01-02 20:30:00,1.0
...,...,...
4825,2024-06-08 07:15:00,0.0
4826,2024-06-08 12:30:00,0.0
4827,2024-06-08 14:45:00,0.0
4828,2024-06-08 22:15:00,1.0


In [5]:
# Write the per-trade labels to disk; the row index is not written
label_csv = 'data/true_label.csv'
new_dataset.to_csv(label_csv, index=False)

print("DataFrame created and saved to 'data/true_label.csv'.")

DataFrame created and saved to 'data/true_label.csv'.


## True label in periods

In [6]:
# Build the 15-minute time grid that the period labels are written onto.
# Both endpoints are included, and every slot starts out unlabelled (NaN).
#
# 'min' is used instead of the 'T' offset alias: 'T' is deprecated in recent
# pandas releases, while 'min' is accepted by old and new versions alike.
#
# NOTE(review): the trade table shown earlier in this notebook contains exits
# after 2024-06-09 00:00:00 (e.g. 2024-06-09 04:00:00), so the tail of those
# trades falls outside this grid - confirm the end bound is intentional.
times = pd.date_range(start='2021-01-01 00:00:00', end='2024-06-09 00:00:00', freq='15min')
df_times = pd.DataFrame({'time': times})
df_times['label'] = np.nan  # filled in per trade period further down

In [7]:
# Parse both trade timestamps; values that cannot be parsed become NaT
entry_parsed = pd.to_datetime(df['entry_date'], errors='coerce')
exit_parsed = pd.to_datetime(df['exit_date'], errors='coerce')

# Attach them as 'entry_time' / 'exit_time' and keep only the trades
# for which both timestamps are present
df = (
    pd.DataFrame(df)
    .assign(entry_time=entry_parsed, exit_time=exit_parsed)
    .dropna(subset=['entry_time', 'exit_time'])
)

In [8]:
# Spread each trade's outcome over every 15-minute slot the trade was open.
#
# A trade covers entry_time, entry_time + 15min, ... up to and including
# exit_time. Profitable trades (PnL > 0) mark the matching grid slots 1,
# losing trades (PnL < 0) mark them 0, and trades with any other PnL leave
# the grid untouched. Trades are applied in row order, so on a shared slot
# the later trade wins. Only exact timestamp matches are labelled, so period
# timestamps that are not on the grid are ignored.
#
# The grid is looked up through a DatetimeIndex built once, instead of
# scanning the whole 'time' column with isin() for every single trade.
# 'min' replaces the deprecated 'T' offset alias.
grid = pd.DatetimeIndex(df_times['time'])  # get_indexer needs unique timestamps
labels = df_times['label'].to_numpy(dtype=float, copy=True)

for entry_time, exit_time, pnl in zip(df['entry_time'], df['exit_time'], df['PnL']):
    if pnl < 0:
        outcome = 0.0
    elif pnl > 0:
        outcome = 1.0
    else:
        continue  # break-even or missing PnL: nothing to label

    period_time = pd.date_range(start=entry_time, end=exit_time, freq='15min')
    slots = grid.get_indexer(period_time)  # -1 for timestamps not on the grid
    labels[slots[slots >= 0]] = outcome

df_times['label'] = labels

In [9]:
# Preview the labelled time grid; slots not covered by any trade remain NaN
df_times

Unnamed: 0,time,label
0,2021-01-01 00:00:00,
1,2021-01-01 00:15:00,
2,2021-01-01 00:30:00,
3,2021-01-01 00:45:00,
4,2021-01-01 01:00:00,
...,...,...
120476,2024-06-08 23:00:00,1.0
120477,2024-06-08 23:15:00,1.0
120478,2024-06-08 23:30:00,1.0
120479,2024-06-08 23:45:00,1.0


In [10]:
# Write the per-slot labels to disk; the row index is not written
period_csv = "data/period_labels.csv"
df_times.to_csv(period_csv, index=False)
print("Data saved to data/period_labels.csv")

Data saved to data/period_labels.csv
