In [2]:
import pandas as pd

# Read the raw training data from disk.
df = pd.read_csv('btc_1h_data_training.csv')

# Bind the three trend names to empty frames so they exist before any later
# cell populates them.
uptrend_df, downtrend_df, sideways_df = (pd.DataFrame() for _ in range(3))

In [3]:
# Length (in rows) of the rolling mean and of each window that gets classified.
MA_WINDOW = 6

df['MA'] = df['close'].rolling(window=MA_WINDOW).mean()


def _concat_windows(windows):
    """Stack the collected window slices into one frame.

    Returns an empty DataFrame when no window was collected, because
    ``pd.concat`` raises ``ValueError`` on an empty list.
    """
    return pd.concat(windows) if windows else pd.DataFrame()


# Collect windows in plain lists and concatenate once after the loop. Calling
# pd.concat inside the loop re-copies the whole accumulated frame on every
# iteration (quadratic time). Starting from fresh lists also makes this cell
# safe to re-run on its own: the trend frames are rebuilt instead of having
# the same rows appended to them a second time.
uptrend_windows = []
downtrend_windows = []
sideways_windows = []

ma = df['MA']  # hoisted so the column lookup is not repeated per iteration

# Slide an MA_WINDOW-row window forward one row at a time. Consecutive windows
# overlap, so a given row can land in several windows and therefore in more
# than one trend frame.
for i in range(MA_WINDOW - 1, len(df)):
    start = i - (MA_WINDOW - 1)
    ma_chunk = ma.iloc[start:i + 1]

    # The rolling mean is NaN until a full window of closes is available;
    # skip any window that still contains such a value.
    if ma_chunk.isna().any():
        continue

    window = df.iloc[start:i + 1]

    # The monotonic checks are non-strict, so a perfectly flat moving average
    # satisfies the first branch and is filed under uptrend.
    if ma_chunk.is_monotonic_increasing:
        uptrend_windows.append(window)
    elif ma_chunk.is_monotonic_decreasing:
        downtrend_windows.append(window)
    else:  # Neither rising nor falling across the whole window: sideways
        sideways_windows.append(window)

uptrend_df = _concat_windows(uptrend_windows)
downtrend_df = _concat_windows(downtrend_windows)
sideways_df = _concat_windows(sideways_windows)

In [4]:
# Write each trend frame to its own CSV file, leaving the index out.
for _csv_path, _trend_frame in (
    ('uptrend.csv', uptrend_df),
    ('downtrend.csv', downtrend_df),
    ('sideways.csv', sideways_df),
):
    _trend_frame.to_csv(_csv_path, index=False)

In [7]:
# Row count of every trend frame, keyed by the label used in the printout.
_trend_sizes = {
    "uptrend_df": len(uptrend_df),
    "downtrend_df": len(downtrend_df),
    "sideways_df": len(sideways_df),
}

# First the absolute sizes ...
for _label, _size in _trend_sizes.items():
    print(f"{_label} length: ", _size)

# ... then each frame's share of the combined row count.
total = sum(_trend_sizes.values())
for _label, _size in _trend_sizes.items():
    print(f"{_label} / total length: ", _size / total)

uptrend_df length:  71622
downtrend_df length:  64434
sideways_df length:  191622
uptrend_df / total length:  0.21857433211872632
downtrend_df / total length:  0.19663816307472579
sideways_df / total length:  0.5847875048065478


In [9]:
_trend_frames = [uptrend_df, downtrend_df, sideways_df]

# The smallest trend frame dictates how many rows every trend contributes.
min_length = min(map(len, _trend_frames))

# Draw that many rows from each frame, using the same fixed seed for all three
# so the draw is repeatable.
uptrend_sampled, downtrend_sampled, sideways_sampled = (
    _frame.sample(n=min_length, random_state=42) for _frame in _trend_frames
)

# Stack the three samples, order them by the 'time' column and renumber rows.
# NOTE(review): this relies on the input CSV having a 'time' column - confirm.
_stacked = pd.concat([uptrend_sampled, downtrend_sampled, sideways_sampled])
_ordered = _stacked.sort_values(by='time')
balanced_df = _ordered.reset_index(drop=True)

# Persist the balanced frame without the index column.
balanced_df.to_csv('balanced_trend_data.csv', index=False)

print("Balanced DataFrame created with equal distribution of trends.")

Balanced DataFrame created with equal distribution of trends.
