# Import packages

In [220]:
from loguru import logger
import pandas as pd
import talib


# Import Data

In [221]:
# Read the BTCUSDT spot klines into one DataFrame per timeframe,
# listed from the finest (1 minute) to the coarsest (1 week) interval.
df_1m, df_5m, df_15m, df_1d, df_1w = (
    pd.read_csv(csv_path)
    for csv_path in (
        'spot_klines_data/BTCUSDT_1m_2024-2025.csv',
        'spot_klines_data/BTCUSDT_5m_2024-2025.csv',
        'spot_klines_data/BTCUSDT_15m_2024-2025.csv',
        'spot_klines_data/BTCUSDT_1d_2024-2025.csv',
        'spot_klines_data/BTCUSDT_1w_2024-2025.csv',
    )
)

# Strategy

In [237]:
class Strategy:
    """Add technical indicators and a binary ``Signal`` column to price data.

    Parameters:
        data (pd.DataFrame): Historical price data. A ``close`` column is
            required by every indicator; the 'trend' mode also reads ``volume``.
        timeframe_type (str): Selects the indicator set and the signal rule:
            'trend'        - EMA50/EMA200, RSI(14), Volume (long-term
                             timeframes like 1D, 1W)
            'intraday'     - RSI(7), MACD and their previous-bar values
                             (short-term timeframes like 1m, 15m, 1H)
            'confirmation' - RSI(7), MACD, EMA9/EMA21
            Any other value adds no indicators and leaves ``Signal`` at 0.
    """

    def __init__(self, data, timeframe_type):
        # Work on a private copy: the indicator columns and the in-place
        # NaN filtering would otherwise alter the caller's DataFrame.
        self.data = data.copy()
        self.timeframe_type = timeframe_type  # 'trend', 'intraday' or 'confirmation'

    def _close_prices(self):
        """Return the ``close`` column as a float64 array (TA-Lib only accepts doubles)."""
        return self.data['close'].to_numpy(dtype='float64')

    def _add_macd(self, close):
        """Store the MACD line and its signal line (12/26/9 periods) on ``self.data``."""
        macd_line, signal_line, _histogram = talib.MACD(
            close, fastperiod=12, slowperiod=26, signalperiod=9
        )
        self.data['MACD'] = macd_line
        self.data['MACD_signal'] = signal_line

    def calculate_indicators(self):
        """Add the indicator columns that belong to ``self.timeframe_type``.

        Returns:
            pd.DataFrame: ``self.data`` with the new columns, or ``None`` when
            ``timeframe_type`` is not one of the supported values.
        """
        if self.timeframe_type == 'trend':  # Long-term timeframes like 1D, 1W
            close = self._close_prices()
            self.data['EMA50'] = talib.EMA(close, timeperiod=50)
            self.data['EMA200'] = talib.EMA(close, timeperiod=200)
            self.data['RSI'] = talib.RSI(close, timeperiod=14)
            # Volume is copied as-is; nothing has to be computed for it.
            self.data['Volume'] = self.data['volume']

        elif self.timeframe_type == 'intraday':  # Short-term timeframes like 1m, 15m, 1H
            close = self._close_prices()
            self.data['RSI'] = talib.RSI(close, timeperiod=7)
            self._add_macd(close)
            # Previous-bar values of both MACD lines.
            self.data['MACD_prev'] = self.data['MACD'].shift(1)
            self.data['MACD_signal_prev'] = self.data['MACD_signal'].shift(1)

        elif self.timeframe_type == 'confirmation':
            close = self._close_prices()
            self.data['RSI'] = talib.RSI(close, timeperiod=7)
            self._add_macd(close)
            self.data['EMA9'] = talib.EMA(close, timeperiod=9)
            self.data['EMA21'] = talib.EMA(close, timeperiod=21)

        else:
            return None
        return self.data

    def preprocess_data(self):
        """Drop every row that contains a NaN in any column and return the data.

        The indicators are NaN until their look-back window is filled, so this
        removes the warm-up rows; rows with a NaN in any other column are
        removed as well.
        """
        self.data.dropna(inplace=True)
        return self.data

    def get_decision(self):
        """Compute indicators, drop NaN rows and flag buy candidates.

        Returns:
            pd.DataFrame: The processed data with a ``Signal`` column that is
            1 where the rule for ``timeframe_type`` holds and 0 elsewhere.

        Raises:
            ValueError: If no rows are left after the NaN rows were dropped.
        """
        self.calculate_indicators()
        self.preprocess_data()

        if self.data.empty:
            raise ValueError("Error: DataFrame is empty before setting signals.")

        self.data['Signal'] = 0  # Default value

        if self.timeframe_type == 'trend':
            # Fast EMA above slow EMA, with RSI strictly between 50 and 70.
            buy = (
                (self.data['EMA50'] > self.data['EMA200'])
                & (self.data['RSI'] > 50)
                & (self.data['RSI'] < 70)
            )
            self.data.loc[buy, 'Signal'] = 1

        elif self.timeframe_type in ('intraday', 'confirmation'):
            # Both short-term modes share one rule: RSI below 30 while the
            # MACD line is above its signal line.
            buy = (self.data['RSI'] < 30) & (self.data['MACD'] > self.data['MACD_signal'])
            self.data.loc[buy, 'Signal'] = 1

        return self.data


# Fetch and process data

In [238]:
def fetch_and_process_data(data, timeframe, close_time, augmentation=0, lookback=200):
    """
    Select a window of rows around `close_time` from historical data.

    Parameters:
        data (str or pd.DataFrame): Path to CSV file or a DataFrame containing
            historical data. A DataFrame is copied, never modified.
        timeframe (str): Timeframe label, one of "1m", "5m", "15m", "30m",
            "1h", "4h", "1d", "1w". It is only validated; the data is not
            resampled.
        close_time (str or datetime): The end time for data selection.
        augmentation (int): Number of additional rows to include after
            `close_time`. Must not be negative.
        lookback (int): Number of rows to include at or before `close_time`.
            Defaults to 200. Must not be negative.

    Returns:
        pd.DataFrame: Up to `lookback` + `augmentation` rows, sorted by
        'close_time' in ascending order with a fresh 0..n-1 index.

    Raises:
        ValueError: If `data` is neither a path nor a DataFrame, if
            `timeframe` is unsupported, or if `augmentation` / `lookback`
            is negative.
        KeyError: If the data has no 'close_time' column.
    """
    # Load data if it's a file path
    if isinstance(data, str):
        df = pd.read_csv(data)
    elif isinstance(data, pd.DataFrame):
        df = data.copy()
    else:
        raise ValueError("`data` must be a file path (str) or a pandas DataFrame.")

    # Ensure 'close_time' column exists
    if 'close_time' not in df.columns:
        raise KeyError("Missing 'close_time' column in the dataset.")

    # NOTE(review): integer epoch values are interpreted by pd.to_datetime as
    # nanoseconds unless a unit is given - confirm the CSV stores
    # 'close_time' as date strings.
    df['close_time'] = pd.to_datetime(df['close_time'])
    close_time = pd.to_datetime(close_time)

    supported_timeframes = ("1m", "5m", "15m", "30m", "1h", "4h", "1d", "1w")
    if timeframe not in supported_timeframes:
        raise ValueError(f"Unsupported timeframe: {timeframe}")

    # head()/tail() treat a negative count as "all but the last/first n rows",
    # which would silently return far more data than requested.
    if augmentation < 0:
        raise ValueError(f"`augmentation` must be non-negative, got {augmentation}")
    if lookback < 0:
        raise ValueError(f"`lookback` must be non-negative, got {lookback}")

    # Sort once; both slices below are then already in chronological order.
    ordered = df.sort_values(by='close_time', ascending=True, kind='stable')

    # Step 1: the most recent `lookback` rows at or before `close_time`
    df_past = ordered[ordered['close_time'] <= close_time].tail(lookback)

    # Step 2: the first `augmentation` rows strictly after `close_time`
    df_future = ordered[ordered['close_time'] > close_time].head(augmentation)

    # Combine past and future data
    return pd.concat([df_past, df_future]).reset_index(drop=True)


# Apply Strategy with a Scheduling function

In [248]:
# Run the strategy on the 5-minute candles and keep the rows flagged as buys.
# NOTE(review): Strategy.calculate_indicators describes the 'trend' mode as
# meant for long-term timeframes like 1D, 1W, yet it is applied to 5m data
# here - confirm this is intended.
startegy = Strategy(df_5m, timeframe_type="trend")
df = startegy.get_decision()
positives = df[df["Signal"] == 1]  # rows where the strategy set Signal to 1

# Fetch the window of rows around `close_time`, plus one row after it.
# NOTE(review): `data`, `timeframe` and `close_time` are not assigned anywhere
# in the visible file; unless an earlier cell defines them this call raises
# NameError - TODO bind them (e.g. to one of the loaded frames and its
# timeframe label) before calling.
collected_ = fetch_and_process_data(data, timeframe, close_time, augmentation=1)
len(collected_)

32851