# **<div align="center">DATASET CREATION AND SPLIT</div>**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [7]:
def export_dataset(df: pd.DataFrame, path: str, include_index: bool = False) -> None:

    '''
    Export the DataFrame to a CSV file.

    Parameters:
    df (pd.DataFrame): The DataFrame to export.
    path (str or os.PathLike): The file path where to save the CSV. Plain strings
        and path-like objects (e.g. pathlib.Path) are both accepted.
    include_index (bool): Whether to include the index in the CSV file (default False).

    Returns:
    None: This function prints status messages but does not return a value.

    Raises:
    ValueError: If path is neither a string nor a path-like object.
    '''

    # Local import so the helper does not depend on the notebook's import cell.
    import os

    # DataFrame.to_csv accepts path-like objects as well as strings, so the
    # guard only rejects values that cannot name a file at all.
    if not isinstance(path, (str, os.PathLike)):
        raise ValueError("The path must be a string or a path-like object.")

    try:
        df.to_csv(path, index=include_index)
    except Exception as e:
        # Best-effort export: report the failure instead of stopping the notebook.
        print(f"Error exporting data: {e}")
    else:
        # Only reached when the write completed without raising.
        print(f"Data exported successfully to {path}")

## **Load data**

In [2]:
# Read the processed feature table (first CSV column becomes the index, the
# 'date' column is parsed as datetimes) and order the rows by index.
binance_features = pd.read_csv(
    "../data/processed/binance_features.csv",
    index_col=0,
    parse_dates=['date'],
).sort_index()

## **Dataset creation**

### **Dataset 1: Price predictions (ARIMA, Prophet, LSTM, etc.)**

In [4]:
# The target is the next row's price; the last row has no successor, so its
# target is NaN and the row is dropped.
binance_price = (
    binance_features
    .assign(target_price=lambda frame: frame['price_usd'].shift(-1))
    .dropna(subset=['target_price'])
)

# Positional 80/20 split: the first 80% of rows go to train, the rest to test.
split_idx = int(0.8 * len(binance_price))
train_price, test_price = binance_price.iloc[:split_idx], binance_price.iloc[split_idx:]

print("Dataset PRICE prepared:")
print(f"Train: {train_price.shape}, Test: {test_price.shape}")

Dataset PRICE prepared:
Train: (66, 46), Test: (17, 46)


### **Dataset 2: Returns prediction (ML and DL)**

In [5]:
# The regression target is the next row's return (rows without a successor are
# dropped). A classification version (up/down) is derived from it:
# 1 when the next return is strictly positive, 0 otherwise.
binance_returns = (
    binance_features
    .assign(target_return=lambda frame: frame['returns'].shift(-1))
    .dropna(subset=['target_return'])
    .assign(target_class=lambda frame: (frame['target_return'] > 0).astype(int))
)

# Temporal 80/20 split: the first 80% of rows go to train, the rest to test.
split_idx = int(0.8 * len(binance_returns))
train_returns, test_returns = binance_returns.iloc[:split_idx], binance_returns.iloc[split_idx:]

print("Dataset RETURNS prepared:")
print(f"Train: {train_returns.shape}, Test: {test_returns.shape}")

Dataset RETURNS prepared:
Train: (66, 47), Test: (17, 47)


## **Export datasets**

In [9]:
# Full datasets first, then the train/test splits, written in this order.
# NOTE(review): export_dataset defaults to include_index=False, so the index
# loaded with index_col=0 is not written to these files; confirm that is intended.
datasets_to_export = [
    (binance_price, "../data/processed/binance_target_price.csv"),
    (binance_returns, "../data/processed/binance_target_returns.csv"),
    (train_price, "../data/processed/binance_train_price.csv"),
    (test_price, "../data/processed/binance_test_price.csv"),
    (train_returns, "../data/processed/binance_train_returns.csv"),
    (test_returns, "../data/processed/binance_test_returns.csv"),
]

for dataset_frame, csv_path in datasets_to_export:
    export_dataset(dataset_frame, csv_path)

Data exported successfully to ../data/processed/binance_target_price.csv
Data exported successfully to ../data/processed/binance_target_returns.csv
Data exported successfully to ../data/processed/binance_train_price.csv
Data exported successfully to ../data/processed/binance_test_price.csv
Data exported successfully to ../data/processed/binance_train_returns.csv
Data exported successfully to ../data/processed/binance_test_returns.csv
