# In [None]:
import os
import time

import pandas as pd
import requests

# API key: read from the TWELVEDATA_API_KEY environment variable when it is
# set and non-empty.
# NOTE(review): the literal fallback below is a credential committed to
# source. It is kept only so existing runs keep working; rotate the key and
# remove the fallback once the environment variable is configured.
api_key = os.environ.get('TWELVEDATA_API_KEY') or '816dce360a1c40c7abd7bcc055561bf0'

# Request settings used when building each time-series query
symbol = 'EUR/USD'
interval = '15min'
start_date = '2014-01-01'
end_date = '2024-01-01'
batch_size = 45  # Number of days per batch

# Function to fetch data for a specific date range
def fetch_data(start, end):
    """Request one date window from the Twelve Data ``time_series`` endpoint.

    The module-level ``api_key``, ``symbol`` and ``interval`` settings are
    sent along with the window bounds.

    Args:
        start: Window start, sent as the ``start_date`` query parameter.
        end: Window end, sent as the ``end_date`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Let requests build the query string so values such as 'EUR/USD' are
    # percent-encoded correctly instead of being pasted into the URL.
    params = {
        'apikey': api_key,
        'symbol': symbol,
        'interval': interval,
        'start_date': start,
        'end_date': end,
        'fmt': 'json',
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(
        'https://api.twelvedata.com/time_series',
        params=params,
        timeout=30,
    )
    return response.json()

# Download the configured date range batch by batch, then save it to CSV.
# Initialize DataFrame to hold all data (replaced below once batches arrive)
all_data = pd.DataFrame()

# Per-batch frames are collected here and concatenated once at the end;
# concatenating inside the loop would re-copy all earlier rows every batch.
frames = []

# Honor the configured start_date/end_date instead of hard-coded years
final_end = pd.to_datetime(end_date)
span_start = pd.to_datetime(start_date)

# Loop through 2-year spans
while span_start < final_end:
    current_start = span_start
    current_end = min(span_start + pd.DateOffset(years=2), final_end)

    # Fetch data in smaller batches to avoid API limits
    while current_start < current_end:
        batch_end = min(current_start + pd.DateOffset(days=batch_size), current_end)
        start_str = current_start.strftime('%Y-%m-%d')
        end_str = batch_end.strftime('%Y-%m-%d')

        # A network error or a non-JSON body is treated like any other failed
        # batch, so the batches already downloaded are still saved below.
        try:
            data = fetch_data(start_str, end_str)
        except (requests.exceptions.RequestException, ValueError) as exc:
            print(f"Failed to fetch data: {exc}")
            break

        if 'values' not in data:
            print(f"Failed to fetch data: {data}")
            break

        frames.append(pd.DataFrame(data['values']))
        print(f"Fetched data from {start_str} to {end_str}")

        current_start = batch_end
        # Pause to avoid API rate limits
        time.sleep(300)  # 5-minute pause

    span_start = span_start + pd.DateOffset(years=2)

if frames:
    all_data = pd.concat(frames, ignore_index=True)
    # Each batch starts on the date the previous one ended, so the bar on
    # that shared boundary may be returned twice; keep a single copy.
    if 'datetime' in all_data.columns:
        all_data = all_data.drop_duplicates(subset='datetime').reset_index(drop=True)

# Save data to CSV file
all_data.to_csv('eurusd_15min_data.csv', index=False)
print("Data saved to eurusd_15min_data.csv")


# Reload the saved candles from disk
all_data = pd.read_csv('eurusd_15min_data.csv')

# Show which columns the file provides
print("Available columns:", list(all_data.columns))

# Parse the timestamp column, use it as the index, and order rows by time
all_data = (
    all_data
    .assign(datetime=pd.to_datetime(all_data['datetime']))
    .set_index('datetime')
    .sort_index()
)

# Report how many rows were loaded
num_data_points = all_data.shape[0]
print(f"Number of data points: {num_data_points}")

# Simple moving averages of the close over 5 and 20 rows
for window in (5, 20):
    all_data[f'SMA_{window}'] = all_data['close'].rolling(window=window).mean()

# Remaining indicators come from helper functions defined outside this
# section; they are applied in order, so calculate_momentum receives the
# frame with the columns added above.
all_data['RSI'] = calculate_rsi(all_data['close'], periods=14)
all_data['MACD'] = calculate_macd(all_data['close'])
all_data['ATR'] = calculate_atr(all_data[['high', 'low', 'close']], period=14)
all_data['Momentum'] = calculate_momentum(all_data)

# Discard every row that still contains a NaN in any column
all_data = all_data.dropna()