In [1]:
import os
import sys
sys.path.append('../')
import requests
import datetime 
import pandas as pd
import json
import pytz


import threading

request_limit = 20  # Maximum number of requests per second
semaphore = threading.Semaphore(request_limit)

In [2]:
with open('../constants/bitget_symbols.json', 'r') as file:
    data = json.load(file)

# Now you can work with the JSON data
instruments = data.keys()
len(instruments)

245

In [3]:
# timeframes= ['1W', '1m', '5m', '15m', '30m', '1H', '4H', '1D']
timeframes= ['1W']

symbol = 'INJ'
granularity = '5m'
start_time = datetime.datetime(2023, 1, 1, 0, 0)
path_db = '../DB'

In [None]:
def generate_time_intervals(start_datetime, end_datetime=datetime.datetime.now().replace(second=0, microsecond=0), timeframe='1D', range=200):
    intervals = []
    current_datetime = datetime.datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S").replace(tzinfo=pytz.timezone('Europe/Paris'))
    end_datetime = end_datetime.replace(tzinfo=pytz.timezone('Europe/Paris'))

    # Define the size of the intervals for each timeframe
    timeframe_intervals = {
        '1m': datetime.timedelta(minutes=1*range),
        '5m': datetime.timedelta(minutes=5*range),
        '15m': datetime.timedelta(minutes=15*range),
        '30m': datetime.timedelta(minutes=30*range),
        '1H': datetime.timedelta(hours=1*range),
        '4H': datetime.timedelta(hours=4*range),
        '1D': datetime.timedelta(days=1*90),
        '1W': datetime.timedelta(weeks=1*12),
    }

    # Get the size of the intervals for the specified timeframe
    interval = timeframe_intervals[timeframe]

    # Generate the intervals
    while current_datetime < end_datetime:
        next_datetime = current_datetime + interval
        if next_datetime > end_datetime:
            next_datetime = end_datetime
        intervals.append((current_datetime.replace(second=0, microsecond=0), next_datetime.replace(second=0, microsecond=0)))
        current_datetime = next_datetime

    return intervals

In [5]:
intervals = generate_time_intervals('2024-01-01 00:00:00', timeframe='1H')
intervals

[(datetime.datetime(2024, 1, 1, 0, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>),
  datetime.datetime(2024, 1, 9, 8, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>)),
 (datetime.datetime(2024, 1, 9, 8, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>),
  datetime.datetime(2024, 1, 17, 16, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>)),
 (datetime.datetime(2024, 1, 17, 16, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>),
  datetime.datetime(2024, 1, 26, 0, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>)),
 (datetime.datetime(2024, 1, 26, 0, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>),
  datetime.datetime(2024, 2, 3, 8, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>)),
 (datetime.datetime(2024, 2, 3, 8, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>),
  datetime.datetime(2024, 2, 11, 16, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>)),
 (datetime.datetime(2024, 2, 11, 16, 0, tzinfo=<DstTzInfo 'Europe/Paris' LMT+0:09:00 ST

In [6]:
def fetch_data(symbol, granularity, interval, product='usdt-futures'):
    
    start_time, end_time = interval

    start_time = int(start_time.timestamp()) * 1000
    end_time = int(end_time.timestamp()) * 1000

    # print(start_time)
    # print(end_time)

    url = f"https://api.bitget.com/api/v2/mix/market/history-mark-candles"
    
    params = {
                    "symbol": f"{symbol}",
                    "productType": product,
                    "granularity": f"{granularity}",
                    "limit": "200",
                    "startTime": start_time,
                    "endTime": end_time
                }

    with semaphore:
        response = requests.get(url, params=params)
        data = response.json()['data']
        # print(len(data))
        return data

In [7]:
fetched = fetch_data('BTCUSDT', '1D', intervals[0])
# fetched2 = fetch_data('BTCUSDT', '1H', intervals[1], intervals[2])
fetched


[['1703952000000', '42392.9', '42928.9', '42001.4', '42475.7', '0', '0'],
 ['1704038400000', '42475.7', '42871.9', '42086.1', '42798.9', '0', '0'],
 ['1704124800000', '42798.9', '45919.5', '42683.2', '45197.1', '0', '0'],
 ['1704211200000', '45197.1', '45527.1', '40406.5', '42982.7', '0', '0'],
 ['1704297600000', '42982.7', '44327.2', '42043.1', '44036.4', '0', '0'],
 ['1704384000000', '44036.4', '44723.4', '42388.4', '43489.9', '0', '0'],
 ['1704470400000', '43489.9', '44303.7', '43095.9', '43853.9', '0', '0'],
 ['1704556800000', '43853.9', '44466.2', '43665.6', '44035.5', '0', '0'],
 ['1704643200000', '44035.5', '45334.3', '43162.1', '44967.7', '0', '0']]

In [8]:
def to_dataframe(data):
    # print(data)
    df = pd.DataFrame(data, columns=['datetime', 'open', 'high', 'low', 'close', 'volume', 'quote_volume'])
    df['datetime'] = pd.to_datetime(df['datetime'].astype(int), unit='ms', origin='unix')
    df.set_index('datetime', inplace=True)
    return df

In [9]:
df1 = to_dataframe(fetched)
df1

Unnamed: 0_level_0,open,high,low,close,volume,quote_volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-30 16:00:00,42392.9,42928.9,42001.4,42475.7,0,0
2023-12-31 16:00:00,42475.7,42871.9,42086.1,42798.9,0,0
2024-01-01 16:00:00,42798.9,45919.5,42683.2,45197.1,0,0
2024-01-02 16:00:00,45197.1,45527.1,40406.5,42982.7,0,0
2024-01-03 16:00:00,42982.7,44327.2,42043.1,44036.4,0,0
2024-01-04 16:00:00,44036.4,44723.4,42388.4,43489.9,0,0
2024-01-05 16:00:00,43489.9,44303.7,43095.9,43853.9,0,0
2024-01-06 16:00:00,43853.9,44466.2,43665.6,44035.5,0,0
2024-01-07 16:00:00,44035.5,45334.3,43162.1,44967.7,0,0


In [None]:
# df2 = to_dataframe(fetched2)
# df2

NameError: name 'fetched2' is not defined

In [None]:
# df = pd.concat([df1, df2], axis=0)
# df

In [None]:
for symbol in instruments:
    for tf in timeframes:
        intervals = generate_time_intervals(start_time, timeframe=tf)  

        print(f'fetching __ {symbol} on {tf}')

        print(intervals)
        fetched = []
        for interval in intervals:
            
            
            
            if data:
                fetched.extend(data)

        df = pd.DataFrame(fetched, columns=['datetime', 'open', 'high', 'low', 'close', 'volume', 'quote_volume'])
        df['datetime'] = pd.to_datetime(df['datetime'].astype(int), unit='ms', origin='unix')
        df.set_index('datetime', inplace=True)
        df



In [None]:
fetch_data()

In [None]:
# # Check if the parent directory exists
# if os.path.exists(path_db):
#     # Create the directory
#     os.mkdir(f'../DB/{symbol}')
# else:
#     df.to_csv(f'../DB/{symbol}/{symbol}USDT_test_{tf}.csv')

In [None]:
range = 200
timeframe_intervals = {
        '1W': datetime.timedelta(weeks=1*12),
        '1m': datetime.timedelta(minutes=1*range),
        '5m': datetime.timedelta(minutes=5*range),
        '15m': datetime.timedelta(minutes=15*range),
        '30m': datetime.timedelta(minutes=30*range),
        '1H': datetime.timedelta(hours=1*range),
        '4H': datetime.timedelta(hours=4*range),
        '1D': datetime.timedelta(days=1*90),
    }

for time in timeframe_intervals.values():
    print(time > datetime.timedelta(days=90))

In [None]:
df_to_check = pd.read_csv('../DB/15m/BTCUSDT_15m.csv')
df_to_check

In [None]:
def check_missing_rows(df, time_frame):
    # Convert the datetime column to datetime type
    df['datetime'] = pd.to_datetime(df['datetime'])

    # Set the datetime column as the index
    df.set_index('datetime', inplace=True)

    # Resample the DataFrame to the specified time frame and count the number of rows in each interval
    resampled = df.resample(time_frame).count()

    # Check if there are any intervals with less than the expected number of rows
    missing_rows = resampled[resampled['open'] < 1]

    # Return True if there are any missing rows, False otherwise
    return len(missing_rows) > 0

missing_rows = check_missing_rows(df_to_check, '4H')
print(missing_rows)