In [1]:
from pybit.unified_trading import HTTP

import dotenv
import os

from time import sleep, time
import datetime as dt

import pandas as pd

import tqdm

dotenv.load_dotenv('secrets.env')

API_KEY = os.getenv('API')
SECRET_KEY = os.getenv('SECRET')

session = HTTP(
    testnet=False,
    api_key=API_KEY,
    api_secret=SECRET_KEY,
)

In [2]:

def get_kline(interval, days_forward=365, start_year=2022, symbol='BTCUSD'):

    t1 = time()

    data = []
    base = int(dt.datetime(start_year, 1, 1, 0, 00, 00, 000000).timestamp() * 1000)


    for i in tqdm.tqdm(range(base, base + (60 * 1000) * ((60*24)*(days_forward)), (60 * 1000)*(12*60))):

        returns = session.get_kline(
                    symbol=symbol,
                    interval=interval,
                    start=i,
                    end=(i + (60*(60*1000))*12)-1,
                    limit=1000)
        
        data.extend(returns['result']['list'])

        # sleep(1)

    output_df = pd.DataFrame(data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'turnover'])

    for col in output_df.columns:
        try:
            output_df[col] = output_df[col].astype(int)
        except ValueError:
            output_df[col] = output_df[col].astype(float)

    output_df['timestamp'] = output_df['timestamp'].apply(lambda x: dt.datetime.fromtimestamp(x / 1000).strftime('%Y-%m-%d %H:%M:%S'))
    output_df['timestamp'] = pd.to_datetime(output_df['timestamp'])
    output_df.sort_values(by='timestamp', inplace=True)

    print(f'Done in {int(time() - t1)} seconds.')

    return output_df

df = get_kline(1, days_forward=(365*4))

100%|██████████| 2920/2920 [12:24<00:00,  3.92it/s]


Done in 751 seconds.


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1710506 entries, 719 to 1710000
Data columns (total 7 columns):
 #   Column     Dtype         
---  ------     -----         
 0   timestamp  datetime64[ns]
 1   open       float64       
 2   high       float64       
 3   low        float64       
 4   close      float64       
 5   volume     int64         
 6   turnover   float64       
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 104.4 MB


In [None]:
def check_time_series(df=df, interval=1):
    df['diff'] = df['timestamp'].diff()

    # Проверяем, что все разницы равны 1 минуте
    if (df['diff'].iloc[1:] != pd.Timedelta(minutes=interval)).any():
        print("Временной ряд не является непрерывным по минутам")
    else:
        print("Временной ряд непрерывен по минутам")

    df.drop(columns=['diff'], inplace=True)

check_time_series()

Временной ряд непрерывен по минутам


In [11]:
df.reset_index(drop=True, inplace=True)

In [13]:
df.to_parquet('btcusd_1min_4years_2022.parquet')

In [14]:
df2 = pd.read_parquet('btcusd_1min_4years_2022.parquet')

In [16]:
df2

Unnamed: 0,timestamp,open,high,low,close,volume,turnover
0,2022-01-01 00:00:00,45849.5,45904.0,45849.5,45863.5,2513626,54.767864
1,2022-01-01 00:01:00,45863.5,45878.5,45840.5,45878.5,1389570,30.301774
2,2022-01-01 00:02:00,45878.5,45936.5,45878.0,45936.0,1547563,33.708247
3,2022-01-01 00:03:00,45936.0,46018.0,45936.0,46005.5,3086633,67.139093
4,2022-01-01 00:04:00,46005.5,46044.0,46005.0,46044.0,708700,15.402746
...,...,...,...,...,...,...,...
1710501,2025-04-02 20:21:00,87090.0,87101.9,87054.1,87054.1,32979,0.378768
1710502,2025-04-02 20:22:00,87054.1,87054.1,86867.9,86899.5,1482601,17.057325
1710503,2025-04-02 20:23:00,86899.5,86957.9,86876.1,86926.7,352114,4.051255
1710504,2025-04-02 20:24:00,86926.7,86958.4,86915.7,86953.2,117997,1.357082


In [5]:
# base = int(dt.datetime(2022, 1, 1, 0, 00, 00, 000000).timestamp() * 1000)

# dt.datetime.fromtimestamp(base / 1000).strftime('%Y-%m-%d %H:%M:%S')

In [6]:
# base + 60 * 1000
# dt.datetime.fromtimestamp((base + 60 * 1000) / 1000).strftime('%Y-%m-%d %H:%M:%S')

In [7]:
# l = []

# for i in range(base, base + (60 * 1000) * ((60*24)*(365*1)), (60 * 1000)*(12*60)):
#     # l.append(i)
#     print(dt.datetime.fromtimestamp(i / 1000).strftime('%Y-%m-%d %H:%M:%S'))
#     print(dt.datetime.fromtimestamp(((i + (60*(60*1000))*12)-1) / 1000).strftime('%Y-%m-%d %H:%M:%S'))
#     # print('')