In [3]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
import statistics
from scipy.stats import pearsonr
import networkx as nx
from tqdm import tqdm
import requests
from datetime import datetime

In [5]:
# Load the pre-built 100 ms price panel (one column per asset, indexed by timestamp).
# NOTE(review): this is an absolute, machine-specific path, so the notebook will not
# run on another machine without editing it.
panel_path = r'C:\Users\Эвелина Новикова\100ms_df.parquet'
df = pd.read_parquet(panel_path)

### Парсинг рыночных данных

In [7]:
def get_historical_trades(symbol, start_time=None, end_time=None, limit=1000):
    """Fetch a single page of aggregated trades from the Binance REST API.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. ``'SOLUSDT'``.
    start_time, end_time : optional
        Window bounds; anything ``pd.Timestamp`` accepts. They are sent to the
        API as integer epoch milliseconds. A falsy value omits the bound.
    limit : int, default 1000
        Maximum number of rows requested from the API.

    Returns
    -------
    pandas.DataFrame
        Columns ``time`` (datetime), ``price`` (float), ``volume`` (float),
        ``f``, ``l`` and ``m`` exactly as delivered by the API. When the API
        returns no trades the frame has zero rows but the same columns.

    Raises
    ------
    requests.HTTPError
        If the server answers with a non-success status code.
    ValueError
        If the decoded payload is not a list of trades.
    """
    url = "https://api.binance.com/api/v3/aggTrades"
    params = {
        'symbol': symbol,
        'limit': limit,
    }
    # Integer nanoseconds -> milliseconds. Going through float seconds
    # (timestamp() * 1000) can land just below the intended millisecond and
    # then be truncated one millisecond low by int().
    if start_time:
        params['startTime'] = pd.Timestamp(start_time).value // 1_000_000
    if end_time:
        params['endTime'] = pd.Timestamp(end_time).value // 1_000_000

    # A timeout keeps one stalled request from hanging a long download.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    trades = response.json()
    if not isinstance(trades, list):
        # Error payloads are JSON objects, not lists of trades.
        raise ValueError(f"Unexpected aggTrades response for {symbol}: {trades!r}")

    columns = ['time', 'price', 'volume', 'f', 'l', 'm']
    if not trades:
        # An empty list would produce a column-less frame and fail on df['T'];
        # return a typed, zero-row frame so callers can test `.empty`.
        return pd.DataFrame({
            'time': pd.Series(dtype='datetime64[ns]'),
            'price': pd.Series(dtype='float64'),
            'volume': pd.Series(dtype='float64'),
            'f': pd.Series(dtype='int64'),
            'l': pd.Series(dtype='int64'),
            'm': pd.Series(dtype='bool'),
        })[columns]

    df = pd.DataFrame(trades)
    df['time'] = pd.to_datetime(df['T'], unit='ms')
    df['price'] = df['p'].astype(float)
    df['volume'] = df['q'].astype(float)
    return df[columns]


def fetch_all_trades(symbol, start_time, end_time):
    """Download every aggregated trade of ``symbol`` between two timestamps.

    The range is walked in windows of at most one hour, each fetched with
    ``get_historical_trades`` (up to 1000 rows per request). After a
    non-empty page the next request starts right after the rows already kept.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. ``'SOLUSDT'``.
    start_time, end_time :
        Range bounds; anything ``pd.Timestamp`` accepts.

    Returns
    -------
    pandas.DataFrame
        All fetched pages concatenated in request order, with columns
        ``time``, ``price``, ``volume``, ``f``, ``l``, ``m``. Each page keeps
        its own index, so index labels repeat across pages. A zero-row frame
        with the same columns is returned when nothing was fetched.
    """
    limit = 1000
    one_hour = pd.Timedelta(hours=1)
    one_ms = pd.Timedelta(milliseconds=1)

    all_trades = []
    current_time = pd.Timestamp(start_time)
    end_time = pd.Timestamp(end_time)

    while current_time < end_time:
        # Split into one-hour windows, but never ask past the requested end;
        # otherwise the last window pulls in trades after `end_time`.
        window_end = min(current_time + one_hour, end_time)
        chunk = get_historical_trades(
            symbol=symbol,
            start_time=current_time,
            end_time=window_end,
            limit=limit
        )
        if chunk.empty:
            # Nothing traded in this window; move on to the next one.
            current_time = window_end
            continue

        last_time = chunk['time'].iloc[-1]
        if len(chunk) >= limit:
            # A full page may have been cut in the middle of a group of trades
            # sharing the last millisecond. Keep only the rows strictly before
            # it and re-request from that millisecond so none are skipped.
            complete_rows = chunk[chunk['time'] < last_time]
            if not complete_rows.empty and last_time > current_time:
                all_trades.append(complete_rows)
                current_time = last_time
                continue
            # Every row shares one timestamp: fall through and step past it.
            # Trades beyond the page limit in that millisecond cannot be
            # reached with time-based paging.

        all_trades.append(chunk)
        current_time = last_time + one_ms

    if not all_trades:
        # pd.concat([]) raises; return a typed empty frame instead.
        return pd.DataFrame({
            'time': pd.Series(dtype='datetime64[ns]'),
            'price': pd.Series(dtype='float64'),
            'volume': pd.Series(dtype='float64'),
            'f': pd.Series(dtype='int64'),
            'l': pd.Series(dtype='int64'),
            'm': pd.Series(dtype='bool'),
        })

    return pd.concat(all_trades)


In [None]:
# Download SOLUSDT aggregated trades from 2024-11-01 through 2025-11-01 (network-bound).
sol_trades = fetch_all_trades(
    symbol='SOLUSDT',
    start_time='2024-11-01 00:00:00',
    end_time='2025-11-01 23:59:59',
)

In [11]:
# Preview the first rows of the downloaded trade tape.
sol_trades.head(n=5)

Unnamed: 0,time,price,volume,f,l,m
0,2024-11-01 00:00:00.871,168.69,0.217,765243652,765243654,True
1,2024-11-01 00:00:01.105,168.69,0.398,765243655,765243666,True
2,2024-11-01 00:00:02.115,168.69,0.088,765243667,765243667,False
3,2024-11-01 00:00:02.149,168.69,0.305,765243668,765243668,False
4,2024-11-01 00:00:02.550,168.69,0.111,765243669,765243669,False


In [13]:
# Preview the first rows of the 100 ms price panel.
df.head(n=5)

Unnamed: 0_level_0,MEW,BTC,SOL,PEPE,ETH
local_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-11-01 00:00:04.100,0.009051,70321.95,168.625,0.009116,2518.375
2024-11-01 00:00:04.200,0.009053,70321.95,168.625,0.009118,2518.375
2024-11-01 00:00:04.300,0.009053,70321.95,168.635,0.009118,2518.365
2024-11-01 00:00:04.400,0.009054,70321.95,168.635,0.009117,2518.285
2024-11-01 00:00:04.500,0.009056,70317.65,168.635,0.009117,2518.185


In [47]:
# Split the tape by the `m` flag. In Binance aggTrades `m` means "the buyer was
# the maker", so m == False is a taker buy and m == True is a taker sell.
buyer_is_maker = sol_trades['m']
sol_buy = sol_trades.loc[buyer_is_maker == False]
sol_sell = sol_trades.loc[buyer_is_maker == True]

In [49]:
# Index both sides by trade time so they can be resampled.
# NOTE(review): re-running this cell fails, because 'time' is then already the index.
sol_buy = sol_buy.set_index(keys='time', drop=True)
sol_sell = sol_sell.set_index(keys='time', drop=True)

In [51]:
# Anchor of the 100 ms resampling grid, used as `origin` when the trades are resampled.
start_agg = pd.Timestamp(ts_input="2024-11-01 00:00:04.100")

In [None]:
# Resample the trade tape into 100 ms OHLCV bars, separately per side.

# price -> open/high/low/close, volume -> total traded quantity per bin
_OHLCV_SPEC = {
    "price": ["first", "max", "min", "last"],
    "volume": "sum",
}


def _resample_side(side_trades, column_names):
    """Aggregate one side of the tape into 100 ms bars anchored at ``start_agg``.

    ``column_names`` replaces the two-level columns produced by ``agg`` with
    flat names, in the order open, high, low, close, volume.
    """
    # With the default resample settings each bar is stamped with the left edge
    # of its bin. NOTE(review): confirm that alignment is what the later join
    # with `df` expects.
    bars = side_trades.resample("100ms", origin=start_agg).agg(_OHLCV_SPEC)
    bars.columns = column_names
    return bars


ohlcv_buy = _resample_side(
    sol_buy, ["open_b", "high_b", "low_b", "close_b", "volume_b"]
)
ohlcv_sell = _resample_side(
    sol_sell, ["open_s", "high_s", "low_s", "close_s", "volume_s"]
)

In [55]:
# Carry the last known values forward into bins that contain missing values.
# NOTE(review): each column is filled from its own previous value, so an empty
# bin inherits the previous bar's open/high/low rather than the last close.
# Confirm this is intended.
ohlcv_buy = ohlcv_buy.ffill()
ohlcv_sell = ohlcv_sell.ffill()

In [57]:
# Attach the buy-side bars to the price panel and drop every row that still has
# a missing value. Then attach the sell-side bars.
# NOTE(review): no dropna follows the second join, so sell-side gaps stay as NaN.
result1 = df.join(ohlcv_buy, how='left')
result1 = result1.dropna(axis=0, how='any')
joined_df = result1.join(ohlcv_sell, how='left')

In [59]:
# Preview the first rows of the combined price / trade-bar table.
joined_df.head(n=5)

Unnamed: 0_level_0,MEW,BTC,SOL,PEPE,ETH,open_b,high_b,low_b,close_b,volume_b,open_s,high_s,low_s,close_s,volume_s
local_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-11-01 00:00:04.100,0.009051,70321.95,168.625,0.009116,2518.375,168.69,168.69,168.69,168.69,0.0,168.68,168.68,168.68,168.68,38.121
2024-11-01 00:00:04.200,0.009053,70321.95,168.625,0.009118,2518.375,168.68,168.68,168.68,168.68,19.837,168.68,168.68,168.68,168.68,0.0
2024-11-01 00:00:04.300,0.009053,70321.95,168.635,0.009118,2518.365,168.68,168.68,168.68,168.68,0.0,168.68,168.68,168.68,168.68,0.0
2024-11-01 00:00:04.400,0.009054,70321.95,168.635,0.009117,2518.285,168.68,168.68,168.68,168.68,0.11,168.68,168.68,168.68,168.68,0.0
2024-11-01 00:00:04.500,0.009056,70317.65,168.635,0.009117,2518.185,168.68,168.68,168.68,168.68,0.0,168.68,168.68,168.68,168.68,0.0


In [61]:
# Persist the combined table as Parquet in the current working directory.
joined_table = pa.Table.from_pandas(df=joined_df)
pq.write_table(table=joined_table, where='joined_df.parquet')