# Part 1 (Data Collection, Processing, and Resampling)

## Imports

In [112]:
import requests
from abc import ABC, abstractmethod
import time
import binance
import pandas as pd
from requests.exceptions import ConnectTimeout
from tqdm import tqdm
import mplfinance as mpf
import os
import plotly.express as px
import numpy as np
from math import ceil
from scipy.stats import zscore
from statsmodels.robust.scale import mad
from scipy.stats import median_abs_deviation
import plotly.graph_objects as go

In [2]:
import warnings
warnings.filterwarnings('ignore')

## Constants

### Time Intervals

In [3]:
from datetime import datetime

start_date = datetime(2023, 1, 1)
finish_date = datetime(2024, 1, 1)

start_seconds = int(start_date.timestamp())
finish_seconds = int(finish_date.timestamp())

start_ms = int(start_date.timestamp() * 1000)
finish_ms = int(finish_date.timestamp() * 1000)

### Folders

In [4]:
DATA_DIR = 'data/'
RAW_DATA_DIR = DATA_DIR + 'raw data/'
ROBUST_DATA_DIR = DATA_DIR + 'robust raw data/'

## Data Collection (APIs)

### Nobitex

In [None]:
class NobitexAPI(ABC):
    base_url = 'https://chart.nobitex.ir'

    def __init__(self, data, status, errmsg=None):
        self.data = data
        self.status = status
        self.errmsg = errmsg

    def __repr__(self):
        return f"{self.__class__.__name__}(status={self.status}, data={self.data}, errmsg={self.errmsg})"

    @classmethod
    @abstractmethod
    def fetch(cls, **kwargs):
        """Fetch data from Nobitex API. Must be implemented by subclasses."""
        pass

    @classmethod
    def get(cls, endpoint, params):
        """Generic GET request method."""
        url = f"{cls.base_url}{endpoint}"
        url_with_params = requests.Request('GET', url, params=params).prepare().url
        
        # Print the full URL with parameters
        print(f"Fetching: {url_with_params}")
        
        response = requests.get(url, params=params)
        data = response.json()
        return data

class NobitexMarketHistory(NobitexAPI):
    @classmethod
    def fetch(cls, symbol, resolution, to, from_=None, countback=None, page=1):
        # Ensure resolution is in the correct format
        if isinstance(resolution, int):
            resolution = str(resolution)

        # Define maximum 40-day interval in seconds (40 days * 24 hours * 60 minutes * 60 seconds)
        max_interval = 30 * 24 * 60 * 60

        data = []
        current_from = from_
        current_to = min(to, current_from + max_interval)
        
        while current_from < to:
            endpoint = '/market/udf/history'
            params = {
                'symbol': symbol,
                'resolution': resolution,
                'from': current_from,
                'to': current_to,
                'countback': countback,
                'page': page
            }
            params = {k: v for k, v in params.items() if v is not None}
            
            # Use the generic get method from NobitexAPI
            result = cls.get(endpoint, params)
            
            # Process result
            if result.get('s') == 'ok':
                # Combine all data fields into a list of dictionaries
                combined_data = [
                    {
                        'open_time': t,
                        'open': o,
                        'high': h,
                        'low': l,
                        'close': c,
                        'volume': v,
                    }
                    for t, o, h, l, c, v in zip(result['t'], result['o'], result['h'], result['l'], result['c'], result['v'])
                ]
                data.extend(combined_data)
            elif result.get('s') == 'error':
                return cls(data=None, status='error', errmsg=result.get('errmsg'))
            elif result.get('s') == 'no_data':
                return cls(data=None, status='no_data')
            
            # Update the from and to timestamps for the next request
            current_from = current_to
            current_to = min(to, current_from + max_interval)
            time.sleep(1)  # Avoid hitting API rate limits

        return cls(data=data, status='ok')

    def to_dataframe(self):
        """Convert data to pandas DataFrame with specified columns."""
        df = pd.DataFrame(self.data)
        return df

In [None]:
nobitex_history = NobitexMarketHistory.fetch(symbol='BTCIRT', resolution='1', from_=start_seconds, to=finish_seconds)
len(nobitex_history.data)

### Binance

#### Client

In [70]:
binance_client = binance.Client()

#### Fetch Data

In [71]:
def fetch_binance_data(symbol, interval, start_ms, finish_ms):
    all_klines = []

    # Approximate number of iterations (assuming 1 minute interval and 1000 klines per request)
    total_iterations = ceil((finish_ms - start_ms) // (60000 * 1000)) + 1
    
    with tqdm(total=total_iterations) as pbar:
        while True:
            try:
                klines = binance_client.get_klines(symbol=symbol, interval=interval, startTime=start_ms, endTime=finish_ms, limit=1000)
                if not klines:
                    print("No more data returned.")
                    break

                all_klines.extend(klines)
                start_ms = klines[-1][6] + 1
                pbar.update(1)  # Update the progress bar for each iteration

                # if klines[-1][6] >= finish_ms:
                #     print("Reached the end of the specified time range.")
                #     break
            except ConnectTimeout:
                print("Connection timed out. Retrying...")
                time.sleep(5)
            except Exception as e:
                print(f"An error occurred: {e}")
                break

    if all_klines:
        df = pd.DataFrame(all_klines, columns=['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
        df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
        df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
        return df
    else:
        print("No data was fetched.")
        return pd.DataFrame()

In [72]:
symbol = 'BTCUSDT'
interval = '1m' 
binance_df = fetch_binance_data(symbol, interval, start_ms, finish_ms)

100%|██████████| 571/571 [05:55<00:00,  1.61it/s]

No more data returned.





In [73]:
print(binance_df.shape)
binance_df.head()

(570161, 12)


Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,2022-12-31 20:30:00,16570.99,16573.62,16569.74,16572.11,61.96374,2022-12-31 20:30:59.999,1026824.1669868,2043,31.94032,529305.0157997,0
1,2022-12-31 20:31:00,16572.52,16573.2,16571.2,16572.65,42.60972,2022-12-31 20:31:59.999,706139.9319175,1671,20.16955,334258.9160295,0
2,2022-12-31 20:32:00,16572.65,16573.2,16570.88,16571.0,18.42637,2022-12-31 20:32:59.999,305359.9296266,989,8.08882,134047.2657383,0
3,2022-12-31 20:33:00,16571.33,16572.85,16570.6,16572.54,20.70374,2022-12-31 20:33:59.999,343100.3534753,993,11.1742,185178.7635664,0
4,2022-12-31 20:34:00,16572.54,16572.84,16571.31,16571.31,14.77349,2022-12-31 20:34:59.999,244824.9488144,902,8.05014,133406.4797428,0


In [75]:
def load_and_preprocess_binance_data(file_name='Binance-BTCUSDT.csv'):
    global binance_df
    
    if 'binance_df' not in globals():
        file_path = os.path.join(DATA_DIR, file_name)
        
        if os.path.exists(file_path):
            binance_df = pd.read_csv(file_path)
            print(f"Loaded data from {file_path}")
        else:
            print(f"File {file_path} does not exist. Check the path.")
            return
    else:
        print("binance_df is already defined.")

    binance_df['close_time'] = pd.to_datetime(binance_df['close_time'])
    
    columns_to_convert = ['open', 'high', 'low', 'close', 'volume', 
                          'quote_asset_volume', 'number_of_trades', 
                          'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']
    binance_df[columns_to_convert] = binance_df[columns_to_convert].apply(pd.to_numeric, errors='coerce')
    
    binance_df.dropna(inplace=True)
    binance_df['open_time'] = pd.to_datetime(binance_df['open_time'])
    binance_df.set_index('open_time', inplace=True)
    binance_df.sort_index()
    print("Data preprocessing complete.")


In [76]:
load_and_preprocess_binance_data()

binance_df is already defined.
Data preprocessing complete.


In [82]:
print(binance_df.index[0])
print(binance_df.index[-1])

2023-01-01 00:00:00
2024-01-01 00:00:00


#### Save CSV

In [81]:
start_time = pd.to_datetime(start_date)
end_time = pd.to_datetime(finish_date)

binance_df = binance_df[(binance_df.index >= start_time) & (binance_df.index <= end_time)]

In [83]:
binance_df.to_csv(RAW_DATA_DIR + f'Binance-{symbol}.csv')

### Load CSV

In [None]:
binance_df = pd.read_csv(ROBUST_DATA_DIR + f'Binance-{symbol}.csv')
binance_df['open_time'] = pd.to_datetime(binance_df['open_time'])
binance_df.set_index('open_time', inplace=True)

#### Load & Pre-Process

#### Visualization

In [None]:
def plot_candles(df):
    if df.empty:
        print("The DataFrame is empty. No data to plot.")
        return
    
    df.index.name = 'Date'
    df = df[['open', 'high', 'low', 'close', 'volume']]
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    mpf.plot(df, type='candle', volume=True, style='charles', title=f'Candles')

In [None]:
plot_candles(binance_df[:100])

## Data Pre-Processing

### Handling Missing Data

To address missing candles in a time series dataset, particularly for financial data like Bitcoin trading, it's crucial to maintain the continuity and integrity of the data series. Here are a couple of simple strategies that can be used to fill in missing data:

1. Carry Forward the Last Known Value (Last Observation Carried Forward - LOCF)
    In this method, if a time interval is missing, the data from the last available interval is carried forward to fill the gap. This approach assumes that the market conditions did not change significantly between the missing intervals.

2. Simple Average of Adjacent Data Points
    This method uses the average of the data points before and after the missing interval to fill the gap. It assumes that the missing data is likely to be similar to the average of the surrounding points.

Considerations
- Granularity: Given that the data is on a one-minute scale, gaps might not significantly impact the analysis if they are sparse.
- Market Conditions: Financial markets can change rapidly, so the chosen method should be as accurate as possible, given the limitations.

In [8]:
def load_csv(input_path):
    return pd.read_csv(input_path)

def save_csv(df, output_path):
    df.to_csv(output_path, index=True)

In [9]:
def fill_missing_data(df, start_date, finish_date, method='polynomial'):
    # Rename the 'datetime' column to 'open_time'
    df.rename(columns={'datetime': 'open_time'}, inplace=True)
    
    # Set 'open_time' as the index and sort it
    df.set_index('open_time', inplace=True)
    df.sort_index(inplace=True)
    
    # Ensure the index is a DatetimeIndex
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)
    
    # Trim the dataset to the specified date range
    df = df.loc[start_date:finish_date]
    
    # Handle missing values based on the specified method
    if method == 'ffill':
        df_processed = df.resample('min').ffill()
    elif method == 'interpolate':
        df_processed = df.resample('min').interpolate(method='linear')
        df_processed.ffill(axis=0, inplace=True)  # For any remaining missing values
    elif method == 'polynomial':
        df_processed = df.resample('min').interpolate(method='polynomial', order=2)
        df_processed.ffill(axis=0, inplace=True)  # For any remaining missing values
    elif method == 'spline':
        df_processed = df.resample('min').interpolate(method='spline', order=3)
        df_processed.ffill(axis=0, inplace=True)  # For any remaining missing values
    elif method == 'time':
        df_processed = df.resample('min').interpolate(method='time')
        df_processed.ffill(axis=0, inplace=True)  # For any remaining missing values
    else:
        raise ValueError("Invalid method. Choose either 'ffill', 'interpolate', 'polynomial', 'spline', or 'time'.")
    
    print('Converting Candles...')
    print(f'From: {df.index.min()}')
    print(f'To: {df.index.max()}')
    print(30 * '-')
    
    return df_processed

### Outlier Detection and Correction

In [37]:
def rolling_zscore_outlier_handler(df, columns=None, threshold=4.0, window=90):
    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns
    else:
        columns = [col for col in columns if col in df.columns]
    
    for col in columns:
        rolling_median = df[col].rolling(window=window, center=True).median()
        rolling_mad = df[col].rolling(window=window, center=True).apply(
            lambda x: median_abs_deviation(x, scale='normal')
        )
        rolling_mad[rolling_mad == 0] = 1
        rolling_z_scores = np.abs((df[col] - rolling_median) / rolling_mad)
        outliers = rolling_z_scores > threshold
        num_outliers = np.sum(outliers)
        
        if num_outliers > 0:
            print(f"Handling {num_outliers} outliers in column {col}.")
            for idx in df[outliers].index:
                lower_bound = max(0, idx - window // 2)
                upper_bound = min(len(df) - 1, idx + window // 2)
                median_val = df[col].iloc[lower_bound:upper_bound].median()
                df.at[idx, col] = median_val

    print("Handled outliers in the data using rolling Z-score method.")
    return df

def rolling_boxplot_outlier_handler(df, columns=None, iqr_multiplier=1.5, window=30):
    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns
    else:
        columns = [col for col in columns if col in df.columns]
    
    for col in columns:
        rolling_q1 = df[col].rolling(window=window, center=True).quantile(0.25)
        rolling_q3 = df[col].rolling(window=window, center=True).quantile(0.75)
        rolling_iqr = rolling_q3 - rolling_q1
        lower_bound = rolling_q1 - iqr_multiplier * rolling_iqr
        upper_bound = rolling_q3 + iqr_multiplier * rolling_iqr
        outliers = (df[col] < lower_bound) | (df[col] > upper_bound)
        num_outliers = np.sum(outliers)

        if num_outliers > 0:
            print(f"Handling {num_outliers} outliers in column {col}.")
            for idx in df[outliers].index:
                lower_idx = max(0, idx - window // 2)
                upper_idx = min(len(df) - 1, idx + window // 2)
                median_val = df[col].iloc[lower_idx:upper_idx].median()
                df.at[idx, col] = median_val

    print("Handled outliers in the data using rolling boxplot method.")
    return df

In [25]:
input_paths = [
    RAW_DATA_DIR + 'Binance-BTCUSDT.csv',
    RAW_DATA_DIR + 'btctmn_m1_tabdeal.csv',
    RAW_DATA_DIR + 'usdttmn_m1_nobitex.csv',
    RAW_DATA_DIR + 'usdttmn_m1_tabdeal.csv',
    RAW_DATA_DIR + 'usdttmn_m1_wallex.csv'
]

output_paths = [
    ROBUST_DATA_DIR + 'Binance-BTCUSDT.csv',
    ROBUST_DATA_DIR + 'Tabdeal-BTCTMN.csv',
    ROBUST_DATA_DIR + 'Nobitex-USDTTMN.csv',
    ROBUST_DATA_DIR + 'Tabdeal-USDTTMN.csv',
    ROBUST_DATA_DIR + 'Wallex-USDTTMN.csv'
]

In [27]:
for input_path, output_path in zip(input_paths, output_paths):
    df = load_csv(input_path)
    df_processed = fill_missing_data(df, start_date, finish_date)
    # df_without_outliers = rolling_zscore_outlier_handler(df_processed)
    save_csv(df_processed, output_path)

Converting Candles...
From: 2023-01-01 00:00:00
To: 2024-01-01 00:00:00
------------------------------
Converting Candles...
From: 2023-01-01 00:00:00
To: 2024-01-01 00:00:00
------------------------------
Converting Candles...
From: 2023-01-01 00:00:00
To: 2024-01-01 00:00:00
------------------------------
Converting Candles...
From: 2023-01-01 00:00:00
To: 2024-01-01 00:00:00
------------------------------
Converting Candles...
From: 2023-01-01 00:00:00
To: 2024-01-01 00:00:00
------------------------------


## Implied Data Extraction

In [28]:
btc_usdt = pd.read_csv(r'data\robust raw data\Binance-BTCUSDT.csv')
btc_tmn = pd.read_csv(r'data\robust raw data\Tabdeal-BTCTMN.csv')

### Processing OHLCV Data into a Representative Price Series

In [29]:
# Convert open_time to datetime
btc_usdt['open_time'] = pd.to_datetime(btc_usdt['open_time'])
btc_tmn['open_time'] = pd.to_datetime(btc_tmn['open_time'])

# Representative price calculations
btc_usdt['close_price'] = btc_usdt['close']
btc_tmn['close_price'] = btc_tmn['close']

btc_usdt['weighted_avg_price'] = (btc_usdt['open'] * 0.2 + btc_usdt['high'] * 0.2 + btc_usdt['low'] * 0.2 + btc_usdt['close'] * 0.4)
btc_tmn['weighted_avg_price'] = (btc_tmn['open'] * 0.2 + btc_tmn['high'] * 0.2 + btc_tmn['low'] * 0.2 + btc_tmn['close'] * 0.4)

btc_usdt['average_price'] = btc_usdt[['open', 'high', 'low', 'close']].mean(axis=1)
btc_tmn['average_price'] = btc_tmn[['open', 'high', 'low', 'close']].mean(axis=1)

# Merge the DataFrames on 'open_time'
merged_df = pd.merge(btc_usdt, btc_tmn, on='open_time', suffixes=('_usdt', '_tmn'))

### Implied USDT-TMN Extraction

In [30]:
merged_df['close_price'] = merged_df['close_price_tmn'] / merged_df['close_price_usdt']
merged_df['weighted_avg_price'] = merged_df['weighted_avg_price_tmn'] / merged_df['weighted_avg_price_usdt']
merged_df['average_price'] = merged_df['average_price_tmn'] / merged_df['average_price_usdt']

result_df = merged_df[['open_time', 'close_price', 'average_price', 'weighted_avg_price']]
result_df.set_index('open_time', inplace=True)

In [31]:
result_df

Unnamed: 0_level_0,close_price,average_price,weighted_avg_price
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01 00:00:00,39492.634282,39496.233277,39495.513425
2023-01-01 00:01:00,39483.525975,39488.510432,39487.513637
2023-01-01 00:02:00,39511.618469,39497.600853,39500.404183
2023-01-01 00:03:00,39589.426957,39551.595836,39559.161716
2023-01-01 00:04:00,39585.284947,39590.330892,39589.321600
...,...,...,...
2023-12-31 23:56:00,51496.272007,51470.823315,51475.913759
2023-12-31 23:57:00,51427.345828,51439.651933,51437.189879
2023-12-31 23:58:00,51365.185417,51399.888289,51392.947422
2023-12-31 23:59:00,51343.329042,51389.360170,51380.152764


In [32]:
result_df.to_csv(ROBUST_DATA_DIR + 'Implied-USDTTMN.csv')

## USDT-TMN Analysis

### Loading Datasets

In [57]:
def convert_to_datetime(df):
    df['open_time'] = pd.to_datetime(df['open_time'], errors='coerce')
    df.set_index('open_time', inplace=True)
    df.sort_index(inplace=True)
    return df

nobitex_usdttmn = convert_to_datetime(pd.read_csv(r'data/robust raw data/Nobitex-USDTTMN.csv'))
tabdeal_usdttmn = convert_to_datetime(pd.read_csv(r'data/robust raw data/Tabdeal-USDTTMN.csv'))
wallex_usdttmn = convert_to_datetime(pd.read_csv(r'data/robust raw data/Wallex-USDTTMN.csv'))
implied_usdttmn = convert_to_datetime(pd.read_csv(r'data/robust raw data/Implied-USDTTMN.csv'))

### Pre-Processing

In [40]:
def process_data(df):
    df['open_time'] = pd.to_datetime(df['open_time'])
    df['close_price'] = df['close']
    df['weighted_avg_price'] = (df['open'] * 0.2 + df['high'] * 0.2 + df['low'] * 0.2 + df['close'] * 0.4)
    df['average_price'] = df[['open', 'high', 'low', 'close']].mean(axis=1)
    rolling_zscore_outlier_handler(df, columns=['close_price', 'weighted_avg_price', 'average_price'], threshold=5, window=60)
    return df

nobitex_usdttmn = process_data(nobitex_usdttmn)
tabdeal_usdttmn = process_data(tabdeal_usdttmn)
wallex_usdttmn = process_data(wallex_usdttmn)

Handling 6727 outliers in column close_price.
Handling 509 outliers in column weighted_avg_price.
Handling 489 outliers in column average_price.
Handled outliers in the data using rolling Z-score method.
Handling 26462 outliers in column close_price.
Handling 18505 outliers in column weighted_avg_price.
Handling 18329 outliers in column average_price.
Handled outliers in the data using rolling Z-score method.
Handling 5183 outliers in column close_price.
Handling 1241 outliers in column weighted_avg_price.
Handling 1173 outliers in column average_price.
Handled outliers in the data using rolling Z-score method.


In [41]:
merged_data = implied_usdttmn.merge(nobitex_usdttmn[['open_time', 'close_price', 'average_price', 'weighted_avg_price']], on='open_time', how='inner', suffixes=('', '_nobitex'))
merged_data = merged_data.merge(tabdeal_usdttmn[['open_time', 'close_price', 'average_price', 'weighted_avg_price']], on='open_time', how='inner', suffixes=('', '_tabdeal'))
merged_data = merged_data.merge(wallex_usdttmn[['open_time', 'close_price', 'average_price', 'weighted_avg_price']], on='open_time', how='inner', suffixes=('', '_wallex'))

merged_data.rename(columns={
    'close_price': 'implied_close_price',
    'close_price_nobitex': 'nobitex_close_price',
    'close_price_tabdeal': 'tabdeal_close_price',
    'close_price_wallex': 'wallex_close_price',
    'average_price': 'implied_average_price',
    'average_price_nobitex': 'nobitex_average_price',
    'average_price_tabdeal': 'tabdeal_average_price',
    'average_price_wallex': 'wallex_average_price',
    'weighted_avg_price': 'implied_weighted_avg_price',
    'weighted_avg_price_nobitex': 'nobitex_weighted_avg_price',
    'weighted_avg_price_tabdeal': 'tabdeal_weighted_avg_price',
    'weighted_avg_price_wallex': 'wallex_weighted_avg_price'
}, inplace=True)

### Save Output

In [48]:
merged_data.to_csv(ROBUST_DATA_DIR + 'Representative_Price_Series.csv')

### Visualizations

In [164]:
fig1 = px.line(
    merged_data[:2000],
    x='open_time',
    y=['implied_close_price', 'nobitex_close_price', 'tabdeal_close_price', 'wallex_close_price'],
    labels={'value': 'Close Price', 'variable': 'Source'},
    title='Comparison of Close Prices',
    template='plotly_dark'
)
fig1.show()

In [126]:
fig2 = px.line(
    merged_data[-4000:-2000],
    x='open_time',
    y=['implied_average_price', 'nobitex_average_price', 'tabdeal_average_price', 'wallex_average_price'],
    labels={'value': 'Average Price', 'variable': 'Source'},
    title='Comparison of Average Prices',
    template='plotly_dark'
)
fig2.show()

In [125]:
fig3 = px.line(
    merged_data[-2000:],
    x='open_time',
    y=['implied_weighted_avg_price', 'nobitex_weighted_avg_price', 'tabdeal_weighted_avg_price', 'wallex_weighted_avg_price'],
    labels={'value': 'Weighted Average Price', 'variable': 'Source'},
    title='Comparison of Weighted Average Prices - USDTTMN',
    template='plotly_dark'
)
fig3.show()

## Resampling

In [179]:
def resample_data(df, time_scales, method):
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index)

    resampled_data_list = []

    for scale in time_scales:
        if method == 'Last':
            resampled = df.resample(f'{scale}T').agg({
                'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum',
            })

        elif method == 'TWAP':
            def twap(group):
                time_deltas = group.index - group.index[0]
                total_time = time_deltas[-1].total_seconds()
                weights = time_deltas.total_seconds()
                return (group['close'] * weights).sum() / total_time
            
            resampled = df.resample(f'{scale}T').apply(
                lambda x: pd.Series({
                    'open': x['open'].iloc[0],
                    'high': x['high'].max(),
                    'low': x['low'].min(),
                    'close': twap(x),
                    'volume': x['volume'].sum()
                })
            )

        elif method == 'VWAP':
            def vwap(group):
                return (group['close'] * group['volume']).sum() / group['volume'].sum()

            resampled = df.resample(f'{scale}T').apply(
                lambda x: pd.Series({
                    'open': x['open'].iloc[0],
                    'high': x['high'].max(),
                    'low': x['low'].min(),
                    'close': vwap(x),
                    'volume': x['volume'].sum()
                })
            )

        else:
            raise ValueError(f"Invalid method: {method}. Choose from 'TWAP', 'VWAP', or 'Last'.")

        resampled['time_scale'] = scale
        resampled_data_list.append(resampled)

    resampled_data = pd.concat(resampled_data_list)

    return resampled_data

### Resampling Plot

In [135]:
def plot_candlestick_chart_plotly(data, time_scales=[5, 20, 60, 1440], method='Last'):
    resampled_data = resample_data(data, time_scales, method)
    # Create a figure
    fig = go.Figure()

    # Add candlestick traces for each time scale
    for scale in resampled_data['time_scale'].unique():
        scale_data = resampled_data[resampled_data['time_scale'] == scale]
        fig.add_trace(go.Candlestick(
            x=scale_data.index,
            open=scale_data['open'],
            high=scale_data['high'],
            low=scale_data['low'],
            close=scale_data['close'],
            name=f'{scale} minutes'
        ))

    # Update layout
    fig.update_layout(
        title=f'Resampled Data Using {method} Method',
        xaxis_title='Time',
        yaxis_title='Price',
        legend_title='Time Scale',
        template='plotly_dark'
    )

    # Show the figure
    fig.show()

In [138]:
plot_candlestick_chart_plotly(nobitex_usdttmn[:3000], method='Last')

In [180]:
plot_candlestick_chart_plotly(nobitex_usdttmn[:3000], method='TWAP')

In [140]:
plot_candlestick_chart_plotly(nobitex_usdttmn[:3000], method='VWAP')