# Part 2 (Exploratory Data Analysis)

## Imports

In [7]:
import requests
from abc import ABC, abstractmethod
import time
import binance
import pandas as pd
from requests.exceptions import ConnectTimeout
from tqdm import tqdm
import mplfinance as mpf
import os
import plotly.express as px
import numpy as np
from math import ceil
from scipy.stats import zscore
from statsmodels.robust.scale import mad
from scipy.stats import median_abs_deviation
import plotly.graph_objects as go
from scipy.stats import shapiro, skew, kurtosis, norm
from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from plotly.subplots import make_subplots
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore')

## Constants

### Time Intervals

In [3]:
from datetime import datetime

# Analysis window: [2023-01-01, 2024-01-01).
# Both datetimes are naive, so .timestamp() interprets them in the local
# timezone of the machine running the notebook.
start_date = datetime(year=2023, month=1, day=1)
finish_date = datetime(year=2024, month=1, day=1)

# Window bounds as Unix epoch seconds.
start_seconds, finish_seconds = (
    int(moment.timestamp()) for moment in (start_date, finish_date)
)

# Window bounds as Unix epoch milliseconds.
start_ms, finish_ms = (
    int(moment.timestamp() * 1000) for moment in (start_date, finish_date)
)

### Folders

In [44]:
# Root of all on-disk data; every path below keeps a trailing slash so a file
# name can be appended directly.
DATA_DIR = 'data/'

# First-level sub-folders.
RAW_DATA_DIR = f'{DATA_DIR}raw data/'
ROBUST_DATA_DIR = f'{DATA_DIR}robust raw data/'
RESAMPLED_DATA_DIR = f'{DATA_DIR}resampled data/'

# One folder per resampling interval, nested under the resampled-data folder.
RESAMPLED_1MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Minute/'
RESAMPLED_5MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}5 Minutes/'
RESAMPLED_20MIN_DATA_DIR = f'{RESAMPLED_DATA_DIR}20 Minutes/'
RESAMPLED_1H_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Hour/'
RESAMPLED_1D_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Day/'
RESAMPLED_1MONTH_DATA_DIR = f'{RESAMPLED_DATA_DIR}1 Month/'

## Log Returns, Volatility, and Normality Assessment

### Load Dataset

In [39]:
# Read the representative price series CSV from the robust raw-data folder.
df = pd.read_csv(f'{ROBUST_DATA_DIR}Representative_Price_Series.csv')

In [63]:
# Keep only the first 30 * 24 * 60 rows (presumably 30 days of 1-minute bars —
# TODO confirm the sampling interval).
# NOTE(review): the execution counts show this cell ran after the log-return
# cell, whose Shapiro-Wilk warning reports N = 525600; on a fresh
# "Restart & Run All" the statistics below would use this truncated frame.
df = df.head(30 * 24 * 60)

In [8]:
# Display the column labels of the loaded frame (inspection only).
df.columns

Index(['Unnamed: 0', 'open_time', 'implied_close_price',
       'implied_average_price', 'implied_weighted_avg_price',
       'nobitex_close_price', 'nobitex_average_price',
       'nobitex_weighted_avg_price', 'tabdeal_close_price',
       'tabdeal_average_price', 'tabdeal_weighted_avg_price',
       'wallex_close_price', 'wallex_average_price',
       'wallex_weighted_avg_price'],
      dtype='object')

### Calculations

In [40]:
# Select every column whose name contains 'close_price' and turn each price
# series into log returns: ln(P_t / P_{t-1}). The first row is NaN.
close_price_columns = list(filter(lambda name: 'close_price' in name, df.columns))
log_returns = df[close_price_columns].pipe(
    lambda prices: np.log(prices / prices.shift(1))
)

# Exponentially weighted standard deviation of the log returns.
# The span below corresponds to a smoothing factor alpha = 1 - lambda_.
lambda_ = 0.94  # Decay factor, can be adjusted
ewma_volatility = log_returns.ewm(
    span=2 / (1 - lambda_) - 1,
    adjust=False,
).std()

# Per-column descriptive statistics, extended with skewness and kurtosis.
log_return_stats = log_returns.describe().T.assign(
    skewness=log_returns.skew(),
    kurtosis=log_returns.kurtosis(),
)

# Shapiro-Wilk normality test: keep only the p-value for each column.
# NOTE(review): SciPy warns in this cell's output that the p-value may be
# inaccurate for N > 5000, so treat these p-values as indicative only.
shapiro_results = log_returns.apply(
    lambda returns: shapiro(returns.dropna())[1]
)


scipy.stats.shapiro: For N > 5000, computed p-value may not be accurate. Current N is 525600.



### Visualizations

#### Quantile-Quantile 

In [64]:
# Normal Q-Q plot for each log-return column.
#
# Theoretical quantiles (x axis) are the standard-normal quantiles evaluated at
# fixed plotting positions, so the figure is deterministic and reproducible.
# Sample quantiles (y axis) are the sorted observed log returns. The reference
# line shows where the points would lie if the returns were normal with the
# sample's own mean and standard deviation.
for col in log_returns.columns:
    # Sorted observations are the empirical quantiles of the sample.
    sample_quantiles = np.sort(log_returns[col].dropna().to_numpy())
    n_obs = sample_quantiles.size

    # Plotting positions (i - 0.5) / n for i = 1..n, mapped through the
    # standard-normal inverse CDF.
    plotting_positions = (np.arange(1, n_obs + 1) - 0.5) / n_obs
    theoretical_quantiles = norm.ppf(plotting_positions)

    # Expected sample quantiles under normality with matching mean and std.
    reference_line = (
        sample_quantiles.mean()
        + sample_quantiles.std(ddof=1) * theoretical_quantiles
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=theoretical_quantiles,
        y=sample_quantiles,
        mode='markers',
        name='Log Returns'
    ))
    fig.add_trace(go.Scatter(
        x=theoretical_quantiles,
        y=reference_line,
        mode='lines',
        name='Normal Distribution'
    ))
    fig.update_layout(title=f'Q-Q Plot of {col}', xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles', template='plotly_dark')
    fig.show()

#### EWMA Volatility

In [65]:
# One figure per close-price column: the log-return series overlaid with its
# exponentially weighted volatility, both drawn against the frame index.
for col in log_returns.columns:
    fig = go.Figure(data=[
        go.Scatter(x=frame.index, y=frame[col], mode='lines', name=label)
        for frame, label in (
            (log_returns, 'Log Returns'),
            (ewma_volatility, 'EWMA Volatility'),
        )
    ])
    fig.update_layout(
        title=f'Log Returns and EWMA Volatility for {col}',
        xaxis_title='Time',
        yaxis_title='Value',
        template='plotly_dark',
    )
    fig.show()

#### Shapiro-Wilk

In [66]:
# Display the Shapiro-Wilk p-value obtained for each close-price column.
shapiro_results

implied_close_price    2.186931e-49
nobitex_close_price    3.191710e-42
tabdeal_close_price    4.570895e-71
wallex_close_price     2.359872e-48
dtype: float64

#### Conclusion
All of the above tests suggest that none of the listed close price series has normally distributed log returns.

## Autocorrelation and Stationarity Analysis

In [99]:
# Load the daily-resampled USDT/TMN file of each exchange
# (Nobitex, Tabdeal, Wallex — in that order).
nobitex_data, tabdeal_data, wallex_data = (
    pd.read_csv(RESAMPLED_1D_DATA_DIR + filename)
    for filename in (
        'Last-Nobitex-USDTTMN.csv',
        'Last-Tabdeal-USDTTMN.csv',
        'Last-Wallex-USDTTMN.csv',
    )
)

In [100]:
def calculate_log_return(df, column_name):
    """Compute the one-step log return of a column.

    Args:
        df: Frame holding the series.
        column_name: Label of the column to transform.

    Returns:
        A Series of ``ln(x_t) - ln(x_{t-1})`` values aligned with ``df``;
        the first entry is NaN because it has no predecessor.
    """
    log_prices = np.log(df[column_name])
    return log_prices - log_prices.shift(1)

# Attach the close-price log return to each exchange frame as a new column.
for exchange_df in (nobitex_data, tabdeal_data, wallex_data):
    exchange_df['log_return_close'] = calculate_log_return(exchange_df, 'close_price')

In [101]:
def calculate_volatility(df, column_name, window=21):
    """Compute the rolling standard deviation of a column.

    Args:
        df: Frame holding the series.
        column_name: Label of the column to measure.
        window: Number of consecutive rows in each rolling window.

    Returns:
        A Series aligned with ``df``; positions without a full window of
        observations are NaN.
    """
    rolling_view = df[column_name].rolling(window=window)
    return rolling_view.std()

# Attach the rolling volatility of the log returns (default window) to each
# exchange frame as a new column.
for exchange_df in (nobitex_data, tabdeal_data, wallex_data):
    exchange_df['volatility_close'] = calculate_volatility(exchange_df, 'log_return_close')

In [102]:
def clean_data(df):
    """Return ``df`` without the rows that contain any missing value.

    The input frame is not modified: a new frame is returned, so re-running
    the calling cell or reusing the original frame elsewhere is safe.

    Args:
        df: Frame to clean.

    Returns:
        A new DataFrame holding only the rows of ``df`` with no NaN/None.
    """
    return df.dropna()

# Rebind each exchange frame to its cleaned version (rows with NaN removed).
nobitex_data, tabdeal_data, wallex_data = (
    clean_data(exchange_df)
    for exchange_df in (nobitex_data, tabdeal_data, wallex_data)
)

### ACF & PACF

In [103]:
def plot_acf_pacf(series, title, lags=30):
    """Show the ACF and PACF of a series as side-by-side bar charts.

    Args:
        series: Observations passed to ``acf`` and ``pacf``.
        title: Text appended to 'ACF and PACF of ' in the figure title.
        lags: Highest lag requested from both estimators (``nlags``).

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # (panel name, correlation values, bar colour), in left-to-right order.
    panels = (
        ('ACF', acf(series, nlags=lags), 'dodgerblue'),
        ('PACF', pacf(series, nlags=lags), 'orange'),
    )

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=tuple(name for name, _, _ in panels),
    )

    # One bar trace per panel; the x axis is the lag index 0..len(values)-1.
    for position, (name, values, colour) in enumerate(panels, start=1):
        fig.add_trace(
            go.Bar(x=list(range(len(values))), y=values, marker_color=colour, name=name),
            row=1, col=position
        )

    # Dark theme: black backgrounds with white text on every axis.
    white_axes = {
        axis: dict(color='white')
        for axis in ('xaxis', 'yaxis', 'xaxis2', 'yaxis2')
    }
    fig.update_layout(
        title=f'ACF and PACF of {title}',
        plot_bgcolor='black',
        paper_bgcolor='black',
        font=dict(color='white'),
        showlegend=False,
        **white_axes,
    )

    fig.show()

#### On Close Prices

In [104]:
# ACF/PACF of the close price, one figure per exchange.
for exchange_df, label in (
    (nobitex_data, 'Nobitex Close Price'),
    (tabdeal_data, 'Tabdeal Close Price'),
    (wallex_data, 'Wallex Close Price'),
):
    plot_acf_pacf(exchange_df['close_price'], label)

#### On Log Return

In [105]:
# ACF/PACF of the close-price log return, one figure per exchange.
for exchange_df, label in (
    (nobitex_data, 'Log Return Nobitex Close'),
    (tabdeal_data, 'Log Return Tabdeal Close'),
    (wallex_data, 'Log Return Wallex Close'),
):
    plot_acf_pacf(exchange_df['log_return_close'], label)

#### On Volatility

In [106]:
# ACF/PACF of the rolling volatility, one figure per exchange.
for exchange_df, label in (
    (nobitex_data, 'Volatility Nobitex Close'),
    (tabdeal_data, 'Volatility Tabdeal Close'),
    (wallex_data, 'Volatility Wallex Close'),
):
    plot_acf_pacf(exchange_df['volatility_close'], label)

### ADF Test

In [32]:
from statsmodels.tsa.stattools import adfuller

def adf_test(series, series_name):
    """Run the Augmented Dickey-Fuller test on a series and print a report.

    The report lists the test statistic, the p-value, the critical values,
    and a verdict: 'Stationary' when the p-value is below 0.05, otherwise
    'Non-stationary'.

    Args:
        series: Observations passed to ``adfuller``.
        series_name: Label used in the report header.

    Returns:
        None. The report is written with ``print``.
    """
    outcome = adfuller(series)
    adf_statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    verdict = 'Stationary' if p_value < 0.05 else 'Non-stationary'
    report_lines = [
        f'ADF Test for {series_name}',
        f'ADF Statistic: {adf_statistic:.4f}',
        f'p-value: {p_value:.4f}',
        'Critical Values:',
        *(f'   {level}: {threshold:.4f}' for level, threshold in critical_values.items()),
        verdict,
        '\n',  # trailing separator between consecutive reports
    ]
    for line in report_lines:
        print(line)

In [61]:
# ADF stationarity report for the close price of each exchange.
for exchange_df, label in (
    (nobitex_data, 'Nobitex Close Price'),
    (tabdeal_data, 'Tabdeal Close Price'),
    (wallex_data, 'Wallex Close Price'),
):
    adf_test(exchange_df['close_price'], label)

ADF Test for Nobitex Close Price
ADF Statistic: -4.7314
p-value: 0.0001
Critical Values:
   1%: -3.4499
   5%: -2.8702
   10%: -2.5714
Stationary


ADF Test for Tabdeal Close Price
ADF Statistic: -4.3535
p-value: 0.0004
Critical Values:
   1%: -3.4501
   5%: -2.8702
   10%: -2.5714
Stationary


ADF Test for Wallex Close Price
ADF Statistic: -4.0347
p-value: 0.0012
Critical Values:
   1%: -3.4503
   5%: -2.8703
   10%: -2.5715
Stationary




In [62]:
# ADF stationarity report for the close-price log return of each exchange.
for exchange_df, label in (
    (nobitex_data, 'Log Return Nobitex Close'),
    (tabdeal_data, 'Log Return Tabdeal Close'),
    (wallex_data, 'Log Return Wallex Close'),
):
    adf_test(exchange_df['log_return_close'], label)

ADF Test for Log Return Nobitex Close
ADF Statistic: -5.9755
p-value: 0.0000
Critical Values:
   1%: -3.4505
   5%: -2.8704
   10%: -2.5715
Stationary


ADF Test for Log Return Tabdeal Close
ADF Statistic: -6.9192
p-value: 0.0000
Critical Values:
   1%: -3.4504
   5%: -2.8704
   10%: -2.5715
Stationary


ADF Test for Log Return Wallex Close
ADF Statistic: -6.8372
p-value: 0.0000
Critical Values:
   1%: -3.4504
   5%: -2.8704
   10%: -2.5715
Stationary




In [63]:
# ADF stationarity report for the rolling volatility of each exchange.
for exchange_df, label in (
    (nobitex_data, 'Volatility Nobitex Close'),
    (tabdeal_data, 'Volatility Tabdeal Close'),
    (wallex_data, 'Volatility Wallex Close'),
):
    adf_test(exchange_df['volatility_close'], label)

ADF Test for Volatility Nobitex Close
ADF Statistic: -2.7976
p-value: 0.0586
Critical Values:
   1%: -3.4501
   5%: -2.8703
   10%: -2.5714
Non-stationary


ADF Test for Volatility Tabdeal Close
ADF Statistic: -3.0955
p-value: 0.0269
Critical Values:
   1%: -3.4503
   5%: -2.8703
   10%: -2.5714
Stationary


ADF Test for Volatility Wallex Close
ADF Statistic: -2.8027
p-value: 0.0579
Critical Values:
   1%: -3.4500
   5%: -2.8702
   10%: -2.5714
Non-stationary




## Inter-Market Analysis

In [107]:
# Load the monthly-resampled USDT/TMN file of each exchange
# (Nobitex, Tabdeal, Wallex — in that order).
# NOTE(review): this rebinds the names that held the daily frames earlier in
# the notebook; re-running earlier cells afterwards would use monthly data.
nobitex_data, tabdeal_data, wallex_data = (
    pd.read_csv(RESAMPLED_1MONTH_DATA_DIR + filename)
    for filename in (
        'Last-Nobitex-USDTTMN.csv',
        'Last-Tabdeal-USDTTMN.csv',
        'Last-Wallex-USDTTMN.csv',
    )
)

In [108]:
# ACF/PACF of the monthly close price for each exchange, limited to 6 lags.
for exchange_df, label in (
    (nobitex_data, 'Nobitex Close Price'),
    (tabdeal_data, 'Tabdeal Close Price'),
    (wallex_data, 'Wallex Close Price'),
):
    plot_acf_pacf(exchange_df['close_price'], label, lags=6)