In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
from statsmodels.tsa.stattools import grangercausalitytests

In [21]:
ls

'AVAX_Price&Amount_GC.ipynb'        'PLA_Price&Count_relation.ipynb'
'AVAX_Price&Amount_relation.ipynb'   [0m[01;34mprice[0m/
'AVAX_Price&Count_GC.ipynb'          [01;34mtransaction_amount[0m/
'AVAX_Price&Count_relation.ipynb'    [01;34mtransaction_count[0m/
'PLA_Price&Amount_relation.ipynb'    [01;34mtransfer_amount[0m/
'PLA_Price&Count_GC.ipynb'           [01;34mtransfer_count[0m/


## 이 파일의 목적
AVAX의 Granger causality를 계산하기 위해서 진행

In [22]:
avax_price = pd.read_excel('price/BINANCE_AVAXUSDT_D.xlsx')
avax_transaction = pd.read_csv('transaction_amount/AVAX_transaction_Amount.csv')

In [23]:
# 정상성을 만족하지 않으므로 1차 차분 진행
avax_price['Close_diff'] = avax_price['Close'].diff()

In [24]:
avax_price.rename(columns = {"Open_time" : "Date"}, inplace = True)

In [25]:
avax_price

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume,HV,전일종가 대비 변동 폭,기준 13%,기준 20%,Close_diff
0,0,2020-09-22,0.8500,7.0000,0.8500,5.3193,31441883.16,,,NAN,NAN,
1,1,2020-09-23,5.3279,5.3600,3.4000,3.5350,12620608.01,,0.335439,0,1,-1.7843
2,2,2020-09-24,3.5305,4.8873,3.4033,4.6411,7107843.86,,0.312900,0,1,1.1061
3,3,2020-09-25,4.6366,5.0234,4.2001,4.7134,3886096.71,,0.015578,0,0,0.0723
4,4,2020-09-26,4.7164,4.8200,4.3294,4.5200,1814932.97,,0.041032,0,0,-0.1934
...,...,...,...,...,...,...,...,...,...,...,...,...
1367,1367,2024-06-20,26.9500,28.8900,26.6700,27.5800,1873349.75,73.678687,0.023756,0,0,0.6400
1368,1368,2024-06-21,27.5800,28.0300,26.9300,27.4800,1411878.18,73.823462,0.003626,0,0,-0.1000
1369,1369,2024-06-22,27.4800,27.5000,24.5200,25.6100,3754180.61,66.568123,0.068049,0,0,-1.8700
1370,1370,2024-06-23,25.6200,26.1500,24.1200,25.0900,2992605.46,60.138325,0.020305,0,0,-0.5200


In [26]:
avax_transaction

Unnamed: 0,Date,Transaction Amount
0,2020.9.23,2.343750e-01
1,2020.9.24,1.000000e-01
2,2020.9.25,1.200000e-02
3,2020.9.27,0.000000e+00
4,2020.9.28,0.000000e+00
...,...,...
1370,2024.7.18,2.416854e+06
1371,2024.7.19,2.916211e+06
1372,2024.7.20,2.299388e+06
1373,2024.7.21,3.898406e+06


In [27]:
avax_transaction['Date'] = avax_transaction['Date'].astype('datetime64[ns]')

In [28]:
avax_price.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Unnamed: 0    1372 non-null   int64         
 1   Date          1372 non-null   datetime64[ns]
 2   Open          1372 non-null   float64       
 3   High          1372 non-null   float64       
 4   Low           1372 non-null   float64       
 5   Close         1372 non-null   float64       
 6   Volume        1372 non-null   float64       
 7   HV            1362 non-null   float64       
 8   전일종가 대비 변동 폭  1371 non-null   float64       
 9   기준 13%        1372 non-null   object        
 10  기준 20%        1372 non-null   object        
 11  Close_diff    1371 non-null   float64       
dtypes: datetime64[ns](1), float64(8), int64(1), object(2)
memory usage: 128.8+ KB


In [29]:
avax_transaction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1375 entries, 0 to 1374
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1375 non-null   datetime64[ns]
 1   Transaction Amount  1375 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 21.6 KB


In [30]:
df = pd.merge(avax_price[['Date', 'Close_diff']],avax_transaction[:-21], how = 'outer', on = 'Date')
df

Unnamed: 0,Date,Close_diff,Transaction Amount
0,2020-09-22,,
1,2020-09-23,-1.7843,2.343750e-01
2,2020-09-24,1.1061,1.000000e-01
3,2020-09-25,0.0723,1.200000e-02
4,2020-09-26,-0.1934,
...,...,...,...
1374,2024-06-27,,3.665443e+06
1375,2024-06-28,,3.130146e+06
1376,2024-06-29,,1.391805e+06
1377,2024-06-30,,1.643288e+06


In [31]:
# 한 값이라도 NaN으로 되어 있으면 drop을 진행
df = df.dropna()

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1347 entries, 1 to 1371
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Date                1347 non-null   datetime64[ns]
 1   Close_diff          1347 non-null   float64       
 2   Transaction Amount  1347 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 42.1 KB


In [33]:
# Set the Date column as the index and ensure it is a datetime type
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Date'] = pd.to_datetime(df['Date'])


In [35]:
# Perform Granger causality test
max_lag = 100  # Maximum lag to test
test_result = grangercausalitytests(df[['Transaction Amount', 'Close_diff']], max_lag, verbose=True)

# Output the result of the Granger causality test
print(test_result)




Granger Causality
number of lags (no zero) 1
ssr based F test:         F=5.4552  , p=0.0197  , df_denom=1343, df_num=1
ssr based chi2 test:   chi2=5.4673  , p=0.0194  , df=1
likelihood ratio test: chi2=5.4563  , p=0.0195  , df=1
parameter F test:         F=5.4552  , p=0.0197  , df_denom=1343, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=2.2020  , p=0.1110  , df_denom=1340, df_num=2
ssr based chi2 test:   chi2=4.4204  , p=0.1097  , df=2
likelihood ratio test: chi2=4.4132  , p=0.1101  , df=2
parameter F test:         F=2.2020  , p=0.1110  , df_denom=1340, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.7860  , p=0.1479  , df_denom=1337, df_num=3
ssr based chi2 test:   chi2=5.3861  , p=0.1456  , df=3
likelihood ratio test: chi2=5.3754  , p=0.1463  , df=3
parameter F test:         F=1.7860  , p=0.1479  , df_denom=1337, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.2698  , p=0.

Transaction count와 종가 사이에 Granger causality는 이틀까지 있다고 보여진다. 즉 Count가 종가를 예측하는데 도움이 된다는 의미이다.

In [None]:
# Perform Granger causality test
max_lag = 10  # Maximum lag to test
test_result = grangercausalitytests(df[['Close', 'Transaction Count']], max_lag, verbose=True)

# Output the result of the Granger causality test
print(test_result)