In [None]:
# 📈 library

In [None]:
#library
import os
from datetime import datetime
import time
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
import math

import statsmodels.api as sm
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy import stats
from itertools import product

import warnings
warnings.filterwarnings("ignore")
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# List every file available under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Dataset folder, named once so both reads below stay in sync if it moves.
DATA_DIR = "/kaggle/input/g-research-crypto-forecasting"

# `info` holds the asset details (Asset_ID, Asset_Name and Weight are used later);
# `ctrain` is the training table every later cell filters by Asset_ID.
info = pd.read_csv(f"{DATA_DIR}/asset_details.csv")
ctrain = pd.read_csv(f"{DATA_DIR}/train.csv")

**Color Scheme**

In [None]:
# Build the 10-colour "YlOrBr" palette; being the cell's last expression, it is the cell output.
sns.color_palette(palette="YlOrBr", n_colors=10)

<h1 id="h1">
        <div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#ffeea9;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.5px">
1. 📝 Introduction
<a class="anchor-link" 
 href="https://www.kaggle.com/code/fangya/cryptocurrency-data-visualization-arima">¶</a>
</h1>


Crypto, a form of digital token or “coins” that exist on a distributed and decentralized ledger called a blockchain, is a popular investment nowadays.

A crypto is a collection of binary data which is designed to work as a medium of exchange. Individual coin ownership records are stored in a ledger, which is a computerized database using strong cryptography to secure transaction records, to control the creation of additional coins, and to verify the transfer of coin ownership.

*KEY idea: Decentralization, Blockchain

This project consists of three parts: 
1. Data visualization for 14 popular cryptocurrencies
2. Price History for selected individual cryptocurrency
3. Basic Arima Model for price prediction

In [None]:
# Preview the first rows of the raw training table.
ctrain.head()

In [None]:
# Impute Missing Time Value
def c_time_sub(asset_id, data=ctrain):
    """Return one asset's rows on a gap-free grid of 60-unit timestamp steps.

    Parameters
    ----------
    asset_id
        Value matched against the ``Asset_ID`` column.
    data : DataFrame
        Table with ``Asset_ID`` and ``timestamp`` columns (defaults to ``ctrain``).

    Returns
    -------
    DataFrame
        Rows of the asset indexed by ``timestamp``, reindexed from the first to the
        last observed timestamp in steps of 60; grid points with no observation
        take the values of the previous row (``method="pad"``).
    """
    # Build the mask from `data` itself. Masking with the global `ctrain`
    # selected the wrong rows (or failed to align) for any other frame passed in.
    df = data[data["Asset_ID"] == asset_id].set_index("timestamp")
    df = df.reindex(range(df.index[0], df.index[-1] + 60, 60), method="pad")
    return df

# Each check tabulates the spacing between consecutive index values of the
# reindexed frame. Only a cell's last expression is rendered automatically,
# so every check is passed to display() explicitly.

# Subgroup Bitcoin
btc = c_time_sub(asset_id=1)
display((btc.index[1:] - btc.index[:-1]).value_counts().head())

# Subgroup Ethereum
eth = c_time_sub(asset_id=6)
# Check the ETH index here (the original inspected `btc` a second time).
display((eth.index[1:] - eth.index[:-1]).value_counts().head())

# Subgroup Cardano
ada = c_time_sub(asset_id=3)
display((ada.index[1:] - ada.index[:-1]).value_counts().head())

In [None]:
# time frame selection: from datetime to timestamp
def totimestamp(s):
    """Convert a ``"dd/mm/YYYY"`` date string to epoch seconds at 00:00 UTC.

    The result is used to slice frames indexed by epoch-second timestamps, so
    the date is interpreted as UTC. ``time.mktime`` would interpret it in the
    kernel's local timezone and shift the slice boundaries on any non-UTC machine.

    Parameters
    ----------
    s : str
        Date formatted as ``%d/%m/%Y``.

    Returns
    -------
    numpy.int32
        Seconds since the Unix epoch (same return type as before).

    Raises
    ------
    ValueError
        If ``s`` does not match ``%d/%m/%Y``.
    """
    import calendar  # local import keeps this cell self-contained

    return np.int32(calendar.timegm(datetime.strptime(s, "%d/%m/%Y").timetuple()))

# Log Return
def log_return(series, periods=1):
    """Return the difference of the natural log of ``series`` over ``periods`` steps.

    The first ``periods`` entries of the result are NaN because they have no
    earlier value to difference against.
    """
    logged = np.log(series)
    return logged.diff(periods=periods)

In [None]:
# Data Selection
def crypto_sub(asset_id, data=ctrain):
    """Return one asset's rows indexed by ``timestamp`` converted to datetimes.

    Rows whose ``Asset_ID`` equals ``asset_id`` are selected from ``data`` and the
    ``timestamp`` column (read as seconds, ``unit="s"``) becomes the datetime index.
    """
    selected = data.loc[data["Asset_ID"] == asset_id]
    as_datetime = pd.to_datetime(selected["timestamp"], unit="s")
    return selected.assign(timestamp=as_datetime).set_index("timestamp")

# Datetime-indexed subgroups: Bitcoin (asset 1), Ethereum (asset 6), Cardano (asset 3)
btc1, eth1, ada1 = (crypto_sub(asset_id=asset) for asset in (1, 6, 3))


<h1 id="h2">
<div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#ffeea9;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.5px">
2. 🎨 Data Visualization
<a class="anchor-link" href="https://www.kaggle.com/code/fangya/cryptocurrency-data-visualization-arima/h2">¶</a>
</h1>
   
We will present the weights of the 14 different cryptocurrencies and investigate three of them in depth.



 <h2 style="color:#f88a21;"> Cryptocurrency summary </h2>

0. Bitcoin (BTC): launched in January 2009. It introduced a novel idea set out in a white paper by the mysterious Satoshi Nakamoto—bitcoin offers the promise of an online currency that is secured without any central authority, unlike government-issued currencies.


1. Ethereum (ETH): The goal behind Ethereum is to create a decentralized suite of financial products that anyone in the world can freely access, regardless of nationality, ethnicity, or faith.


3. Litecoin (LTC): known as “silver to Bitcoin’s gold.” It is based on an open-source global payment network that is not controlled by any central authority and uses “scrypt” as a proof of work, which can be decoded with the help of consumer-grade CPUs.


4. Cardano (ADA)：an “Ouroboros proof-of-stake” cryptocurrency that was created with a research-based approach by engineers, mathematicians, and cryptography experts.


5. Bitcoin Cash (BCH): an important place in the history of altcoins because it is one of the earliest and most successful hard forks of the original Bitcoin


6. Stellar (XLM): an open blockchain network designed to provide enterprise solutions by connecting financial institutions for the purpose of large transactions. (Transactions between banks)


7. Dogecoin(DOGE): as the original "memecoin" caused a stir in 2021 as the price of the coin skyrocketed. (Popular because of Elon Musk)


8. Binance Coin(BNB): a utility cryptocurrency that operates as a payment method for the fees associated with trading on the Binance Exchange. It is the third-largest cryptocurrency by market capitalization. (initiated from ETH)


9. Monero(XMR): a secure, private, and untraceable currency.The development of this cryptocurrency is completely donation-based and community-driven.


10. TRON: Founded in 2017 by a Singapore non-profit organization, aims to host a global entertainment system for the cost-effective sharing of digital content.


11. IOTA: a distributed ledger designed to record and execute transactions between machines and devices in the Internet of Things (IoT) ecosystem.


12. EOS.IO: a blockchain-based, decentralized platform used to develop, host, and run business applications, or dApps.


13. Ethereum Classic: an open-source, decentralized, blockchain-based distributed cryptocurrency platform that runs smart contracts. It is a split version of ETH.



 <h2 style="color:#f88a21;"> Weight Distribution Plot: </h2>
The bar plot shows that Bitcoin, Ethereum, and Cardano are the three most popular cryptocurrencies for trading or investment.

In [None]:
%matplotlib inline
# Sort assets by Weight (ascending) and draw one bar per asset, coloured by Weight.
info_s = info.sort_values(by="Weight")
fig_index = px.bar(
    info_s,
    x="Asset_Name",
    y="Weight",
    color="Weight",
    title="Popular Cryptocurrency Weight Distribution",
    color_continuous_scale=px.colors.sequential.YlOrBr,
)
fig_index.show()

 <h2 style="color:#f88a21;"> 2.2 Cryptocurrency Log Return Correlation Plot for 2021  </h2>

From the Correlation plot we can see Bitcoin Cash is highly correlated with EOS.IO.

Binance Coin is correlated with many crypto currencies, such as , Bitcoin, Bitcoin Cash, Cardano, Ethereum. 

We can roughly conclude that Binance Coin and Bitcoin Cash prices have a strong relation with other cryptocurrencies, whereas Dogecoin and Monero prices are quite independent. 
Earlier we mentioned that Monero is donation-based, so it makes sense that it is not strongly correlated with other coins.


In [None]:
# `totimestamp` and `log_return` are defined in an earlier cell and reused here;
# re-defining them in this cell only shadowed identical definitions.

# Slice boundaries for the 2021 window, computed once instead of per asset.
start_ts = totimestamp("01/01/2021")
end_ts = totimestamp("21/09/2021")

# Collect one log-return series per asset, keyed by asset name, and assemble the
# frame in a single concat. This gives columns named after the assets and avoids
# growing a DataFrame with join() inside the loop.
returns_2021 = {}
for asset_id, asset_name in zip(info.Asset_ID, info.Asset_Name):
    asset = ctrain[ctrain["Asset_ID"] == asset_id].set_index("timestamp")
    asset = asset.loc[start_ts:end_ts]
    # Fill gaps so every asset sits on the same 60-step timestamp grid.
    asset = asset.reindex(range(asset.index[0], asset.index[-1] + 60, 60), method="pad")
    # NOTE(review): fillna(0) feeds zeros into log() for any missing Close, which
    # yields -inf; kept as-is, but confirm Close has no gaps after padding.
    returns_2021[asset_name] = log_return(asset.Close.fillna(0))[1:]

all2021 = pd.concat(returns_2021, axis=1)

# Ticks must sit at the matrix's row/column positions (0..n-1), labelled with the
# matching column names. Placing them at Asset_ID values mislabels the plot
# whenever `info` is not ordered by Asset_ID.
tick_positions = np.arange(len(all2021.columns))
# The colormap belongs on the image: colorbar() ignores `cmap` when it is given
# (or finds) a mappable.
plt.imshow(all2021.corr(), cmap="coolwarm")
plt.yticks(tick_positions, all2021.columns)
plt.xticks(tick_positions, all2021.columns, rotation="vertical")
plt.colorbar();

<h1 id="h3">
    <div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#ffeea9;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.5px"> 3.✨  Price History      
<a class="anchor-link" href="https://www.kaggle.com/code/fangya/cryptocurrency-data-visualization-arima">¶</a>
</h1>

We will use the Closing Price and Candlestick graph to capture the trends of the crypto currency.

 <h2 style="color:#e96d13;"> 3.1 Closing Price Trend in 2021 </h2>

We will impute the missing time values using the "pad" method and present the overall closing price for Bitcoin, Ethereum, and Cardano.
 

In [None]:
# Impute Missing Time Value
def c_time_sub(asset_id, data=ctrain):
    """Return one asset's rows on a gap-free grid of 60-unit timestamp steps.

    Parameters
    ----------
    asset_id
        Value matched against the ``Asset_ID`` column.
    data : DataFrame
        Table with ``Asset_ID`` and ``timestamp`` columns (defaults to ``ctrain``).

    Returns
    -------
    DataFrame
        Rows of the asset indexed by ``timestamp``, reindexed from the first to the
        last observed timestamp in steps of 60; grid points with no observation
        take the values of the previous row (``method="pad"``).
    """
    # Build the mask from `data` itself. Masking with the global `ctrain`
    # selected the wrong rows (or failed to align) for any other frame passed in.
    df = data[data["Asset_ID"] == asset_id].set_index("timestamp")
    df = df.reindex(range(df.index[0], df.index[-1] + 60, 60), method="pad")
    return df

In [None]:
#1. Subgroup Bitcoin
btc = c_time_sub(asset_id=1)
# Only a cell's last expression is rendered, so show the spacing check explicitly:
# it tabulates the differences between consecutive index values.
display((btc.index[1:] - btc.index[:-1]).value_counts().head())
btc.head()

In [None]:
#2. Subgroup Ethereum
eth = c_time_sub(asset_id=6)
# Check the ETH index (the original inspected `btc` here) and display the result,
# since only a cell's last expression is rendered automatically.
display((eth.index[1:] - eth.index[:-1]).value_counts().head())
eth.head()

In [None]:
#3. Subgroup Cardano
ada = c_time_sub(asset_id=3)
# Only a cell's last expression is rendered, so show the spacing check explicitly:
# it tabulates the differences between consecutive index values.
display((ada.index[1:] - ada.index[:-1]).value_counts().head())
ada.head()

In [None]:
# Create time interval for 2021
def dur(start, end, data):
    """Return the rows of ``data`` whose index lies between two dates.

    ``start`` and ``end`` are ``"dd/mm/YYYY"`` strings; both are converted with
    ``totimestamp`` and used as label bounds of a ``.loc`` slice.
    """
    first_ts, last_ts = totimestamp(start), totimestamp(end)
    return data.loc[first_ts:last_ts]

# Same 01/01/2021 - 21/09/2021 window applied to each reindexed frame.
btc2021, eth2021, ada2021 = (
    dur(start="01/01/2021", end="21/09/2021", data=frame)
    for frame in (btc, eth, ada)
)

 <h3 style="color:#e96d13;"> Investment Tips 1 </h3>
We can see there is a tremendous price increase for BTC, ETH, and ADA in 2021, which implies they are all good investments.

In [None]:
# Plot the Closing Price for BTC, ETH, ADA
f = plt.figure(figsize=(10, 12))


def gplot(no, data, price, label, ylabel, color):
    """Draw ``data[price]`` as a line on subplot ``no`` of the module-level figure ``f``.

    ``no`` is passed to ``f.add_subplot`` (e.g. 311). ``label`` goes into the legend,
    ``ylabel`` onto the y-axis, and the x-axis is always labelled "Time".
    ``f`` is looked up when the function is called, so re-binding ``f`` to a new
    figure redirects later calls to that figure. Returns the ``plt`` module.
    """
    ax = f.add_subplot(no)
    ax.plot(data[price], label=label, color=color)
    ax.legend()
    ax.set_xlabel("Time")
    ax.set_ylabel(ylabel)
    return plt

# Plot the 2021 slices: the labels and the surrounding narrative describe 2021,
# whereas `btc` / `eth` / `ada` hold the full history.
gplot(no=311, data=btc2021, price="Close", label="BTC 2021 Overall Performance", ylabel="BTC Closing Price", color="#fede86")
gplot(no=312, data=eth2021, price="Close", label="ETH 2021 Overall Performance", ylabel="ETH Closing Price", color="#e96d13")
gplot(no=313, data=ada2021, price="Close", label="Cardano 2021 Overall Performance", ylabel="ADA Closing Price", color="#b03f03")

plt.tight_layout()
plt.show()

 <h2 style="color:#f88a21;"> 3.2 Return Rate for BTC, ETH, and ADA </h2>
We see that cryptocurrency prices went up dramatically regardless of the coin, so we would like to investigate one step further using the Target variable.
*Target: 15-minute residualized returns.

*Residual Return: An asset's residual return equals its excess return minus beta times the benchmark excess return.

As we can see ETH, and BTC residual return are relatively stable compared to ADA.
This might be a good implication that if the investor would take short time trading opportunities, ADA is a better choice. 

If the investor is risk averse, BTC or ETH will be a better fit

In [None]:
# New figure for the Target panels; gplot() draws on whatever `f` currently is.
f = plt.figure(figsize=(10, 12))
# Plot the 2021 slices so the data matches the "2021" labels
# (`btc` / `eth` / `ada` hold the full history).
gplot(no=311, data=btc2021, price="Target", label="BTC 2021 15min Return Residue", ylabel="BTC residual return", color="#fede86")
gplot(no=312, data=eth2021, price="Target", label="ETH 2021 15min Return Residue", ylabel="ETH residual return", color="#e96d13")
gplot(no=313, data=ada2021, price="Target", label="ADA 2021 15min Return Residue", ylabel="ADA residual return", color="#b03f03")


plt.tight_layout()
plt.show()

 <h2 style="color:#f88a21;"> 3.3 Candlestick Chart </h2>
Candlestick charts are used by traders to determine possible price movement based on past patterns.

In [None]:
def c_chart(data,label):
    """Build a Plotly candlestick figure from an OHLC frame.

    Parameters
    ----------
    data
        Table with ``Open``, ``High``, ``Low`` and ``Close`` columns; its index
        supplies the x-axis values.
    label : str
        Text placed at the start of the chart title.

    Returns
    -------
    plotly.graph_objs.Figure
        The configured figure; callers render it with ``.show()``.
    """
    candlestick = go.Figure(
        data=[
            go.Candlestick(
                x=data.index,
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close'],
            )
        ]
    )
    candlestick.update_xaxes(title_text='Time', rangeslider_visible=True)

    candlestick.update_layout(
        title={
            # Spelling of the user-facing title corrected ("Candelstick" -> "Candlestick").
            'text': '{:} Candlestick Chart'.format(label),
            "y": 0.8,
            "x": 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        }
    )

    candlestick.update_yaxes(title_text='Price in USD', ticksuffix='$')
    return candlestick

# Peek at the datetime-indexed BTC frame that the BTC candlestick chart slices from.
btc1.head()

 <h3 style="color:#b03f03;"> Investment Tips 2 </h3>
During the time interval 22:40 to 22:50, there is a series of downward red candles, which represents **Oversold.** Theoretically, the price would go up following an oversold interval.

**Oversold** means the asset is trading below what would be a typical or average trading range (sometimes investors also call this its true value). Normally, oversold conditions draw buyers into the market.


In [None]:
%matplotlib inline
# Candlestick chart of the last 90 rows of the BTC frame.
btc_candle = c_chart(btc1.iloc[-90:], label="BTC Price")
btc_candle.show()

<h3 style="color:#e96d13;"> ETH Candlestick </h3>

We randomly pick a time interval for the ETH price; we see an oversold period around 1:50 am and an overbought period around 02:05 am.

In [None]:
# Candlestick chart of 100 ETH rows selected by position (rows 100888 up to 100988).
eth_candle = c_chart(eth1.iloc[100888:100988], label="ETH Price ")
eth_candle.show()

<h3 style="color:#e96d13;"> ADA Candle Stick </h3>

Overbuying around 16:20

In [None]:
%matplotlib inline
# Plot the ADA frame under the ADA heading; the original charted `eth1` with an
# "ETH Price" label. Same positional window as before (rows -500 up to -400).
ada_candle = c_chart(ada1[-500:-400], label="ADA Price ")
ada_candle.show()


<h1 id="h4">
    <div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#ffeea9;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.5px"> 4.🌅  ARIMA Model   
<a class="anchor-link" href="https://www.kaggle.com/code/fangya/cryptocurrency-data-visualization-arima">¶</a>
</h1>

In [None]:
# Function to Subset the Price variables
def mini_data(data):
    """Return only the ``Close``, ``Low``, ``High`` and ``Open`` columns of ``data``, in that order."""
    price_columns = ["Close", "Low", "High", "Open"]
    return data.loc[:, price_columns]

# Price-only views of the three datetime-indexed frames.
btc1_mini, eth1_mini, ada1_mini = map(mini_data, (btc1, eth1, ada1))
btc1.head()

 <h2 style="color:#e96d13;"> 4.1 Seasonal Decomposition</h2> 

In [None]:
# Function to Plot
plt.rcParams["figure.figsize"] = (15, 7)


def season_df(data, label):
    """Plot the seasonal decomposition of the monthly mean ``Close`` of ``data``.

    ``data`` is resampled with rule ``"M"`` and averaged, ``seasonal_decompose`` is
    run on the resulting ``Close`` column and plotted, and ``label`` is printed.
    Returns whatever ``plt.show()`` returns.
    """
    monthly = data.resample("M").mean()
    decomposition = seasonal_decompose(monthly.Close)
    decomposition.plot()
    print(label)
    return plt.show()


season_df(data=btc1_mini, label="BTC Seasonal Decomposition")

In [None]:
# ETH seasonal decomposition of the monthly mean close
season_df(data=eth1_mini, label="ETH Seasonal Decomposition")

In [None]:
# ADA seasonal decomposition of the monthly mean close
season_df(data=ada1_mini, label="ADA Seasonal Decomposition")

 <h2 style="color:#e96d13;"> 4.2 Box - Cox Transformation </h2>

In [None]:
# Box-Cox transformation of the monthly mean BTC close.
# `lmbda` is the fitted lambda returned by stats.boxcox; it is kept for inverting
# the transform later.
btc_month = btc1_mini.resample("M").mean()
transformed_close, lmbda = stats.boxcox(btc_month["Close"])
btc_month = btc_month.assign(close_box=transformed_close)

In [None]:
# Candidate (p, q) orders: every combination of 0..2 with a fixed differencing order d.
qs = range(0, 3)
ps = range(0, 3)
d=1
parameters = product(ps, qs)
parameters_list = list(parameters)

# Model Selection: fit each candidate on the Box-Cox series and keep the lowest AIC.
results = []
best_aic = float("inf")
# Start from explicit "nothing found" values so a run where every fit fails is
# detected below instead of surfacing later as a NameError.
best_model = None
best_param = None
warnings.filterwarnings('ignore')
for param in parameters_list:
    try:
        model = SARIMAX(btc_month.close_box, order=(param[0], d, param[1])).fit(disp=-1)
    except ValueError:
        # Orders the model rejects are reported and skipped.
        print('bad parameter combination:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, aic])

if best_model is None:
    raise RuntimeError(
        "No SARIMAX candidate produced a usable AIC; check btc_month.close_box."
    )

In [None]:
# Tabulate the (order, AIC) pairs, print the five lowest-AIC rows, then the
# summary of the selected model.
result_table = pd.DataFrame(results).set_axis(['parameters', 'aic'], axis=1)
print(result_table.sort_values(by='aic', ascending=True).head())
print(best_model.summary())

In [None]:
# Draw the selected model's diagnostic plots on a 15x12 figure.
best_model.plot_diagnostics(figsize=(15, 12))
plt.show()

In [None]:
def invboxcox(y, lmbda):
    """Invert a Box-Cox transform.

    For ``lmbda == 0`` the result is ``exp(y)``; otherwise it is
    ``exp(log(lmbda * y + 1) / lmbda)``. ``y`` may be a scalar or an array-like
    accepted by NumPy.
    """
    if lmbda != 0:
        scaled = np.log(lmbda * y + 1) / lmbda
        return np.exp(scaled)
    return np.exp(y)

 <h2 style="color:#e96d13;"> 4.3 ARIMA for Bitcoin </h2>

In [None]:
# Prediction
# Start from the monthly Close column only.
btc_month_pred = btc_month[['Close']]
# Extra rows appended below the monthly data.
# NOTE(review): these dates look like they fall inside the period the monthly data
# already covers and are not month-ends - confirm they are meant as a future horizon.
date_list = [datetime(2021, 6, 20), datetime(2021, 7, 20), datetime(2021, 8, 21), datetime(2021, 9, 21)]
# `future` is created with all of btc_month's columns, so the concat below adds
# those columns to btc_month_pred as well.
future = pd.DataFrame(index=date_list, columns= btc_month.columns)
btc_month_pred = pd.concat([btc_month_pred, future])
# Predictions are requested for 2018-01-31 .. 2021-05-31, mapped back from the
# Box-Cox scale with invboxcox, and aligned to btc_month_pred by index on assignment.
# NOTE(review): the prediction window ends before the rows appended above, so
# those rows presumably receive no forecast value - verify this is intended.
btc_month_pred['forecast'] = invboxcox(best_model.predict(start=datetime(2018, 1, 31), end=datetime(2021, 5, 31)), lmbda)
# Overlay the actual monthly close and the back-transformed prediction.
plt.figure(figsize=(18,10))
btc_month_pred.Close.plot(color='#fede86')
btc_month_pred.forecast.plot(color='#b03f03', ls='--', label='Predicted Close')
plt.legend()
plt.title('Bitcoin monthly forecast')
plt.ylabel('USD')
plt.show()


<h1 id="h5">
    <div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#ffeea9;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.5px"> 💌 Reference:
<a class="anchor-link" href="https://www.kaggle.com/code/fangya/cryptocurrency-data-visualization-arima/h5">¶</a>
</h1>
    
https://en.wikipedia.org/wiki/Cryptocurrency

https://www.investopedia.com/tech/most-important-cryptocurrencies-other-than-bitcoin/

https://www.kaggle.com/cstein06/tutorial-to-the-g-research-crypto-competition

https://www.kaggle.com/odins0n/g-research-plots-eda

https://www.kaggle.com/taniaj/cryptocurrency-predictions-with-arima

**Special Thanks to Candice** ❤️ !

### Thank you! 谢谢 🙏