**Created by Sanskar Hasija**

**📊 G-Research Plots + EDA 📊**

**3 NOVEMBER 2021**


# <center>📊G-RESEARCH PLOTS + EDA 📊 </center>

# <center>IMPORTS</center> 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the minute-by-minute training data and the asset metadata table
# (the Asset_ID -> asset mapping) from the competition input directory.
data = pd.read_csv('../input/g-research-crypto-forecasting/train.csv') 
asset_details = pd.read_csv('../input/g-research-crypto-forecasting/asset_details.csv') 

# <center>EDA</center> 

## Column Description
*   **timestamp**: All timestamps are Unix timestamps in seconds (the number of seconds elapsed since 1970-01-01 00:00:00.000 UTC). Timestamps in this dataset are multiples of 60, indicating minute-by-minute data.
*   **Asset_ID**: The asset ID corresponding to one of the cryptocurrencies (e.g. `Asset_ID = 1` for Bitcoin). The mapping from `Asset_ID` to crypto asset is contained in `asset_details.csv`.
*   **Count**: Total number of trades in the time interval (last minute).
*   **Open**:	Opening price of the time interval (in USD).
*   **High**:	Highest price reached during time interval (in USD).
*   **Low**: Lowest price reached during time interval (in USD).
*   **Close**:	Closing price of the time interval (in USD).
*   **Volume**:	The number of cryptoasset units traded during the minute.
*   **VWAP**: The average price of the asset over the time interval, weighted by volume. VWAP is an aggregated form of trade data.
*   **Target**: Residual log-returns for the asset over a 15 minute horizon. 


In [None]:
# Preview the first five rows of the training data.
data.head(n=5)

### Checking Null Rows

In [None]:
# Number of missing values in each column of the training data.
data.isna().sum()

In [None]:
# Count the rows with a missing Target once, then report both the absolute
# number and its share of all training rows.
missing_targets = data["Target"].isnull().sum()
total_rows = len(data)
print("Total Null Target Rows = ", missing_targets)
print(f"Percentage of NUll rows in Training Data = {missing_targets * 100 / total_rows:.2f}%")

# <center>DATA DISTRIBUTION</center> 

### Training Data Distribution among different Assets (Crypto Currencies)

In [None]:
# Bar chart of how many training rows each asset contributes.
#
# The asset IDs are taken from `asset_details` rather than assumed to be
# exactly 0..13, and the rows are counted in one pass with value_counts()
# instead of re-scanning the whole training frame once per asset.
assets_sorted = asset_details.sort_values("Asset_ID")
rows_per_asset = data["Asset_ID"].value_counts()
# Align the counts with the (sorted) asset order used for the x axis; an asset
# listed in `asset_details` but absent from `data` is shown with 0 rows.
asset_count = rows_per_asset.reindex(assets_sorted["Asset_ID"], fill_value=0).tolist()

fig = px.bar(x=assets_sorted["Asset_Name"],
             y=asset_count,
             color=asset_count,
             color_continuous_scale="Emrld")
fig.update_xaxes(title="Assets")
fig.update_yaxes(title="Number of Rows")
fig.update_layout(
    showlegend=True,
    title={
        'text': 'Data Distribution ',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template="plotly_white",
)
fig.show()

# <center>CANDLESTICK CHARTS</center> 

In [None]:
def crypto_df(asset_id, data=data):
    """Return the rows of one asset, indexed by datetime.

    Parameters
    ----------
    asset_id
        Value matched against the ``Asset_ID`` column.
    data
        Frame with ``Asset_ID`` and ``timestamp`` columns. Defaults to the
        module-level ``data`` frame as bound when this function is defined.

    Returns
    -------
    A new DataFrame holding only the matching rows, whose index is the
    ``timestamp`` column converted from Unix seconds to datetimes.
    """
    is_asset = data["Asset_ID"] == asset_id
    asset_rows = data[is_asset].reset_index(drop=True)
    # Timestamps are stored as seconds since the Unix epoch.
    asset_rows["timestamp"] = pd.to_datetime(asset_rows["timestamp"], unit="s")
    return asset_rows.set_index("timestamp")
# Per-asset frames used by the charts below: Asset_ID 1, 6 and 9 are bound to
# btc, eth and ltc respectively.
btc, eth, ltc = (crypto_df(asset_id=asset_id) for asset_id in (1, 6, 9))

In [None]:
def candelstick_chart(data, title):
    """Build a Plotly candlestick figure from a price frame.

    Parameters
    ----------
    data
        Frame with ``Open``, ``High``, ``Low`` and ``Close`` columns; its
        index is used for the x axis.
    title
        Text placed in front of "Candelstick Chart" in the figure title.

    Returns
    -------
    The configured ``go.Figure`` (not shown; call ``.show()`` on it).
    """
    trace = go.Candlestick(
        x=data.index,
        open=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
    )
    # Centered title near the top of the figure.
    title_spec = {
        'text': f'{title} Candelstick Chart',
        'y': 0.90,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }

    figure = go.Figure(data=[trace])
    figure.update_xaxes(title_text='Time', rangeslider_visible=True)
    figure.update_layout(title=title_spec, template="plotly_white")
    figure.update_yaxes(title_text='Price in USD', ticksuffix='$')
    return figure

### Bitcoin(BTC) Candlestick Chart for last 100 rows

In [None]:
# Candlestick chart of the 100 most recent Bitcoin rows.
btc_plot = candelstick_chart(btc.tail(100), title="Bitcoin(BTC)")
btc_plot.show()

### Litecoin(LTC) Candlestick Chart for last 2500 rows

In [None]:
# Candlestick chart of the 2500 most recent Litecoin rows.
ltc_plot = candelstick_chart(ltc.tail(2500), title="Litecoin(LTC)")
ltc_plot.show()

# <center>OHLC CHARTS</center> 

In [None]:
def ohlc_chart(data, title):
    """Build a Plotly OHLC figure from a price frame.

    Parameters
    ----------
    data
        Frame with ``Open``, ``High``, ``Low`` and ``Close`` columns; its
        index is used for the x axis.
    title
        Text placed in front of "OHLC Chart" in the figure title.

    Returns
    -------
    The configured ``go.Figure``.
    """
    trace = go.Ohlc(
        x=data.index,
        open=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
    )
    # Centered title near the top of the figure.
    title_spec = {
        'text': f'{title} OHLC Chart',
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }

    figure = go.Figure(data=[trace])
    figure.update_xaxes(title_text='Time', rangeslider_visible=True)
    figure.update_layout(title=title_spec, template="plotly_white")
    figure.update_yaxes(title_text='Price in USD', ticksuffix='$')
    return figure

### Bitcoin(BTC) OHLC Chart for first 200 rows

In [None]:
# OHLC chart of the first 200 Bitcoin rows.
ohlc_chart(btc.head(200), title="Bitcoin(BTC)")

### Ethereum(ETH) OHLC Chart for last 100 rows

In [None]:
# OHLC chart of the 100 most recent Ethereum rows.
ohlc_chart(eth.tail(100), title="Ethereum(ETH)")

# <center>AREA PLOTS</center> 

In [None]:
def vol_traded(data, title, color):
    """Build a Plotly area chart of the ``Volume`` column over time.

    Parameters
    ----------
    data
        Frame with a ``Volume`` column; its index is used for the x axis.
    title
        Text placed in front of "Volume Traded" in the figure title.
    color
        Line color applied to the area trace.

    Returns
    -------
    The configured Plotly figure.
    """
    area = px.area(data_frame=data,
                   x=data.index,
                   y="Volume",
                   markers=True)
    area.update_traces(line_color=color)
    area.update_xaxes(title_text='Time', rangeslider_visible=True)
    # The plotted column is Volume (units of the asset traded per minute),
    # not Count (number of trades), so the axis is labelled accordingly.
    area.update_yaxes(title_text='Units traded every minute')
    area.update_layout(
        showlegend=True,
        title={
            'text': '{:} Volume Traded'.format(title),
            'y': 0.94,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        template="plotly_white",
    )
    return area

### Ethereum(ETH) Volume Traded for last 50 rows

In [None]:
# Volume area chart of the 50 most recent Ethereum rows.
vol_traded(eth.tail(50), "Ethereum (ETH)", color="Red")

### Litecoin(LTC) Volume Traded for last 300 rows

In [None]:
# Volume area chart of the 300 most recent Litecoin rows.
vol_traded(ltc.tail(300), "Litecoin(LTC)", color="Blue")

# <center>If you find this notebook useful, support with an upvote👍</center>