## Import Libraries

In [1]:
import os
import pathlib
import json
import sqlalchemy as db

from datetime import datetime, date

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import mplfinance as mpf

import plotly.offline as pyo
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

pyo.init_notebook_mode(connected=True)


## Configurations

In [2]:
import quandl
import pandas_datareader as pdr
from pycoingecko import CoinGeckoAPI

# Read the Quandl key from the environment instead of storing the secret in the notebook.
# NOTE(review): the key that used to be hard-coded here is in version history; revoke it.
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY")
if not quandl.ApiConfig.api_key:
    print("QUANDL_API_KEY is not set; Quandl requests will be sent without an API key.")
cg = CoinGeckoAPI()


# Directory the notebook runs from, and its parent (the cache folder is resolved from the parent).
project_dir = pathlib.Path().absolute()
workdir = project_dir.parent
# print(f"{project_dir}\n{workdir}")

## Build functions

In [3]:

def jprint(obj):
    """Print ``obj`` as JSON text with sorted keys and a 4-space indent."""
    print(json.dumps(obj, indent=4, sort_keys=True))


def get_json_data(data_id, json_path, provider):
    """Download and cache JSON data, return as a dataframe.

    Parameters
    ----------
    data_id : str
        Name of the cache file (without extension) under ``<workdir>/02_Resources``.
    json_path : str
        Where to read the data from on a cache miss: anything ``pd.read_json``
        accepts (URL, file path) or a literal JSON document.
    provider : str
        Only used in the progress messages.

    Returns
    -------
    pandas.DataFrame
        The cached frame if the cache file can be opened, otherwise the frame
        read from ``json_path`` (which is then written to the cache).
    """
    from io import StringIO

    # Build the path with pathlib so it is valid on every OS; the result is the
    # same string as before on Windows.
    cache_path = pathlib.Path(workdir) / "02_Resources" / f"{data_id}.json"
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(cache_path, 'rb') as f:
            df = pd.read_json(f)
        print(f"Loaded {cache_path} from cache")
    except OSError:
        print(f"Downloading {data_id} from {provider}")
        source = json_path
        # Literal JSON must be wrapped in a buffer: passing it directly to
        # pd.read_json is deprecated in recent pandas releases.
        if isinstance(source, str) and source.lstrip().startswith(("{", "[")):
            source = StringIO(source)
        df = pd.read_json(source)
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_json(cache_path)
        print(f"Cached {data_id} at {cache_path}")
    return df



## Quandl Data import

def get_quandl_data(quandl_id):
    """Return the Quandl dataset ``quandl_id`` as a dataframe, going through the JSON cache.

    Parameters
    ----------
    quandl_id : str
        Quandl dataset code, e.g. ``'BCHARTS/KRAKENUSD'``. Slashes are replaced
        by dashes to form the cache file name.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``get_json_data``.
    """
    data_id = quandl_id.replace('/', '-')
    provider = "Quandl"

    # get_json_data ignores its json_path argument when the cache file exists,
    # so calling the Quandl API first would be a wasted request. The location
    # mirrors the one get_json_data uses; the cache file itself is passed as
    # json_path so the call still works if the two locations ever differ.
    cache_path = pathlib.Path(workdir) / "02_Resources" / f"{data_id}.json"
    if cache_path.is_file():
        return get_json_data(data_id, str(cache_path), provider)

    df = quandl.get(quandl_id, returns="pandas")
    json_path = df.to_json()
    return get_json_data(data_id, json_path, provider)


    
## Coingecko Data import



## Data Reader Data import

def get_datareader_data(datareader_id, provider):
    """Return pandas-datareader data for ``datareader_id`` as a dataframe, going through the JSON cache.

    Parameters
    ----------
    datareader_id : str
        Symbol passed to ``pdr.DataReader``; also used as the cache file name.
    provider : str
        Data source name passed to ``pdr.DataReader`` and shown in the progress messages.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``get_json_data``.
    """
    data_id = datareader_id

    # get_json_data ignores its json_path argument when the cache file exists,
    # so skip the download in that case. The cache file itself is passed as
    # json_path so the call still works if the two locations ever differ.
    cache_path = pathlib.Path(workdir) / "02_Resources" / f"{data_id}.json"
    if cache_path.is_file():
        return get_json_data(data_id, str(cache_path), provider)

    df = pdr.DataReader(datareader_id, provider)
    # Keep one row per index value (first occurrence, ordered by index value)
    # when the download contains repeated index entries.
    if df.shape[0] != df.index.nunique():
        idx = np.unique(df.index.values, return_index=True)[1]
        df = df.iloc[idx]

    json_path = df.to_json()
    return get_json_data(data_id, json_path, provider)


## Poloniex Data import

# Poloniex chart endpoint; placeholders are filled with pair, start, end and period, in that order.
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'
# Request window: from the start of 2014 ...
start_date = datetime.strptime('2014-01-01', '%Y-%m-%d')
# ... up until the moment this cell runs.
end_date = datetime.now()
# Daily candles: 24 h * 60 min * 60 s = 86,400 seconds.
period = 24 * 60 * 60

def get_poloniex_data(poloniex_pair):
    """Retrieve cryptocurrency data from poloniex

    Parameters
    ----------
    poloniex_pair : str
        Currency pair code, e.g. ``'USDT_BTC'``; also used as the cache file name.

    Returns
    -------
    pandas.DataFrame
        Chart data indexed by its ``date`` column.

    Notes
    -----
    The request window and candle size come from the module-level
    ``start_date``, ``end_date`` and ``period`` as they are bound at call time.
    """
    data_id = poloniex_pair
    provider = "Poloniex"
    json_url = base_polo_url.format(poloniex_pair, start_date.timestamp(), end_date.timestamp(), period)
    # Pass the provider label (not the pair) so the download message names the source.
    df = get_json_data(data_id, json_url, provider)
    df = df.set_index('date')
    return df

## Initial Data Exploration

### Look at Quandl Data

In [4]:
# Load the Quandl dataset BCHARTS/KRAKENUSD through the cached loader.
df_sample_quandl_kraken = get_quandl_data('BCHARTS/KRAKENUSD')

Loaded C:\Users\akosr\CAS_DAENG\crypto_project\02_Resources\BCHARTS-KRAKENUSD.json from cache


In [5]:
# Preview the first rows of the Kraken frame.
df_sample_quandl_kraken.head()

Unnamed: 0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
2014-01-07,874.6704,892.06753,810.0,810.0,15.622378,13151.472844,841.835522
2014-01-08,810.0,899.84281,788.0,824.98287,19.182756,16097.329584,839.156269
2014-01-09,825.56345,870.0,807.42084,841.86934,8.158335,6784.249982,831.572913
2014-01-10,839.99,857.34056,817.0,857.33056,8.02451,6780.220188,844.938794
2014-01-11,858.2,918.05471,857.16554,899.84105,18.748285,16698.566929,890.671709


In [6]:
# First index label of the Kraken frame.
df_sample_quandl_kraken.index[0]

Timestamp('2014-01-07 00:00:00')

In [7]:
# Last index label of the Kraken frame.
df_sample_quandl_kraken.index[-1]

Timestamp('2021-04-06 00:00:00')

In [8]:
# Candlestick chart built from the Open/High/Low/Close columns of the Kraken frame.
fig = go.Figure()
fig.add_trace(
    go.Candlestick(
        x=df_sample_quandl_kraken.index,
        open=df_sample_quandl_kraken['Open'],
        high=df_sample_quandl_kraken['High'],
        low=df_sample_quandl_kraken['Low'],
        close=df_sample_quandl_kraken['Close'],
    )
)
fig.show()

### Look at Coingecko market & exchange data

In [9]:
# Fetch the CoinGecko coin list and keep just the 'name' of every entry.
coins_list = cg.get_coins_list()
coin_names = [coin['name'] for coin in coins_list]

In [10]:
# jprint(coins_list)

In [11]:
# Fetch USD market data from CoinGecko and count the returned entries by name.
markets_list = cg.get_coins_markets(vs_currency='usd')
market_names = [entry['name'] for entry in markets_list]
len(market_names)

100

In [12]:
# jprint(markets_list)

In [13]:
# One row per market entry, one column per key of the API records.
df_cg_markets = pd.DataFrame(markets_list)
df_cg_markets.head()

Unnamed: 0,id,symbol,name,image,current_price,market_cap,market_cap_rank,fully_diluted_valuation,total_volume,high_24h,...,total_supply,max_supply,ath,ath_change_percentage,ath_date,atl,atl_change_percentage,atl_date,roi,last_updated
0,bitcoin,btc,Bitcoin,https://assets.coingecko.com/coins/images/1/la...,55906.0,1046347391357,1,1176535000000.0,69065510817,58662.0,...,21000000.0,21000000.0,61712.0,-9.13927,2021-03-13T20:49:26.606Z,67.81,82590.8793,2013-07-06T00:00:00.000Z,,2021-04-07T19:06:54.102Z
1,ethereum,eth,Ethereum,https://assets.coingecko.com/coins/images/279/...,1960.27,228312309468,2,,38015078015,2128.03,...,,,2153.08,-8.09089,2021-04-06T01:14:31.985Z,0.432979,456938.95068,2015-10-20T00:00:00.000Z,"{'times': 45.9727573797151, 'currency': 'btc',...",2021-04-07T19:07:25.299Z
2,binancecoin,bnb,Binance Coin,https://assets.coingecko.com/coins/images/825/...,368.78,57670787478,3,63641870000.0,7982674849,412.45,...,170533700.0,170533651.9,412.45,-9.39138,2021-04-07T06:53:56.811Z,0.039818,938473.10423,2017-10-19T00:00:00.000Z,,2021-04-07T19:07:55.269Z
3,tether,usdt,Tether,https://assets.coingecko.com/coins/images/325/...,0.99353,43252501976,4,,154242894003,1.0,...,43168100000.0,,1.32,-24.27188,2018-07-24T00:00:00.000Z,0.572521,75.00759,2015-03-02T00:00:00.000Z,,2021-04-07T19:06:34.171Z
4,ripple,xrp,XRP,https://assets.coingecko.com/coins/images/44/l...,0.907172,42409743442,5,,20423909598,1.1,...,100000000000.0,,3.4,-72.81697,2018-01-07T00:00:00.000Z,0.002686,34290.57178,2014-05-22T00:00:00.000Z,,2021-04-07T19:08:00.452Z


In [14]:
# Fetch the CoinGecko exchange list and count the returned entries by name.
exchanges_list = cg.get_exchanges_list()
exchange_names = [entry['name'] for entry in exchanges_list]
len(exchange_names)

100

In [15]:
# jprint(exchanges_list)

In [16]:
# One row per exchange entry, one column per key of the API records.
df_cg_exchanges = pd.DataFrame(exchanges_list)
df_cg_exchanges.head()

Unnamed: 0,id,name,year_established,country,description,url,image,has_trading_incentive,trust_score,trust_score_rank,trade_volume_24h_btc,trade_volume_24h_btc_normalized
0,binance,Binance,2017.0,Cayman Islands,,https://www.binance.com/,https://assets.coingecko.com/markets/images/52...,False,10,1,813727.951758,813727.951758
1,gdax,Coinbase Pro,2012.0,United States,,https://www.coinbase.com,https://assets.coingecko.com/markets/images/23...,False,10,2,64487.159778,64487.159778
2,kraken,Kraken,2011.0,United States,,https://r.kraken.com/Q1m9x,https://assets.coingecko.com/markets/images/29...,False,10,3,35036.604107,35036.604107
3,bitfinex,Bitfinex,2014.0,British Virgin Islands,,https://www.bitfinex.com,https://assets.coingecko.com/markets/images/4/...,False,10,4,28366.704117,28366.704117
4,binance_us,Binance US,2019.0,United States,,https://www.binance.us/en,https://assets.coingecko.com/markets/images/46...,False,10,5,16581.311583,16581.311583


### Look at Coingecko BTC data

In [17]:
# Date window for the CoinGecko range query, also expressed as POSIX timestamps.
# NOTE(review): this rebinds the module-level start_date/end_date that
# get_poloniex_data reads at call time, so Poloniex requests made after this
# cell use this window too — confirm that is intended.
start_date = datetime.strptime("2014-01-07", "%Y-%m-%d")
end_date = datetime.strptime("2021-04-01", "%Y-%m-%d")
timestamp_start = start_date.timestamp()
timestamp_end = end_date.timestamp()

In [None]:
# Request the "bitcoin" market chart in "usd" between the two timestamps computed above.
btc_charts_data = cg.get_coin_market_chart_range_by_id("bitcoin", "usd", timestamp_start, timestamp_end)

In [None]:
# jprint(btc_charts_data)

In [None]:
# Print how many data points each series of the response holds.
for series_name, points in btc_charts_data.items():
    print(f"{series_name}: # {len(points)}")

In [None]:
# Market-cap series as a frame indexed by datetime (timestamps are read as milliseconds).
df_btc_charts_data_mc = (
    pd.DataFrame.from_dict(btc_charts_data['market_caps'])
    .rename(columns={0: 'timestamp', 1: 'market_cap'})
    .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms'))
    .drop('timestamp', axis=1)
    .set_index('datetime')
)
print(f"Duplicated indices: {df_btc_charts_data_mc.index.duplicated().sum()}")
df_btc_charts_data_mc.tail()

In [None]:
# One two-column frame (timestamp + value) per series of the response.
# Note: this rebinds df_btc_charts_data_mc to the un-indexed version.
df_btc_charts_data_mc = pd.DataFrame.from_dict(btc_charts_data['market_caps']).rename(
    columns={0: 'timestamp', 1: 'market_cap'}
)

df_btc_charts_data_pc = pd.DataFrame.from_dict(btc_charts_data['prices']).rename(
    columns={0: 'timestamp', 1: 'price'}
)

df_btc_charts_data_vol = pd.DataFrame.from_dict(btc_charts_data['total_volumes']).rename(
    columns={0: 'timestamp', 1: 'volume'}
)


In [None]:
# Inner-join market cap, price and volume on their shared 'timestamp' column.
df_btc_charts_merged = (
    df_btc_charts_data_mc
    .merge(df_btc_charts_data_pc, how="inner", on='timestamp')
    .merge(df_btc_charts_data_vol, how='inner', on='timestamp')
)
df_btc_charts_merged.tail()

In [None]:
# Add a datetime column (timestamps read as milliseconds), index by it and drop the raw timestamp.
df_btc_charts_merged['datetime'] = pd.to_datetime(df_btc_charts_merged['timestamp'], unit='ms')
df_cg_btc_data = df_btc_charts_merged.set_index('datetime').drop(columns='timestamp')
df_cg_btc_data.tail()

In [None]:
# Count repeated index values, then the number of distinct values per column.
print(f"Duplicated indices: {df_cg_btc_data.index.duplicated().sum()}")
print(df_cg_btc_data.nunique())

In [None]:
# Rows whose index value already appeared earlier in the frame.
df_cg_btc_data[df_cg_btc_data.index.duplicated()]

In [None]:
# CoinGecko BTC overview: price on the top panel, market cap and volume below.
# The x values are taken from each series' own index rather than from a global
# named `date`, which would shadow the `date` class imported from datetime.
price = df_cg_btc_data['price']
market_cap = df_cg_btc_data['market_cap']
volume = df_cg_btc_data['volume']

fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    vertical_spacing=0.1, subplot_titles=('Price', 'Volume & Market Cap'),
                    row_width=[0.5, 1])

fig.add_trace(go.Scatter(x=price.index, y=price,
                         mode='lines',
                         name='Price'), row=1, col=1)

fig.add_trace(go.Scatter(x=market_cap.index, y=market_cap,
                         name='Market_cap'), row=2, col=1)

fig.add_trace(go.Bar(x=volume.index, y=volume,
                     name='Volume',
                     opacity=0.2,
                     marker=dict(
                         line=dict(color='firebrick', width=2)
                     )), row=2, col=1)

fig.update_annotations(font_size=12)

# Log scale on the second y axis (the lower panel).
fig.update_layout(yaxis2_type="log")

fig.show()

### Look at Yahoo Finance BTC data

In [None]:
# Load 'BTC-USD' from the "yahoo" data source through the cached loader and preview the last rows.
df_yahoo_btc_data = get_datareader_data('BTC-USD', "yahoo")
df_yahoo_btc_data.tail()

### Comparison Chart: BTC Price and Volume by Data Source

In [None]:
# Compare the three BTC sources: prices on the top panel, volumes on the bottom one.
quandl_kraken = df_sample_quandl_kraken
coingecko = df_cg_btc_data
yahoo = df_yahoo_btc_data

fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    vertical_spacing=0.1, subplot_titles=('Price', 'Volume'),
                    row_width=[0.5, 1])

# (frame, column, legend label) for each price line, in drawing order.
price_traces = [
    (quandl_kraken, 'Weighted Price', 'BTC WAvg Price Kraken'),
    (coingecko, 'price', 'BTC Price Coingecko'),
    (yahoo, 'Adj Close', 'BTC Adj Close Yahoo Finance'),
]
for frame, column, label in price_traces:
    fig.add_trace(go.Scatter(x=frame.index, y=frame[column],
                             mode='lines',
                             name=label), row=1, col=1)

# (frame, column, legend label, opacity, outline colour) for each volume bar series.
volume_traces = [
    (quandl_kraken, 'Volume (Currency)', 'Volume Kraken', 0.1, 'firebrick'),
    (coingecko, 'volume', 'Volume Coingecko', 0.6, 'darkorange'),
    (yahoo, 'Volume', 'Volume Yahoo', 0.5, 'floralwhite'),
]
for frame, column, label, bar_opacity, outline in volume_traces:
    fig.add_trace(go.Bar(x=frame.index, y=frame[column],
                         name=label,
                         opacity=bar_opacity,
                         marker=dict(
                             line=dict(color=outline, width=2)
                         )), row=2, col=1)

fig.update_annotations(font_size=12)

# Log scale on the second y axis (the volume panel).
fig.update_layout(yaxis2_type="log")

fig.show()

## Data Ingestion: Populate BTC Data

### Ingest more Exchange data from Quandl

In [None]:
from quandl.errors.quandl_error import NotFoundError

# Try every upper-cased CoinGecko exchange id as a BCHARTS/<ID>USD dataset code.
exchanges = df_cg_exchanges['id'].str.upper().tolist()

df_run_all = []

for exchange in exchanges:
    exchange_code = f"BCHARTS/{exchange}USD"
    try:
        df = get_quandl_data(exchange_code)
    except NotFoundError:
        # Quandl has no dataset under this code; skip the exchange.
        continue
    else:
        df['Exchange'] = exchange
        df_run_all.append(df)

# Stack the per-exchange frames into one long frame.
df_quandl_btc_data = pd.concat(df_run_all)

In [None]:
# Give the two volume columns identifier-friendly names.
df_quandl_btc_data = df_quandl_btc_data.rename(
    columns={'Volume (BTC)': 'Volume_BTC', 'Volume (Currency)': 'Volume_USD'}
)
df_quandl_btc_data.tail()

### Ingest BTC Data from Poloniex

In [None]:
# Target column layout, matching the Quandl BTC frame.
column_names=['Open', 'High', 'Low', 'Close', 'Volume_BTC', 'Volume_USD', 'Weighted Price', 'Exchange']

# Title-case the Poloniex column labels, map them to the shared names,
# tag the rows with the exchange and keep only the target columns.
df_poloniex_btc_data = (
    get_poloniex_data("USDT_BTC")
    .rename(columns=str.title)
    .rename(columns={'Volume': 'Volume_USD', 'Quotevolume': 'Volume_BTC',
                     'Weightedaverage': 'Weighted Price'})
    .assign(Exchange='POLONIEX')
    .reindex(columns=column_names)
)

df_poloniex_btc_data.tail()

### Append Poloniex Data to build the final BTC Dataset

In [None]:
# Stack the Quandl and Poloniex frames. pd.concat replaces DataFrame.append,
# which was deprecated and later removed from pandas.
df_btc_data = pd.concat([df_quandl_btc_data, df_poloniex_btc_data])
# sort_values returns a new frame, so the result has to be assigned; mergesort
# is stable and keeps the existing row order within each exchange.
df_btc_data = df_btc_data.sort_values(by=['Exchange'], kind='mergesort')
df_btc_data

## Organize Data

In [None]:
engine = db.create_engine('sqlite:///crypto.db', echo=True)
# NOTE(review): binding MetaData to an engine is no longer supported in
# SQLAlchemy 2.0 — confirm the installed version before relying on `meta`.
meta = db.MetaData(engine)

# engine.begin() commits on success and releases the connection on exit, so no
# explicit close is needed. The statement is wrapped in text() because plain
# strings are not accepted by Connection.execute in SQLAlchemy 2.0.
with engine.begin() as con:
    con.execute(db.text('''
        CREATE TABLE IF NOT EXISTS BTC_Data
              (
                PriceID INT PRIMARY KEY,
                Date DATE NOT NULL, 
                Open MONEY NULL DEFAULT 0,
                High MONEY NULL DEFAULT 0,
                Low MONEY NULL DEFAULT 0,
                Close MONEY NULL DEFAULT 0,
                WeightedPrice MONEY NULL DEFAULT 0,
                Volume_USD MONEY NULL DEFAULT 0,
                Volume_BTC REAL NULL DEFAULT 0,
                Currency VARCHAR(5) NULL DEFAULT 'USD',
                MarketID INT NULL,
                ExchangeID INT NULL,
                FOREIGN KEY(MarketID) REFERENCES Market(MarketID) ON DELETE CASCADE,
                FOREIGN KEY(ExchangeID) REFERENCES Exchange(ExchangeID) ON DELETE CASCADE
              ); '''))
# MarketID / ExchangeID are declared as columns above because a FOREIGN KEY
# clause must name columns of the table being created.

In [None]:
# List the tables currently present in the database behind `engine`.
inspector = db.inspect(engine)
inspector.get_table_names()

In [None]:
# One line of 'Weighted Price' per exchange, on a log-scaled y axis.
fig = go.Figure()

exchanges_lab = set(df_btc_data['Exchange'])

# Iterate in sorted order so the traces and legend come out the same on every run
# (set iteration order is not guaranteed), and filter the frame once per exchange.
for exchange in sorted(exchanges_lab):
    exchange_rows = df_btc_data[df_btc_data['Exchange'] == exchange]
    fig.add_trace(go.Scatter(x=exchange_rows.index,
                             y=exchange_rows['Weighted Price'],
                             mode='lines',
                             name=f"BTC Weighted Price@ {exchange}"))

fig.update_yaxes(type="log")

fig.show()

## Data Ingestion: Populate Altcoin Data

In [None]:
# Show the 'symbol' column of the CoinGecko markets frame.
df_cg_markets['symbol']

In [None]:
from quandl.errors.quandl_error import NotFoundError

# Try every upper-cased CoinGecko market symbol as a BITFINEX/<SYMBOL>BTC dataset code.
markets = df_cg_markets['symbol'].str.upper().tolist()

df_run_mkt_new = []

for market in markets:
    market_code = f"BITFINEX/{market}BTC"
    try:
        df = get_quandl_data(market_code)
    except NotFoundError:
        # Quandl has no dataset under this code; skip the symbol.
        continue
    else:
        df['Market'] = market
        df_run_mkt_new.append(df)

# Stack the per-market frames into one long frame.
df_altcoin_quandl = pd.concat(df_run_mkt_new)

In [None]:
# Order the stacked altcoin rows by index value.
df_altcoin_quandl = df_altcoin_quandl.sort_index()

In [None]:
# Display the sorted Quandl altcoin frame.
df_altcoin_quandl

In [None]:
# Request a BTC_<SYMBOL> pair from Poloniex for every market symbol.
altcoins = markets
df_run_altcoin_new = []

for altcoin in altcoins:
    coinpair = f"BTC_{altcoin}"
    try:
        df = get_poloniex_data(coinpair)
        df['Market'] = altcoin
        df_run_altcoin_new.append(df)
    except ValueError:
        # Pairs that fail with ValueError are skipped.
        continue

# Stack the per-pair frames into one long frame.
df_altcoin_poloniex = pd.concat(df_run_altcoin_new)

In [None]:
# Order the stacked Poloniex altcoin rows by index value.
df_altcoin_poloniex = df_altcoin_poloniex.sort_index()

In [None]:
# Display the sorted Poloniex altcoin frame.
df_altcoin_poloniex

In [None]:
# Preview the four BTC frames. The names previously printed here
# (df_btc_quandl_new, df_btc_poloniex, df_btc_yahoo, df_btc_cg) are not defined
# by any cell of this notebook, so a fresh run raised NameError; the frames
# built in the sections above are used instead.
print(df_quandl_btc_data.head(5))
print(df_poloniex_btc_data.head(5))
print(df_yahoo_btc_data.head(5))
print(df_cg_btc_data.head(5))

In [None]:
# Preview the first five rows of both altcoin frames.
print(df_altcoin_quandl.head(5))
print(df_altcoin_poloniex.head(5))

In [None]:
# `df_btc_poloniex` is not defined by any earlier cell, so it is loaded here
# (from the cache when available) and its column labels are title-cased, which
# is the form the rename mapping below expects.
df_btc_poloniex = get_poloniex_data("USDT_BTC")
df_btc_poloniex.columns = df_btc_poloniex.columns.str.title()

df_btc_poloniex_new = df_btc_poloniex.rename(columns={'Volume': 'Volume_USD', 'Quotevolume': 'Volume_BTC',
                                                     'Weightedaverage': 'Weighted Price'})

df_btc_poloniex_new['Exchange'] = 'POLONIEX'

In [None]:
# Keep only the listed columns, in this order (a listed column that is absent is added by reindex).
column_names=['Open', 'High', 'Low', 'Close', 'Volume_BTC', 'Volume_USD', 'Weighted Price', 'Exchange']
df_btc_poloniex_new = df_btc_poloniex_new.reindex(columns=column_names)

In [None]:
# Quandl BTC rows indexed at 2021-04-02. `df_btc_quandl_new` is not defined by
# any cell of this notebook; the Quandl frame built above is used instead.
df_quandl_btc_data[df_quandl_btc_data.index=='2021-04-02']

In [None]:
# Poloniex BTC rows indexed at 2021-04-02.
df_btc_poloniex_new[df_btc_poloniex_new.index=='2021-04-02']

In [None]:
# Stack the Quandl and Poloniex BTC frames. pd.concat replaces the removed
# DataFrame.append, and the Quandl frame built above replaces
# `df_btc_quandl_new`, which no cell of this notebook defines.
df_btc_data = pd.concat([df_quandl_btc_data, df_btc_poloniex_new])

In [None]:
# Order the combined BTC rows by index value.
df_btc_data = df_btc_data.sort_index()

In [None]:
# Display the combined BTC frame.
df_btc_data

In [None]:
# Quandl altcoin rows indexed at 2021-04-02 whose Market is 'ETH'.
df_altcoin_quandl[(df_altcoin_quandl.index=='2021-04-02') & (df_altcoin_quandl['Market']=='ETH')]

In [None]:
# Poloniex altcoin rows indexed at 2021-04-02 whose Market is 'ETH'.
df_altcoin_poloniex[(df_altcoin_poloniex.index=='2021-04-02') & (df_altcoin_poloniex['Market']=='ETH')]

In [None]:
# Fetch Quandl dataset EIA/PET_RWTC_D directly (no JSON cache).
# NOTE(review): presumably a daily crude-oil price series, going by the variable name — confirm.
data_oil = quandl.get("EIA/PET_RWTC_D")

In [None]:
# Display the fetched dataset.
data_oil

In [None]:
# Average 'Weighted Price' over all rows that share an index value, then plot the result.
df_btc_data.groupby(df_btc_data.index).agg({'Weighted Price':'mean'}).plot(figsize=(15,10))