# Data Visualization

What would be a good dataset to try? How about the historical price of Bitcoin?

Data is downloaded from the following source(s):
 - Price history https://www.blockchain.com/charts/market-price
 - Transaction count https://www.blockchain.com/charts/n-transactions
 - Total circulating bitcoin https://www.blockchain.com/charts/total-bitcoins

In [None]:
import os
import csv
import datetime
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot
import matplotlib.patheffects

from sklearn.linear_model import LinearRegression, RANSACRegressor

import style
style.use_dark_theme()
# style.use_light_theme()

In [None]:
def history_from_csv(filename, startrow=0):
    """Read a comma-delimited file into a header row and an array of cells.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.
    startrow : int, optional
        Number of leading rows to discard before the header row.

    Returns
    -------
    data : numpy.ndarray
        Every row after the header, with the cells left as strings so the
        caller can convert the columns it needs. Empty when there are no
        data rows.
    header : list of str
        The first row after the skipped ones, or ``[]`` if there is none.
    """
    # newline='' hands line-ending handling to the csv module, which it
    # needs in order to parse quoted fields containing line breaks.
    with open(filename, newline='') as file:
        rows = list(csv.reader(file, delimiter=','))
    # Slicing tolerates a file with fewer than ``startrow`` rows, where a
    # bare next() call would let StopIteration escape to the caller.
    rows = rows[max(startrow, 0):]
    header = rows[0] if rows else []
    data = np.array(rows[1:])
    return data, header

In [None]:
def v2str(v):
    """Format a number with thousands separators and just enough decimals.

    Values of 1 or more are shown without decimals (``1000`` -> ``'1,000'``).
    Values between 0 and 1 get as many decimals as are needed to reach their
    leading significant digit (``0.01`` -> ``'0.01'``, ``0.5`` -> ``'0.5'``).
    Zero, negative and NaN values have no usable logarithm and are shown
    without decimals.
    """
    if not v > 0:
        decimals = 0
    else:
        # Round before taking the ceiling so that an exact power of ten whose
        # logarithm comes out a hair off (e.g. 2.9999999999999996) still
        # gets the intended number of decimals.
        magnitude = np.round(-np.log10(v), 9)
        # Clamp before int() so that an infinite value maps to 0 decimals.
        decimals = int(max(np.ceil(magnitude), 0))
    return '{:,.{}f}'.format(v, decimals)

# Tick-label callbacks built on v2str; matplotlib passes the tick value and
# its position, and only the value is used here.
def _plain_tick(value, pos):
    return v2str(value)


def _dollar_tick(value, pos):
    return '$' + v2str(value)


xfmt = matplotlib.ticker.FuncFormatter(_plain_tick)
yfmt = matplotlib.ticker.FuncFormatter(_dollar_tick)

# Date-axis helpers shared by the time-history plots
years = matplotlib.dates.YearLocator()             # one tick per year
months = matplotlib.dates.MonthLocator()           # one tick per month
years_fmt = matplotlib.dates.DateFormatter('%Y')   # label ticks with the 4-digit year

### History of Price, Transactions, and Circulating Coins from CSV Files

In [None]:
def str2datetime(x):
    """Parse 'YYYY-MM-DD HH:MM:SS' strings into datetimes snapped to midnight.

    The time-of-day part of every stamp is discarded, so each result marks
    the start of its calendar day.
    """
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    stamps = []
    for text in x:
        moment = datetime.datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
        stamps.append(moment.replace(**midnight))
    return stamps

# The raw series are sampled sparsely, so each one is binned onto the same
# weekly grid before the tables are combined.
rss = 'W-Mon'
# rss = 'M'

# Price: keep the last value that falls in each weekly bin
filename = 'btc-price.csv'
x, _ = history_from_csv(filename)
d = str2datetime(x[:, 0])
p = list(map(float, x[:, 1]))
pp = pd.DataFrame(data={'Price': p}, index=d)
wp = pp.resample(rss).last()

# Transactions: average the counts that fall in each weekly bin
filename = 'btc-trns.csv'
x, _ = history_from_csv(filename)
d = str2datetime(x[:, 0])
t = list(map(int, x[:, 1]))
tt = pd.DataFrame(data={'Transactions': t}, index=d)
wt = tt.resample(rss).mean()

# Stock and Flow: Flow is the change in Stock from one source row to the
# next (the first row keeps its own Stock value); both are averaged per
# weekly bin and empty bins are filled by interpolation
filename = 'btc-stock.csv'
x, _ = history_from_csv(filename)
d = str2datetime(x[:, 0])
s = list(map(float, x[:, 1]))
f = np.diff(s, prepend=0.0)
ss = pd.DataFrame(data={'Stock': s, 'Flow': f}, index=d)
ws = ss.resample(rss).mean().interpolate()

### Combine Tables

In [None]:
# Place the weekly tables side by side, keeping only the dates that all
# three of them share
df = pd.concat((wp, wt, ws), axis='columns', join='inner')
df.head(n=5)

In [None]:
# Pull each series out of the combined table by column label rather than by
# position, so the arrays cannot get mixed up if the column order changes.
d = df.index                    # Date (weekly bins)
p = df['Price'].values          # Price
t = df['Transactions'].values   # Transaction count
s = df['Stock'].values          # Stock
f = df['Flow'].values           # Flow (change in Stock between source rows)
# Stock-to-Flow ratio; the 365.25 factor annualizes the flow.
# NOTE(review): this treats Flow as a per-day figure, which holds only if
# the stock CSV has one row per day -- confirm against the source file.
s2f = s / (f * 365.25)
mc = s * p                      # Market capitalization (Stock x Price)

In [None]:
# Overview: all four series against time on a shared logarithmic y-axis
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=144)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.semilogy(d, p, label='Price')
ax.semilogy(d, t, label='Trans')
ax.semilogy(d, s, label='Count')
ax.semilogy(d, s2f, '.', markersize=2, label='S2F')
ax.legend()
ax.grid()
# Labelled major tick per year, unlabelled minor tick per month
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)
ax.set_ylabel('Price / Transactions / Count / S2F')
title_text = ax.set_title('Time History', fontweight='bold', fontsize=16)
# Draw a thin translucent black stroke around the title, then the text itself
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# Stock-to-Flow ratio on its own, against time on a linear y-axis
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=144)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.plot(d, s2f, label='S2F')
ax.legend()
ax.grid()
# Labelled major tick per year, unlabelled minor tick per month
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)
# Only S2F is drawn here, so the axis label names just that series
ax.set_ylabel('S2F')
title_text = ax.set_title('Time History', fontweight='bold', fontsize=16)
# Draw a thin translucent black stroke around the title, then the text itself
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# Row positions in df where each era begins: position 0 (start of the data)
# followed by the weekly bin nearest to each of the three dates below.
# Index.get_loc(..., method='nearest') was deprecated in pandas 1.4 and
# removed in 2.0; get_indexer() performs the same nearest-label lookup and
# works on both old and new versions.
halvings = pd.to_datetime(['2012-11-28', '2016-07-09', '2020-05-11'])
hh = [0] + [int(k) for k in df.index.get_indexer(halvings, method='nearest')]
print(hh, np.diff(hh))

In [None]:
# mx = np.logspace(-1, 2.5)
# my = np.exp(3.31954 * np.log(mx) + 14.6227)

In [None]:
# Market value against S2F on log-log axes, one colour per era
fig = matplotlib.pyplot.figure(figsize=(7, 4.5), dpi=144)
ax = matplotlib.pyplot.axes([0.25, 0.12, 0.72, 0.76])

# Reference points for gold and silver
ax.loglog(62, 8.5e12, '.', markersize=20, color='#C29E29', label='Gold (SF62, 8.5T)')
ax.loglog(22, 308e9, '.', markersize=20, color='#999999', label='Silver (SF22, 308B)')

# Each era runs from its entry in hh up to the next entry; the last era runs
# to the end of the data
for i, (b, e) in enumerate(zip(hh, hh[1:] + [len(d)])):
    x = s2f[b:e]
    y = mc[b:e]   # alternative: p[b:e] to plot price instead
    label = 'Halving {}'.format(i) if i else 'Genesis'
    ax.loglog(x, y, '.', markersize=3, label=label)

ax.legend(loc='upper left')
ax.grid()
ax.set_xlim(0.1, 200)
ax.set_ylim(1000, 100e12)

# One major y tick per decade from 10^3 to 10^14
loc = [10 ** k for k in range(3, 15)]
ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(loc))
ax.xaxis.set_major_formatter(xfmt)
ax.yaxis.set_major_formatter(yfmt)
ax.set_xlabel('S2F')
# ax.set_ylabel('Market Capitalization')
# ax.text(1.8e2, 2e-2, 'Two pizzas for 10k BTC')

title_text = ax.set_title('Market Value', fontweight='bold', fontsize=16)
# Draw a thin translucent black stroke around the title, then the text itself
outline = matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7))
title_text.set_path_effects([outline, matplotlib.patheffects.Normal()])

In [None]:
# Write the current figure to the user's Downloads folder on a black background
fig.savefig(
    os.path.expanduser('~/Downloads/s2f.png'),
    facecolor='k',
)

### Data Fitting

In [None]:
# Fit log10(market cap) as a linear function of log10(S2F).
# The logarithm needs strictly positive, finite inputs. Selecting them with a
# mask drops every unusable sample wherever it occurs; counting the zeros
# and slicing from that count would only work if all of them came first.
valid = np.isfinite(s2f) & np.isfinite(mc) & (s2f > 0) & (mc > 0)
ix = np.log10(s2f[valid]).reshape(-1, 1)   # one feature column, as the estimators expect 2-D input
iy = np.log10(mc[valid])

linreg = LinearRegression().fit(ix, iy)
# RANSAC fits on randomly drawn subsets; a fixed seed makes the reported
# coefficients reproducible from run to run.
ransac = RANSACRegressor(random_state=0).fit(ix, iy)

# Evaluate both fits on a log-spaced S2F grid from 10^-1 to 10^2.5
mx = np.expand_dims(np.logspace(-1, 2.5), 1)
my = 10 ** linreg.predict(np.log10(mx))
print('Linear Regression: log10(y) = {:.4f} * log10(S2F) + {:.4f}'.format(linreg.coef_[0], linreg.intercept_))

my2 = 10 ** ransac.predict(np.log10(mx))
print('RANSAC Regression: log10(y) = {:.4f} * log10(S2F) + {:.4f}'.format(ransac.estimator_.coef_[0], ransac.estimator_.intercept_))

In [None]:
# Market value against S2F, with each point coloured by the number of months
# elapsed since the start of its era, plus the RANSAC fit as a dashed line
fig = matplotlib.pyplot.figure(figsize=(7, 4.5), dpi=144)
ax = matplotlib.pyplot.axes([0.25, 0.12, 0.72, 0.76])
ax.tick_params(axis='y')
# ax.plot(mx, my, '--', linewidth=0.5, color='#666666')
ax.plot(mx, my2, '--', linewidth=0.5, color='#666666')
ax.plot(62, 8.5e12, '.', markersize=20, color='#C29E29', label='Gold (SF62, 8.5T)')
ax.plot(22, 308e9, '.', markersize=20, color='#999999', label='Silver (SF22, 308B)')
ax.text(52, 6e12, 'Gold (SF62, 8.5T)', fontsize=8, ha='right')
ax.text(26, 2e11, 'Silver (SF22, 308B)', fontsize=8)

for i, b in enumerate(hh):
    # An era ends where the next one starts; the last one runs to the end
    e = hh[i + 1] if i + 1 < len(hh) else len(d)
    x = s2f[b:e]
    y = mc[b:e]
    # Months since the first sample of this era. total_seconds() replaces a
    # cast through np.float, an alias that was removed in NumPy 1.24.
    elapsed = df.index[b:e] - df.index[b]
    w = np.asarray(elapsed.total_seconds()) / 86400 / 365.25 * 12
    label = 'Genesis' if i == 0 else 'Halving {}'.format(i)
    hs = ax.scatter(x, y, c=w, vmin=0, vmax=48, cmap='rainbow', s=3, label=label)
# ax.legend(loc=2)
ax.set_xscale('log')
ax.set_yscale('log')
ax.grid()
# ax.set_xlim((0.1, 100))
# ax.set_ylim((0.01, 1e6))
ax.set_xlim((0.1, 200))
ax.set_ylim((1e4, 100e12))
# One major y tick per decade from 10^3 to 10^14
loc = [10 ** k for k in range(3, 15)]
ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(loc))
ax.xaxis.set_major_formatter(xfmt)
ax.yaxis.set_major_formatter(yfmt)
ax.set_xlabel('S2F')

# Every scatter shares the same colour limits and colormap, so the handle of
# the last one is enough to build the colorbar
cax = fig.add_axes((0.55, 0.2, 0.4, 0.03))
fig.colorbar(hs, cax=cax, orientation='horizontal')
cax.set_title('Months After Halving')

# Hand-placed era labels. The coordinates are unpacked into their own names
# so the loop does not overwrite the global price array ``p``.
for i, (tx, ty) in enumerate(((1, 1e6), (3.5, 1e8), (10, 2e9), (20, 2e10))):
    label = 'Genesis' if i == 0 else 'Halving {}'.format(i)
    ax.text(tx, ty, label, fontsize=8)

title_text = ax.set_title('Market Value', fontweight='bold', fontsize=16)
# Draw a thin translucent black stroke around the title, then the text itself
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# Write the current figure to the user's Downloads folder on a black background
fig.savefig(
    os.path.expanduser('~/Downloads/s2f-m.png'),
    facecolor='k',
)