# Regression Fitting

What could be a good dataset to try? How about the historical price of Bitcoin?

Data is downloaded from the following source(s):
 - Price history from https://www.blockchain.com/charts/market-price
 - Transaction count from https://www.blockchain.com/charts/n-transactions

In [None]:
import csv
import datetime
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot
import matplotlib.patheffects

from sklearn.linear_model import LinearRegression

import style
# `style` is a local module that is not shown in this notebook; presumably
# these helpers switch the matplotlib theme -- TODO confirm in style.py.
# Swap which of the two lines is commented out to change the theme.
style.use_dark_theme()
# style.use_light_theme()

In [None]:
def history_from_csv(filename, startrow=0):
    """Read a comma-separated file into a header row and an array of rows.

    Args:
        filename: Path of the CSV file to read.
        startrow: Number of leading rows to discard before the header row.

    Returns:
        A ``(data, header)`` tuple. ``header`` is the first row after the
        skipped ones, as a list of strings (``[]`` when no such row exists).
        ``data`` is ``np.array`` built from every remaining row; fields are
        left as strings, so callers convert the columns they need.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are altered by universal-newline translation.
    with open(filename, newline='') as file:
        reader = csv.reader(file, delimiter=',')
        for _ in range(startrow):
            # Use a default so a startrow beyond the end of the file yields
            # an empty result instead of leaking StopIteration to the caller.
            if next(reader, None) is None:
                break
        header = next(reader, [])
        data = np.array(list(reader))
    return data, header

In [None]:
# Import the submodule explicitly: `import matplotlib` on its own does not
# guarantee that the `matplotlib.dates` attribute is available.
import matplotlib.dates

# Tick locators and label format for date axes
years = matplotlib.dates.YearLocator()             # every year
months = matplotlib.dates.MonthLocator()           # every month
years_fmt = matplotlib.dates.DateFormatter('%Y')   # four-digit year labels

### Price and Transaction History from CSV Files

In [None]:
def _parse_timestamps(column):
    """Turn 'YYYY-MM-DD HH:MM:SS' strings into datetime objects."""
    return [datetime.datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
            for text in column]


# Price history: first column is the timestamp, second the price.
# The weekly value is the highest price seen in each 'W-Mon' bin.
filename = 'btc-price.csv'
x, _ = history_from_csv(filename)
d = _parse_timestamps(x[:, 0])
p = list(map(float, x[:, 1]))
pdf = pd.DataFrame({'Price': p}, index=d)
wpdf = pdf.resample('W-Mon').max()

# Transaction counts: same layout, but the weekly value is the mean
# of the counts in each 'W-Mon' bin.
filename = 'btc-trns.csv'
x, _ = history_from_csv(filename)
d = _parse_timestamps(x[:, 0])
t = list(map(int, x[:, 1]))
tdf = pd.DataFrame({'Transactions': t}, index=d)
wtdf = tdf.resample('W-Mon').mean()

### Combine the Two DataFrames

In [None]:
# Put the weekly price and transaction columns side by side;
# join='inner' keeps only the dates present in both frames.
df = pd.concat((wpdf, wtdf), axis='columns', join='inner')
df.head()

In [None]:
d = df.index                     # Date
p, t = df.values[:, :2].T        # Price, Transactions (Flow)
s = t.cumsum()                   # Stock: running total of the flow
s2f = np.divide(s, t)            # Stock-to-Flow Ratio

In [None]:
# Price and S2F ratio against time, both on a logarithmic y axis
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=160)
ax = fig.add_axes([0.06, 0.08, 0.92, 0.82])

ax.semilogy(d, p, label='Price')
ax.semilogy(d, s2f, '.', markersize=2, label='S2F')

# Date axis: labelled major tick per year, unlabelled minor tick per month
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(years_fmt)
ax.xaxis.set_minor_locator(months)

ax.set(xlabel='Date', ylabel='Price / S2F Ratio')
ax.grid()
ax.legend()

# Bold title with a thin translucent black outline
title_outline = [
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal(),
]
title_text = ax.set_title('Time History', fontweight='bold', fontsize=16)
title_text.set_path_effects(title_outline)

In [None]:
# Row positions in df closest to the dates that separate the
# 'Halving 1', 'Halving 2' and 'Halving 3' segments plotted below.
# Index.get_loc(..., method='nearest') was deprecated in pandas 1.4 and
# removed in pandas 2.0; Index.get_indexer(..., method='nearest') is the
# replacement and also works on older pandas versions.
halving_dates = pd.to_datetime(['2012-11-28', '2016-07-09', '2020-05-11'])
i1, i2, i3 = (
    int(pos) for pos in df.index.get_indexer(halving_dates, method='nearest')
)
print(i1, i2, i3)

In [None]:
# Price against S2F ratio on log-log axes, one colour per halving era
fig = matplotlib.pyplot.figure(figsize=(7, 4.5), dpi=160)
ax = fig.add_axes([0.06, 0.08, 0.92, 0.82])
ax.tick_params(axis='y')

# Eras are plotted in chronological order so each takes the next colour
# of the cycle; the first 20 samples are left out of the 'Genesis' era.
era_slices = [
    ('Genesis', slice(20, i1)),
    ('Halving 1', slice(i1, i2)),
    ('Halving 2', slice(i2, i3)),
    ('Halving 3', slice(i3, None)),
]
for era_name, era_slice in era_slices:
    ax.loglog(s2f[era_slice], p[era_slice], '.', label=era_name)

ax.set(xlim=(4, 1000), ylim=(0.01, 200000), xlabel='S2F', ylabel='Price')
ax.grid()
ax.legend()

# Bold title with a thin translucent black outline
title_outline = [
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal(),
]
title_text = ax.set_title('S2F', fontweight='bold', fontsize=16)
title_text.set_path_effects(title_outline)

In [None]:
# Toy check of LinearRegression on noise-free data generated from
# y = 1 * x_0 + 2 * x_1 + 3
x = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
weights = np.array([1, 2])
y = x @ weights + 3

reg = LinearRegression()
reg.fit(x, y)
reg.score(x, y)

In [None]:
# Fitted coefficients; the toy target above was generated with weights 1 and 2.
reg.coef_

In [None]:
# Fitted intercept; the toy target above was generated with an offset of 3.
reg.intercept_