# Regression Fitting

What could be a good dataset to try? How about the historical price of Bitcoin?

Data is downloaded from the following sources:
 - Price history from http://www.cryptodatadownload.com/data/bitstamp/
 - Transactions count from https://www.blockchain.com/charts/n-transactions

In [None]:
import csv
import datetime
import os

import matplotlib
import matplotlib.dates
import matplotlib.patheffects
import matplotlib.pyplot
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

import style
# Select the plot theme from the local `style` module; swap which of the two
# calls is commented out to switch themes.
# NOTE(review): `style` is project-local, so what the call actually changes is
# not visible from this notebook -- confirm in style.py.
style.use_dark_theme()
# style.use_light_theme()

In [None]:
def history_from_csv(filename, startrow=0):
    """Load a comma-separated file as a header row plus an array of strings.

    Args:
        filename: Path of the CSV file to read.
        startrow: Number of leading rows to discard before the header row
            (for example a banner line that precedes the column names).

    Returns:
        A ``(data, header)`` tuple. ``header`` is the first row after the
        skipped ones as a list of strings (``[]`` when there is none).
        ``data`` is ``np.array`` built from all remaining rows; fields are
        left as strings, so callers convert the columns they need.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation asks for any file object handed to csv.reader.
    with open(filename, newline='') as file:
        reader = csv.reader(file, delimiter=',')
        # Skip the preamble. Stop quietly when the file is shorter than
        # startrow instead of leaking StopIteration to the caller.
        for _ in range(startrow):
            if next(reader, None) is None:
                break
        header = next(reader, [])
        rows = list(reader)
    data = np.array(rows)
    return data, header

In [None]:
# Date-axis tick helpers for the time-series plots in this notebook.
# NOTE: Matplotlib's ticker documentation advises against attaching one
# locator instance to more than one Axis.
years_fmt = matplotlib.dates.DateFormatter(fmt='%Y')    # four-digit year labels
months = matplotlib.dates.MonthLocator(interval=1)      # a tick every month
years = matplotlib.dates.YearLocator(base=1)            # a tick every year

### Price History

In [None]:
# Read the Bitstamp daily BTC/USD export; the first line of the file is
# skipped so that the second line is taken as the header.
filename = 'Bitstamp_BTCUSD_d.csv'
x, h = history_from_csv(filename, startrow=1)

# Column 0 carries a Unix timestamp, column 6 the price used below.
# NOTE(review): the last two characters of every timestamp are dropped before
# int() -- presumably a trailing ".0"; confirm against the downloaded file.
d = [datetime.datetime.fromtimestamp(int(stamp[:-2])) for stamp in x[:, 0]]
p = list(map(float, x[:, 6]))

# Index the prices by date and keep the last value of each week
# (weekly bins anchored on Monday).
pdf = pd.DataFrame(data={'Price': p}, index=d)
wpdf = pdf.resample(rule='W-Mon').last()
wpdf.head(n=5)

In [None]:
# Weekly price series (wpdf) on a logarithmic y-axis.
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=160)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.tick_params(axis='y')

ax.semilogy(wpdf.index, wpdf.values)
ax.grid()

# Format the ticks. Each axis gets its own locator/formatter instances:
# these objects keep a reference to the axis they are attached to, so one
# instance must not be reused across figures.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())           # every year
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())          # every month

ax.set_xlabel('Date')
ax.set_ylabel('Price')
title_text = ax.set_title('Price', fontweight='bold', fontsize=16)
# Outline the title with a thin, semi-transparent black stroke.
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

### Transactions History

In [None]:
# Read the transaction-count export from the user's Downloads folder.
filename = os.path.expanduser('~/Downloads/btc-trns.csv')
x, h = history_from_csv(filename)

# Column 0 is a 'YYYY-MM-DD HH:MM:SS' timestamp, column 1 an integer count.
d = [datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in x[:, 0]]
t = list(map(int, x[:, 1]))

# Index the counts by date and average them per week
# (weekly bins anchored on Monday).
tdf = pd.DataFrame(data={'Transactions': t}, index=d)
wtdf = tdf.resample(rule='W-Mon').mean()
wtdf.head(n=5)

In [None]:
# Raw transaction counts and their weekly average on a logarithmic y-axis.
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=160)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.tick_params(axis='y')
ax.semilogy(d, t, label='daily')
ax.semilogy(wtdf.index, wtdf.values, label='weekly avg')
ax.legend()
ax.grid()

# Format the ticks. Each axis gets its own locator/formatter instances:
# these objects keep a reference to the axis they are attached to, so one
# instance must not be reused across figures.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())           # every year
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())          # every month

ax.set_xlabel('Date')
ax.set_ylabel('Transactions')
title_text = ax.set_title('BTC Transactions', fontweight='bold', fontsize=16)
# Outline the title with a thin, semi-transparent black stroke.
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# "Stock": running total of the transaction counts in t.
s = np.asarray(t).cumsum()
# Ratio of the running total to each individual count, plotted later as 'S2F'
# (presumably "stock-to-flow").
s2f = np.true_divide(s, t)

In [None]:
# Cumulative transaction count ("stock") and the S2F ratio over time.
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=160)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.tick_params(axis='y')
ax.semilogy(d, s, label='stock')
ax.semilogy(d, s2f, '.', markersize=3, label='S2F')
# The series labels above are only shown once a legend is requested.
ax.legend()
ax.grid()

# Format the ticks. Each axis gets its own locator/formatter instances:
# these objects keep a reference to the axis they are attached to, so one
# instance must not be reused across figures.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())           # every year
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())          # every month

ax.set_xlabel('Date')
ax.set_ylabel('Transactions')
# Distinct title: this figure shows the running total and the ratio, not the
# raw counts of the previous figure.
title_text = ax.set_title('BTC Transactions: Stock and S2F', fontweight='bold', fontsize=16)
# Outline the title with a thin, semi-transparent black stroke.
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# Stock (cumulative transactions) against the S2F ratio, both axes logarithmic.
fig = matplotlib.pyplot.figure(figsize=(9, 4.5), dpi=160)
ax = matplotlib.pyplot.axes([0.06, 0.08, 0.92, 0.82])
ax.tick_params(axis='y')
ax.loglog(s2f, s, '.', label='stock')
ax.grid()

# No date formatting here: the x-axis is the S2F ratio, not time.
ax.set_xlabel('S2F')
# The y values are the running total s, not the raw transaction counts.
ax.set_ylabel('Stock (cumulative transactions)')
title_text = ax.set_title('BTC S2F', fontweight='bold', fontsize=16)
# Outline the title with a thin, semi-transparent black stroke.
title_text.set_path_effects([
    matplotlib.patheffects.Stroke(linewidth=1, foreground=(0, 0, 0, 0.7)),
    matplotlib.patheffects.Normal()
])

In [None]:
# Toy check of LinearRegression on noise-free data generated as
#     y = 1 * x_0 + 2 * x_1 + 3
x = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = x @ np.array([1, 2]) + 3

reg = LinearRegression()
reg.fit(x, y)
# R^2 of the fit on the training data (displayed as the cell output).
reg.score(x, y)

In [None]:
# Fitted coefficients; the toy targets were generated with weights 1 and 2.
reg.coef_

In [None]:
# Fitted intercept; the toy targets were generated with an offset of 3.
reg.intercept_