In [1]:
%load_ext cython
%load_ext heat

In [2]:
%load_ext line_profiler
import line_profiler

In [3]:
import Cython

In [4]:
# Turn on Cython's `linetrace` and `binding` directives for every extension
# compiled after this cell
# (cf. http://docs.cython.org/src/reference/compilation.html).
directive_defaults = Cython.Compiler.Options.get_directive_defaults()
directive_defaults.update(linetrace=True, binding=True)

In [5]:
from frame import DataFrame
import pandas as pd
import numpy as np

# Folder holding the CSV export. Set the AT_DATA_DIR environment variable to
# point somewhere else; the default is the original local folder so existing
# setups keep working unchanged.
import os
from pathlib import Path

DATA_DIR = Path(os.environ.get("AT_DATA_DIR", r"C:\Users\evanw\OneDrive\Desktop\AT\Data"))

# The first CSV column is the timestamp index; parse it into a DatetimeIndex.
pdf = pd.read_csv(DATA_DIR / "Binance-BTCUSDT.csv", index_col=0)
pdf.index = pd.to_datetime(pdf.index)

# Project-local frame built from the pandas frame; used by the cells below.
df = DataFrame.from_pandas(pdf)

In [6]:
# NOTE(review): `.mean` is not followed by `()`. If `mean` is a method on the
# object returned by `df.resample("f")` (as it is in the pandas baseline cell),
# this times only the resample call plus an attribute lookup, not the
# aggregation itself — confirm against the project's DataFrame API.
%timeit df.resample("f").mean

7.22 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# pandas baseline: weekly ("1W") mean of the Open column.
%timeit pdf.Open.resample("1W").mean()

In [9]:
%%cython -f --compile-args=-DCYTHON_TRACE=1

import numpy as np
cimport numpy as cnp

from util cimport datetime64, timedelta64

# May change to double depending on what I use
# this function for
cdef inline cnp.int64_t ns_to_days(cnp.int64_t ns):
    # Number of whole days contained in `ns` nanoseconds, rounded toward
    # negative infinity.
    #
    # 10^9 ns to 1 sec
    # 60 sec to 1 min
    # 60 min to 1 hour
    # 24 hour to 1 day
    # (10 ** 9) * 60 * 60 * 24
    # 86400000000000 ns = 1 day
    #
    # `//` keeps this an integer floor division under every language_level.
    # With language_level=3 a plain `/` is true division: the operands go
    # through a double, which cannot represent nanosecond timestamps above
    # 2**53 exactly, and the result is then truncated back to int64.
    return ns // 86400000000000

cdef inline cnp.int64_t days_to_ns(cnp.int64_t days):
    # Scale a day count to nanoseconds (86400000000000 ns per day).
    cdef cnp.int64_t ns_per_day = 86400000000000
    return ns_per_day * days

cdef inline cnp.int64_t weekday(cnp.int64_t dt):
    # Offset, in nanoseconds, between the day containing `dt` and the most
    # recent Sunday, where `dt` is a nanosecond count since the Unix epoch.
    # The result is always a whole number of days (0..6) expressed in ns.
    #
    # 259200000000000 ns = 3 days. Day 0 (1970-01-01) was a Thursday, so
    # shifting by three days makes Sundays land on multiples of 7.
    #
    # The shifted value must be reduced to whole days *before* `% 7`; taking
    # the remainder of the raw nanosecond count does not yield a weekday.
    cdef cnp.int64_t shifted_days = (dt - 259200000000000) // 86400000000000
    return days_to_ns(shifted_days % 7)

def resample(frame):
    """Average ``frame.Open`` over consecutive one-week bins.

    With ``index = frame.index.keys_``, the bin edges start at
    ``index[0] - weekday(index[0])`` and advance in one-week steps up to (but
    excluding) ``index[-1] + days_to_ns(7) - weekday(index[-1])``.

    Returns
    -------
    (ret, bins)
        ``ret`` is a ``double[:]`` memoryview holding ``np.average`` of each
        group of ``frame.Open.values``; ``bins`` is the memoryview of bin
        edges.
    """
    cdef datetime64[:] index = frame.index.keys_
    cdef datetime64 first = index[0]

    cdef datetime64 week_start = first - weekday(first)
    cdef datetime64 last = index[-1]
    cdef datetime64 stop = last + (days_to_ns(7) - weekday(last))

    # One week expressed as an int64 nanosecond count.
    cdef timedelta64 td = np.timedelta64(1, "W").astype("timedelta64[ns]").astype("int64")

    cdef datetime64[:] bins = np.arange(start=week_start, stop=stop, step=td)

    values = frame.Open.values
    # Bin number of every key -> rows per bin (bin 0 dropped) -> split offsets.
    bin_ids = np.digitize(index, bins, right=False)
    offsets = np.cumsum(np.bincount(bin_ids)[1:])
    # np.split yields one extra piece after the final offset; discard it.
    cdef groups = np.split(values, offsets)[:-1]

    cdef int length = len(groups)
    cdef double[:] ret = np.zeros(length)
    cdef int pos = 0

    for chunk in groups:
        ret[pos] = np.average(chunk)
        pos += 1

    return ret, bins

In [None]:
#Print profiling statistics using the `line_profiler` API
# Register `resample` with the profiler, run it once on `df`, then print the
# collected per-line statistics.
profile = line_profiler.LineProfiler(resample)
profile.runcall(resample, df)
profile.print_stats()

In [None]:
%lprun -f approx_pi -f recip_square approx_pi(1000000)

In [None]:
# Run the Cython implementation and view both outputs as NumPy arrays:
# `y` (bin edges) reinterpreted as datetime64[ns], `x` (means) as-is.
x, y = resample(df)
y_t = np.asarray(y).astype("datetime64[ns]")
x_t = np.asarray(x)

# pandas reference result; as the last expression, this is what the cell displays.
pdf.resample("1W").Open.mean()

In [None]:
# pandas timing for the weekly mean of Open, for comparison with `resample`.
%timeit pdf.Open.resample("1W").mean()

In [None]:
%timeit resample(frame)

In [None]:
# Label the computed means (`x_t`) with the bin edges (`y_t`) as a pandas Series.
pd.Series(x_t, index=y_t)

In [10]:
# 10^9 ns = 1 sec
# 60 sec = 1 min
# 60 min = 1 hour
# 24 hours = 1 day
# The expression below stops at hours (no `* 24`): nanoseconds per hour.
10 ** 9 * 60 * 60

3600000000000

In [32]:
# First index timestamp truncated to hour resolution, then read as an integer
# (a count of hours since the Unix epoch).
dt = pdf.index.values[0].astype("datetime64[h]").astype("int64")
dt

438288

In [31]:
# round to nearest hour
# NOTE(review): dividing by 60 does not round anything to an hour. The
# recorded output (2.6297281e+16) also does not equal 438288 / 60, and this
# cell's execution count (31) precedes that of the cell assigning `dt` (32),
# so the output appears to come from an earlier, nanosecond-valued `dt`.
# Re-run after a kernel restart to confirm.
dt / 60

2.6297281e+16

In [11]:
def weekday1(dt):
    """Return the offset of ``dt``'s day from the preceding Sunday, in nanoseconds.

    Parameters
    ----------
    dt : numpy scalar or array
        Timestamp(s) as nanoseconds since the Unix epoch, either ``int64`` or
        ``datetime64[ns]`` (anything supporting ``.astype("int64")``).

    Returns
    -------
    numpy int64 scalar or array
        Whole days since the most recent Sunday (0 for Sunday through 6 for
        Saturday) multiplied by 86400000000000 ns per day.
    """
    ns_per_day = 86400000000000
    # Reduce to whole days first: `% 7` of a raw nanosecond count (what this
    # function used to compute) is not a weekday.
    days = dt.astype("int64") // ns_per_day
    # Day 0 (1970-01-01) was a Thursday; subtracting 3 maps Sundays to 0.
    return ((days - 3) % 7) * ns_per_day

# W, D, h, m



frame = df

# Pure-NumPy weekly mean of `frame.Open`, kept in int64 nanoseconds throughout.
# The earlier version cast the nanosecond keys straight to datetime64[D]
# (which reads the integer as a *day* count), mixed day and nanosecond units,
# had a misplaced parenthesis in `stop`, and never used `stop`.
# Assumes `frame.index.keys_` holds sorted nanoseconds-since-epoch values —
# the same assumption the np.split-by-count step below already relies on.
NS_PER_DAY = 86400000000000

x = np.asarray(frame.index.keys_)
index0 = x[0]
indexLast = x[-1]

x_ns = x.astype(np.int64)
day0 = x_ns[0] // NS_PER_DAY
dayLast = x_ns[-1] // NS_PER_DAY

# (day - 3) % 7 is the number of days since the preceding Sunday
# (day 0, 1970-01-01, was a Thursday).
start_day = day0 - (day0 - 3) % 7
if x_ns[0] == start_day * NS_PER_DAY:
    # Bins are right-closed, so a first key sitting exactly on the first edge
    # would fall into bin 0, which is discarded below. Start one week earlier.
    start_day -= 7
stop_day = dayLast + (7 - (dayLast - 3) % 7)

start = np.int64(start_day).astype("datetime64[D]")
stop = np.int64(stop_day).astype("datetime64[D]")

td = np.timedelta64(1, "W").astype("timedelta64[ns]").astype("int64")

# Weekly edges from the first Sunday up to (excluding) the Sunday after the
# last key; for right-closed bins this assigns every key to the same bin as
# stopping at x[-1] did.
bins = np.arange(
    start=start.astype("datetime64[ns]").astype("int64"),
    stop=stop.astype("datetime64[ns]").astype("int64"),
    step=td,
)

d = np.digitize(x_ns, bins, right=True)

# Rows per bin (bin 0 is empty by construction) -> split offsets -> groups.
count = np.bincount(d)[1:]
s = np.cumsum(count)
# np.split leaves one extra (empty) piece after the final offset; drop it.
splitted = np.split(frame.Open.values, s)[:-1]

length = len(splitted)
ret = np.zeros(length)
for l in range(length):
    ret[l] = np.average(splitted[l])

In [None]:
%%cython

import numpy as np
cimport numpy as np

def bincount(x):
    """Count occurrences of each non-negative integer in ``x``.

    Parameters
    ----------
    x : numpy.ndarray of non-negative integers

    Returns
    -------
    int[:] memoryview of length ``x.max() + 1`` (length 0 for empty input)
    whose ``i``-th entry is the number of times ``i`` appears in ``x``.
    """
    cdef int[:] result
    cdef int i
    # Empty input has no maximum; return an empty count vector instead of
    # letting `x.max()` raise.
    size = x.max() + 1 if len(x) else 0
    # `int[:]` is a C `int` buffer, so allocate with np.intc. NumPy's default
    # integer (`int`) is 64-bit on most platforms and would be rejected with a
    # buffer dtype mismatch.
    result = np.zeros(size, dtype=np.intc)
    for i in x:
        result[i] += 1
    return result

In [None]:
# The same counts computed two ways: NumPy's built-in and the Cython `bincount`.
c1 = np.bincount(d)
c2 = bincount(d)

In [None]:
# Timing: NumPy built-in.
%timeit np.bincount(d)

In [None]:
# Timing: Cython version.
%timeit bincount(d)

In [None]:
# Last element of `y` as currently bound in the kernel.
y[-1]

In [None]:
# Last index key viewed as a nanosecond-resolution timestamp.
indexLast.astype('datetime64[ns]')

In [None]:
# Same check, reading the last key directly from the frame's index.
np.int64(df.index.keys_[-1]).astype('datetime64[ns]')

In [None]:
# Number of groups produced by the np.split step.
len(splitted)

In [None]:
# NOTE(review): `resample1` is not defined anywhere in this notebook. The
# Cython `resample` is the only implementation here returning a
# `(means, bin_edges)` pair, so it is called instead — confirm intent.
x, y = resample(df)
# Convert the values that were just returned. Previously this line read the
# global `bins` and rebound `y`, leaving `y_t` (used to label the Series in
# the following cell) stale.
y_t = np.asarray(y).astype("datetime64[ns]")
x_t = np.asarray(x)

# pandas reference result; as the last expression, this is what the cell displays.
pdf.Open.resample("1W").mean()

In [None]:
# Label the computed means (`x_t`) with the bin edges (`y_t`) as a pandas Series.
pd.Series(x_t, index=y_t)

In [None]:
# NOTE(review): `resample1` is not defined anywhere in this notebook; calling
# the Cython `resample` instead so the cell runs — confirm intent.
resample(df)

In [None]:
x = np.asarray(df.index.keys_)
index0 = x[0]
start = (index0 - weekday1(index0)).astype("datetime64[D]")

In [None]:
start

In [None]:
np.datetime64(index0 - (((index0 - (259200000000000)) % 7) * 86400000000000), "ns")

In [None]:
np.int64(index0).astype("datetime64[ns]")

In [None]:
np.int64(index0 - (((index0 - (259200000000000)) % 7) * 86400000000000)).astype("datetime64[ns]")

In [None]:
((index0 - (259200000000000)) % 7) * 86400000000000

In [None]:
# Scratch check of list slicing: `g[:1]` is the one-element prefix of `g`.
g = [0, 1, 2]
g[:1]