In [1]:
import pandas as pd

In [2]:
import numba as nb
import numpy as np
import math

In [3]:

@nb.njit
def get_demand_charge(a: np.ndarray, n_intervals = 3):
    """Find the run of ``n_intervals`` consecutive samples with the highest mean.

    Parameters
    ----------
    a : array whose first axis is scanned with a sliding window.
    n_intervals : number of consecutive samples averaged per window.

    Returns
    -------
    (idx, max_avg) : start position of the best window and that window's mean.
        The running maximum starts at 0.0, so ``(0, 0.0)`` is returned when
        ``a`` is shorter than ``n_intervals``, when ``n_intervals`` is not
        positive, or when no window mean is greater than zero.
    """
    length = a.shape[0]
    max_avg = 0.0
    idx = 0
    if n_intervals > 0:
        # "+ 1" so the last full window (ending on the final sample) is scanned too.
        for start in range(length - n_intervals + 1):
            window_avg = a[start:start + n_intervals].mean()
            if window_avg > max_avg:
                max_avg = window_avg
                idx = start
    return (idx, max_avg)
    

In [4]:
def agg_get_demand_info(s: pd.Series, n_intervals: int = 3):
    """Return ``(index_label, max_avg)`` for the peak averaging window of ``s``.

    Parameters
    ----------
    s : series whose values are passed to ``get_demand_charge``.
    n_intervals : window length forwarded to ``get_demand_charge``.

    Returns
    -------
    Tuple of the index label at the start of the best window and the window mean.

    Raises
    ------
    IndexError : if ``s`` is empty (there is no label at position 0).
    """
    idx, val = get_demand_charge(s.values, n_intervals)
    # Look the label up positionally on the index itself: this works whatever
    # the index is called and avoids building a temporary DataFrame.
    return (s.index[idx], val)

    

In [5]:
# Zero value for each numeric type: int -> 0, float -> 0.0.
td = {numeric_type: numeric_type() for numeric_type in (int, float)}

In [6]:
# 5-minute timestamps from 2018-11-10 00:00 through 2019-02-17 00:00 (both included).
my_dti = pd.date_range(start='2018-11-10', end='2019-02-17', freq='5min')
my_dti

DatetimeIndex(['2018-11-10 00:00:00', '2018-11-10 00:05:00',
               '2018-11-10 00:10:00', '2018-11-10 00:15:00',
               '2018-11-10 00:20:00', '2018-11-10 00:25:00',
               '2018-11-10 00:30:00', '2018-11-10 00:35:00',
               '2018-11-10 00:40:00', '2018-11-10 00:45:00',
               ...
               '2019-02-16 23:15:00', '2019-02-16 23:20:00',
               '2019-02-16 23:25:00', '2019-02-16 23:30:00',
               '2019-02-16 23:35:00', '2019-02-16 23:40:00',
               '2019-02-16 23:45:00', '2019-02-16 23:50:00',
               '2019-02-16 23:55:00', '2019-02-17 00:00:00'],
              dtype='datetime64[ns]', length=28513, freq='5T')

In [7]:
# Column-less frame indexed by the timestamps in my_dti; columns are added in later cells.
my_df = pd.DataFrame(index=my_dti)
my_df.head()

2018-11-10 00:00:00
2018-11-10 00:05:00
2018-11-10 00:10:00
2018-11-10 00:15:00
2018-11-10 00:20:00


In [8]:
# Month number of each row, read from the frame's datetime index.
my_df['month'] = my_df.index.month
my_df.head()

Unnamed: 0,month
2018-11-10 00:00:00,11
2018-11-10 00:05:00,11
2018-11-10 00:10:00,11
2018-11-10 00:15:00,11
2018-11-10 00:20:00,11


In [9]:
# Hour of day of each row, read from the frame's datetime index.
my_df['hour'] = my_df.index.hour
my_df.head()

Unnamed: 0,month,hour
2018-11-10 00:00:00,11,0
2018-11-10 00:05:00,11,0
2018-11-10 00:10:00,11,0
2018-11-10 00:15:00,11,0
2018-11-10 00:20:00,11,0


In [10]:
from pandas.tseries.holiday import USFederalHolidayCalendar
# Holiday dates produced by pandas' US federal holiday calendar (default date range).
my_holidays = USFederalHolidayCalendar().holidays()

In [11]:
# Day-of-week codes treated as weekend. pandas numbers Monday as 0 and Sunday as 6,
# so 5 and 6 are Saturday and Sunday.
weekend_mask = [5,6]

In [12]:
# True for rows that fall on a US federal holiday or on a day listed in
# weekend_mask, False otherwise.  Holidays are matched timestamp-to-timestamp
# (index normalised to midnight) rather than against datetime.date objects,
# which would depend on pandas coercing dates during the comparison.
my_df['weekend'] = (
    my_df.index.normalize().isin(my_holidays)
    | my_df.index.dayofweek.isin(weekend_mask)
)

In [13]:
# Show the first row of each calendar day (first five days only) to spot-check the weekend flag.
my_df.groupby(pd.Grouper(freq='D')).head(n=1).head()

Unnamed: 0,month,hour,weekend
2018-11-10,11,0,True
2018-11-11,11,0,True
2018-11-12,11,0,True
2018-11-13,11,0,False
2018-11-14,11,0,False


In [180]:
from numba import cuda

In [181]:
def mult_coeff(x: np.array, y: int, z: np.array, out: np.array):
    """Fill ``out`` row by row with six values derived from ``x``, ``y`` and ``z``.

    For every row ``i`` of ``x`` the row stored in ``out[i]`` is
    ``[x[i][0], y, z[i], s, s / y, s * z[i]]`` where ``s = x[i].sum()``;
    the six values are assembled as a float32 array before being stored.

    ``out`` is modified in place and also returned.
    """
    n_rows = x.shape[0]
    for row_idx in range(n_rows):
        row = x[row_idx]
        row_total = row.sum()
        coeff = z[row_idx]
        out[row_idx] = np.array(
            [row[0], y, coeff, row_total, row_total / y, row_total * coeff],
            dtype=np.float32,
        )
    return out

# One random coefficient per row of my_df.  No seed is set, so the values
# differ from run to run.
ra = np.random.random(len(my_df.index))

# Compiled variants of the same Python function, benchmarked in later cells.
jit_coeff=nb.jit(mult_coeff, nopython=False, cache=True)
njit_coeff=nb.jit(mult_coeff, nopython=True, nogil=True, cache=True)
# NOTE(review): mult_coeff loops over every row itself, builds a NumPy array per
# row and returns `out`; the recorded run of cuda_coeff in this notebook ended in
# CudaSupportError (no CUDA driver found).  Presumably a real CUDA kernel needs
# per-thread indexing and no return value -- confirm against the Numba CUDA docs
# before relying on this wrapper.
cuda_coeff=cuda.jit(mult_coeff, nopython=True, nogil=True)
@nb.jit(nopython=True, parallel=True, nogil=True)
def parallel_coeff(x: np.array, y: int, z: np.array, out:np.array):
    """Variant of the row-filling loop that iterates with ``nb.prange``.

    For every row ``i`` of ``x`` the row stored in ``out[i]`` is
    ``[x[i][0], y, z[i], s, s / y, s * z[i]]`` where ``s = x[i].sum()``,
    assembled as a float32 array.  ``out`` is modified in place and returned.
    """
    for row_idx in nb.prange(len(x)):
        row = x[row_idx]
        row_total = row.sum()
        coeff = z[row_idx]
        out[row_idx] = np.array(
            [row[0], y, coeff, row_total, row_total / y, row_total * coeff],
            dtype=np.float32,
        )
    return out
#cuda_coeff=nb.jit(mult_coeff, target='cuda')

In [108]:
# Display the random coefficient array.
ra

array([0.24663715, 0.94456611, 0.15312107, ..., 0.07034883, 0.19421723,
       0.35419208])

In [70]:
# Index label of the first row of my_df.
my_df.iloc[0].name

Timestamp('2018-11-10 00:00:00', freq='5T')

In [103]:
# Copy of my_df with an extra column `i` holding each row's 0-based position.
tmp_df = my_df.assign(i=pd.RangeIndex(len(my_df)))

In [139]:
# Number of rows in tmp_df.
nrows = tmp_df.shape[0]
nrows

28513

In [162]:
# Zero-filled float32 result buffer: nrows rows, six values per row.
out_arr = np.zeros(shape=(nrows, 6), dtype=np.float32)

In [163]:
%%timeit -n 10 
mult_coeff(tmp_df[['month', 'hour', 'weekend', 'i']].values.astype(np.float32), 10, ra, out_arr)

2.39 s ± 308 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [171]:
# Zero-filled float32 result buffer: nrows rows, six values per row.
out_arr = np.zeros(shape=(nrows, 6), dtype=np.float32)

In [172]:
%%timeit -n 10
jit_coeff(tmp_df[['month', 'hour', 'weekend', 'i']].values.astype(np.float32), 10, ra, out_arr)

58.1 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [173]:
# Zero-filled float32 result buffer: nrows rows, six values per row.
out_arr = np.zeros(shape=(nrows, 6), dtype=np.float32)

In [174]:
%%timeit -n 10
njit_coeff(tmp_df[['month', 'hour', 'weekend', 'i']].values.astype(np.float32), 10, ra, out_arr)

47.3 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [175]:
# Zero-filled float32 result buffer: nrows rows, six values per row.
out_arr = np.zeros(shape=(nrows, 6), dtype=np.float32)

In [176]:
%%timeit -n 10
parallel_coeff(tmp_df[['month', 'hour', 'weekend', 'i']].values.astype(np.float32), 10, ra, out_arr)

77.1 ms ± 46.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [183]:
# Zero-filled float32 result buffer: nrows rows, six values per row.
out_arr = np.zeros(shape=(nrows, 6), dtype=np.float32)

In [184]:
%%timeit -n 10
cuda_coeff(tmp_df[['month', 'hour', 'weekend', 'i']].values.astype(np.float32), 10, ra, out_arr)

CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBAPRO_CUDA_DRIVER
with the file path of the CUDA driver shared library.
: