In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Create sample time series data
np.random.seed(42)
dates = pd.date_range('2023-01-01', periods=100, freq='D')
trend = np.linspace(100, 200, 100)
seasonal = 20 * np.sin(2 * np.pi * np.arange(100) / 30)
noise = np.random.normal(0, 5, 100)
values = trend + seasonal + noise

# Additional sample data for examples
stock_prices = np.array([100, 102, 98, 105, 107, 103, 108, 112, 109, 115])
returns = np.array([0.02, -0.03, 0.05, 0.01, -0.02, 0.04, 0.03, -0.01, 0.02, -0.01])
volumes = np.array([1000, 1200, 800, 1500, 1100, 900, 1300, 1400, 1050, 1250])


In [14]:
# Create multiple lag features in one line
lags = np.column_stack([np.roll(values, i) for i in range(1, 4)])
print(lags)
lags.shape

[[217.84819466 218.90590418 219.17551225]
 [102.48357077 217.84819466 218.90590418]
 [104.47701332 102.48357077 217.84819466]
 [113.39337757 104.47701332 102.48357077]
 [122.40115736 113.39337757 104.47701332]
 [117.73253368 122.40115736 113.39337757]
 [121.20032834 117.73253368 122.40115736]
 [132.97780046 121.20032834 117.73253368]
 [130.79831862 132.97780046 121.20032834]
 [125.62387406 130.79831862 132.97780046]
 [130.82483963 125.62387406 130.79831862]
 [125.10442971 130.82483963 125.62387406]
 [123.64535885 125.10442971 130.82483963]
 [125.08672852 123.64535885 125.10442971]
 [111.69964477 125.08672852 123.64535885]
 [109.6750588  111.69964477 125.08672852]
 [112.34007751 109.6750588  111.69964477]
 [106.93922674 112.34007751 109.6750588 ]
 [110.60822097 106.93922674 112.34007751]
 [101.88599276 110.60822097 106.93922674]
 [ 97.26750418 101.88599276 110.60822097]
 [110.20975597  97.26750418 101.88599276]
 [101.06210938 110.20975597  97.26750418]
 [102.66942534 101.06210938 110.20

(100, 3)

In [4]:
# 5-period rolling standard deviation
rolling_std = np.array([np.std(values[max(0, i-4):i+1]) for i in range(len(values))])
print(rolling_std)


[ 0.          0.99672128  4.7434077   7.91211311  7.617056    6.48794287
  6.51988496  5.85270754  5.70415646  4.2910161   3.12854271  3.01988141
  2.47324575  6.28896219  6.87215363  6.50716041  6.25929543  1.89155826
  3.64778182  5.57336597  5.12079442  5.29986269  4.22425933  4.9644759
  4.49429054  3.92194522  4.08405693  6.98704963  6.04957147  6.54452253
  7.37683557 10.10376036 10.34873433  8.17644247  8.42954356  4.89934857
  6.69457359  5.76775911  3.66977245  4.9819486   5.10509103  5.1447554
  3.60484224  4.06584059  6.8618459   6.32358152  5.44514627  3.59194256
  2.057691    6.5006532   6.30564618  6.45651627  4.40519809  3.90381747
  4.14967444  5.38406442  4.64382818  2.94378992  4.87715762  7.96170841
  8.55525856  7.13303112  4.82339047  3.94496831  8.45718809 10.19937936
  9.56749355  8.01942862  3.34726757  2.85351325  2.85927647  2.96956334
  3.59572591  3.54733506  9.99975898  9.78506699  8.3875216   8.65375915
  5.10172488  8.16225124  6.12330278  4.73777158  5.2

In [5]:
# Identify outliers beyond 2 standard deviations
outliers = values[np.abs((values - np.mean(values)) / np.std(values)) > 2]
print(outliers)


[217.47142868 219.17551225 218.90590418 217.84819466]


In [6]:
# Exponential moving average with alpha=0.3
# ema = np.array([values[0]] + [0.3 * values[i] + 0.7 * ema[i-1] for i, ema in enumerate([values[0]] + [0] * (len(values)-1)) if i > 0][:len(values)-1])
# print(ema)
# uncomment the above lines to see why this doesn't work!

In [7]:
# More readable EMA calculation
alpha = 0.3
ema = values.copy()
for i in range(1, len(ema)):
    ema[i] = alpha * values[i] + (1 - alpha) * ema[i-1]
print(ema)

[102.48357077 103.08160353 106.17513574 111.04294223 113.04981966
 115.49497227 120.73982073 123.7573701  124.31732128 126.26957679
 125.92003267 125.23763052 125.19235992 121.14454538 117.7036994
 116.09461283 113.34799701 112.5260642  109.33404277 105.71408119
 107.06278362 105.26258135 104.48463455 102.0046875  102.15309534
 104.05315577 104.5306376  108.39010002 111.0165406  114.81444644
 118.55846168 126.41074999 130.60466871 133.36341322 139.35010588
 141.51602167 145.98994057 146.4327058  147.99289787 151.41484128
 154.41545331 155.23098112 154.7421986  153.33860636 149.70004495
 147.34662879 145.14360579 144.98821178 143.02591871 137.86319876
 136.94572781 135.03259241 133.29787607 134.31950237 136.22744821
 138.22664817 138.01065545 139.88965601 143.55499207 148.58312987
 151.47124774 155.48370358 158.40738869 161.70848275 168.26753501
 174.71475667 177.89865358 182.30451853 184.72880899 184.95791667
 186.42090871 188.77571141 187.43415226 188.11250405 181.42084775
 180.954719

In [8]:
# Find local peaks (maxima)
peaks = np.where((values[1:-1] > values[:-2]) & (values[1:-1] > values[2:]))[0] + 1
print(peaks)


[ 3  6  9 12 15 17 20 22 25 27 31 34 36 40 45 47 50 55 59 65 67 71 73 75
 82 91 94 97]


In [9]:
# Cumulative returns from daily returns
cumulative_returns = np.cumprod(1 + returns) - 1
print(cumulative_returns)


[ 0.02       -0.0106      0.03887     0.0492587   0.02827353  0.06940447
  0.1014866   0.09047174  0.11228117  0.10115836]


In [10]:
# Min-max normalization
normalized = (values - np.min(values)) / (np.max(values) - np.min(values))
print(normalized)


[0.05095609 0.06716856 0.13968446 0.21294383 0.17497438 0.20317761
 0.29896261 0.28123711 0.23915386 0.2814528  0.23492928 0.2230628
 0.23478531 0.1259095  0.10944374 0.13111807 0.08719349 0.11703306
 0.04609605 0.00853434 0.11379238 0.03939548 0.05246762 0.
 0.05108466 0.09977837 0.07666574 0.17223342 0.17019523 0.22331384
 0.25274088 0.39456449 0.35924919 0.35445082 0.46439601 0.40950519
 0.48968957 0.41679227 0.4506863  0.51384665 0.53025482 0.49542159
 0.46669478 0.43791937 0.36591483 0.37116263 0.35609964 0.39369321
 0.34344498 0.24072334 0.31382274 0.27936891 0.26864645 0.32926171
 0.36159832 0.37958932 0.33579548 0.3908334  0.45454209 0.521297
 0.50417511 0.55814432 0.56126101 0.59527048 0.71044011 0.76075254
 0.7247195  0.78374096 0.7658536  0.72605943 0.76137294 0.797448
 0.71639223 0.76024058 0.56595908 0.68030851 0.62482253 0.5849984
 0.57965484 0.47804451 0.5381627  0.55600141 0.60272249 0.52976452
 0.53324777 0.5677671  0.65359773 0.66322739 0.66598051 0.74895023
 0.774059

In [11]:
# Percentage change between consecutive periods
pct_change = np.diff(stock_prices) / stock_prices[:-1] * 100
print(pct_change)


[ 2.         -3.92156863  7.14285714  1.9047619  -3.73831776  4.85436893
  3.7037037  -2.67857143  5.50458716]


In [12]:
# Binary trend (1 for up, 0 for down)
trend_binary = (np.diff(values) > 0).astype(int)
print(trend_binary)


[1 1 1 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0
 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 0 0 1 1 0 1 0
 1 0 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0]


In [15]:
# Correlation coefficient in one line
price_volume_corr = np.corrcoef(stock_prices, volumes)[0, 1]
print(np.round(price_volume_corr,4))

0.5879
