# Notebook to generate sample prices

In [31]:
import io
import numpy as np
import datetime as dt
import yfinance as yf

from config import PKGDIR

outfile = PKGDIR.joinpath("samples/sample-prices.csv")

In [32]:
ticker = "AAPL"
prices = yf.Ticker(ticker).history("5y")
prices = prices.rename(columns=str.lower).rename_axis(index=str.lower)
prices["change"] = prices.close.pct_change()
prices = prices.filter(["open", "high", "low", "close", "volume", "change"])
prices

Unnamed: 0_level_0,open,high,low,close,volume,change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-24 00:00:00-04:00,51.913947,52.977028,51.868454,52.864494,110773600,
2018-09-25 00:00:00-04:00,52.615489,53.350551,52.603517,53.199707,98217600,0.006341
2018-09-26 00:00:00-04:00,52.914789,53.573231,52.617890,52.775917,95938800,-0.007966
2018-09-27 00:00:00-04:00,53.589985,54.217299,53.522940,53.860542,120724800,0.020552
2018-09-28 00:00:00-04:00,53.822241,54.073647,53.637880,54.049706,91717600,0.003512
...,...,...,...,...,...,...
2023-09-18 00:00:00-04:00,176.479996,179.380005,176.169998,177.970001,67257600,0.016913
2023-09-19 00:00:00-04:00,177.520004,179.630005,177.130005,179.070007,51826900,0.006181
2023-09-20 00:00:00-04:00,179.259995,179.699997,175.399994,175.490005,58436200,-0.019992
2023-09-21 00:00:00-04:00,174.550003,176.300003,173.860001,173.929993,63047900,-0.008889


In [33]:
raw_data = prices.to_csv(lineterminator="\n")
raw_data

'date,open,high,low,close,volume,change\n2018-09-24 00:00:00-04:00,51.91394716701711,52.977028304524644,51.86845424029647,52.86449432373047,110773600,\n2018-09-25 00:00:00-04:00,52.61548895837528,53.35055124037557,52.60351655783991,53.19970703125,98217600,0.006340980119222683\n2018-09-26 00:00:00-04:00,52.914788883277744,53.57323082639545,52.617890110042985,52.775917053222656,95938800,-0.007966020898919735\n2018-09-27 00:00:00-04:00,53.58998504123538,54.217299376610235,53.52294032842459,53.86054229736328,120724800,0.020551518660430368\n2018-09-28 00:00:00-04:00,53.822240816937295,54.07364665704644,53.63787970052634,54.049705505371094,91717600,0.0035120925252376534\n2018-10-01 00:00:00-04:00,54.57884489079135,54.930812294555984,54.19575362818938,54.41363525390625,94403200,0.006733242024768993\n2018-10-02 00:00:00-04:00,54.411235988873756,55.069677788519094,54.26278846133475,54.89728546142578,99152800,0.008888400954332631\n2018-10-03 00:00:00-04:00,55.081654936212665,55.90051673375823,55

In [34]:
def load_prices(file):
    return np.genfromtxt(
        file,
        delimiter=",",
        converters={0: dt.datetime.fromisoformat},
        dtype=None,
        names=True,
        encoding="utf-8",
    )


def check_prices(data):
    assert isinstance(data, np.ndarray)
    assert data.dtype.names is not None
    assert len(data) > 1000


result = load_prices(io.StringIO(raw_data))
check_prices(result)
result

array([(datetime.datetime(2018, 9, 24, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),  51.91394717,  52.9770283 ,  51.86845424,  52.86449432, 110773600,         nan),
       (datetime.datetime(2018, 9, 25, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),  52.61548896,  53.35055124,  52.60351656,  53.19970703,  98217600,  0.00634098),
       (datetime.datetime(2018, 9, 26, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),  52.91478888,  53.57323083,  52.61789011,  52.77591705,  95938800, -0.00796602),
       ...,
       (datetime.datetime(2023, 9, 20, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 179.25999451, 179.69999695, 175.3999939 , 175.49000549,  58436200, -0.01999219),
       (datetime.datetime(2023, 9, 21, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 174.55000305, 176.30000305, 173.86000061, 173.92999268,  63047900, -0.00888947),
   

In [35]:
# save data after checking it is valid !!!

result = load_prices(io.StringIO(raw_data))
check_prices(result)

print(f"Updating {outfile.name} ...")
outfile.write_text(raw_data)

Updating sample-prices.csv ...


164232