In [1]:
import math
import numpy as np
import pandas as pd
from pathlib import Path
import plotly.express as px
from itertools import islice
from numpy.random import normal
import plotly.graph_objects as go
from typing import List, Generator
from datetime import datetime, timedelta

In [2]:
def gen_gbm(period: float, start_amount: float, drift: float, volatility: float) -> Generator[float, None, None]:
    current_amt = start_amount
    i = 1
    while(True):
        c = (current_amt * drift * period) + \
            (current_amt * volatility * np.random.normal(0, math.sqrt(period))) + \
            math.cos(2 * math.pi * i * period) + .5
        yield current_amt + c
        current_amt += c
        i += 1

In [3]:
fig = go.Figure()
fig.add_trace(go.Scatter(y=list(islice(gen_gbm((1/365.), 0, .01, .6), 365*3)), mode='lines', name='Actual'))
fig.update_layout(title=f'Geometric Brownian Motion (with superimposed period based scaled cosine wave)', xaxis_title='index', yaxis_title='value')

In [4]:
def generate_df(total: int, start_amts: List[float], drift: float, volatility: float, 
                end_date: datetime=datetime.now(), output_dir=None):
     # current items
    current_date = end_date - timedelta(days=total)
    
    # generators
    gen = [gen_gbm(period=1/365., 
                   start_amount=start_amts[i], 
                   drift=drift, 
                   volatility=volatility)
                        for i in range(len(start_amts))]

    # empty dataframe
    cols = ['date', 'resource_id', 'earnings']
    df = pd.DataFrame(columns=cols)

    for i in range(total):
        # generate for each
        r = [(current_date.strftime('%Y-%m-%d'), j+1, next(gen[j])) 
                    for j in range(len(gen))]

        # add rows
        df = df.append(pd.DataFrame(r, columns=cols))

        # increment vals
        current_date = current_date + timedelta(days=1)

    # save file if requested
    if output_dir != None:
        output_dir = Path(output_dir).resolve()
        if not output_dir.exists():
            os.makedirs(str(output_dir))
        curds = datetime.now().strftime("%m.%d.%H.%M.%S")
        startds = (end_date - timedelta(days=total)).strftime("%y.%m.%d")
        file_path = f'D{startds}G{curds}.parquet'
        df.to_parquet(output_dir / file_path)

    return df

In [5]:
years = 5
args = {
    'total': 365 * years,
    'start_amts': [200000., 143320, 83420, 50000, 40000],
    'drift': .01,
    'volatility': .4,
    'end_date': datetime.now(),
    'output_dir': '../data'
}
data = generate_df(**args)

fig = px.line(data, x="date", y="earnings", color='resource_id')
fig.update_layout(title=f'Generated Earnings by Resource', xaxis_title='date', yaxis_title='earnings', legend_title='Store', width=960)
fig.show()

In [6]:
agg_data = data.groupby(by=['date']).sum()
agg_data.sort_values(by=['date'])

Unnamed: 0_level_0,earnings
date,Unnamed: 1_level_1
2016-04-26,517442.146891
2016-04-27,513639.248133
2016-04-28,513496.603654
2016-04-29,521520.772918
2016-04-30,527319.069175
...,...
2021-04-20,739450.331120
2021-04-21,740171.640038
2021-04-22,754729.385745
2021-04-23,749583.899974


In [7]:
data.loc[data['resource_id'] == 1].drop(columns=['resource_id'])

Unnamed: 0,date,earnings
0,2016-04-26,201951.417875
0,2016-04-27,201737.966028
0,2016-04-28,202689.518561
0,2016-04-29,212176.864433
0,2016-04-30,216251.409376
...,...,...
0,2021-04-20,409738.724836
0,2021-04-21,414150.395969
0,2021-04-22,418135.648145
0,2021-04-23,423194.688446
