In [1]:
import math
import numpy as np
import pandas as pd
from pathlib import Path
import plotly.express as px
from itertools import islice
from numpy.random import normal
import plotly.graph_objects as go
from typing import List, Generator
from datetime import datetime, timedelta

In [2]:
def gen_gbm(period: float, start_amount: float, drift: float, volatility: float) -> Generator[float, None, None]:
    current_amt = start_amount
    i = 1
    while(True):
        c = (current_amt * drift * period) + \
            (current_amt * volatility * np.random.normal(0, math.sqrt(period))) + \
            math.cos(2 * math.pi * i * period) + .5
        yield current_amt + c
        current_amt += c
        i += 1


In [3]:
def generate_df(total: int, start_amts: List[float], drift: float, volatility: float, 
                end_date: datetime=datetime.now(), output_dir=None):
     # current items
    current_date = end_date - timedelta(days=total)
    
    # generators
    gen = [gen_gbm(period=1/365., 
                   start_amount=start_amts[i], 
                   drift=drift, 
                   volatility=volatility)
                        for i in range(len(start_amts))]

    # empty dataframe
    cols = ['date', 'resource_id', 'earnings']
    df = pd.DataFrame(columns=cols)

    for i in range(total):
        # generate for each
        r = [(current_date.strftime('%Y-%m-%d'), j+1, next(gen[j])) 
                    for j in range(len(gen))]

        # add rows
        df = df.append(pd.DataFrame(r, columns=cols))

        # increment vals
        current_date = current_date + timedelta(days=1)

    # save file if requested
    if output_dir != None:
        output_dir = Path(output_dir).resolve()
        if not output_dir.exists():
            os.makedirs(str(output_dir))
        curds = datetime.now().strftime("%m.%d.%H.%M.%S")
        startds = (end_date - timedelta(days=total)).strftime("%y.%m.%d")
        file_path = f'D{startds}G{curds}.csv'
        df.to_csv(output_dir / file_path)
        #file_path = f'D{startds}G{curds}.parquet'
        #df.to_parquet(output_dir / file_path)

    return df

In [5]:
years = 10
args = {
    'total': 365 * years,
    'start_amts': [200000., 143320, 83420, 50000, 40000],
    'drift': .01,
    'volatility': .4,
    'end_date': datetime.now(),
    'output_dir': './data'
}
data = generate_df(**args)

In [9]:
agg_data = data.groupby(by=['date']).sum()
agg_data.sort_values(by=['date'])

Unnamed: 0_level_0,earnings
date,Unnamed: 1_level_1
2011-07-04,511716.153598
2011-07-05,510619.454825
2011-07-06,514834.328123
2011-07-07,523359.300008
2011-07-08,525409.094955
...,...
2021-06-26,455278.196024
2021-06-27,446486.606696
2021-06-28,442601.457121
2021-06-29,438870.560785


In [10]:
data.loc[data['resource_id'] == 1].drop(columns=['resource_id'])


Unnamed: 0,date,earnings
0,2011-07-04,198929.120667
0,2011-07-05,197192.239731
0,2011-07-06,201978.345748
0,2011-07-07,207230.230144
0,2011-07-08,208506.400189
...,...,...
0,2021-06-26,325592.202968
0,2021-06-27,318715.866197
0,2021-06-28,316066.313342
0,2021-06-29,310637.254478
