In [210]:
import pandas as pd
from dateutil import parser
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt
#primeDF = pd.read_csv(f"itemDataFrames/ash_prime_set.csv", index_col=0)
#print(parser.parse("2024-01-19 02:17:31") - parser.parse("2022-01-19 02:17:31"))

In [211]:
#From https://stackoverflow.com/questions/46030055/python-time-weighted-average-pandas-grouped-by-time-interval
#Answer by hugovdberg
def resample_time_weighted_mean(x, target_index, closed=None, label=None):
    """Resample ``x`` onto ``target_index`` using a time-weighted mean.

    Each source row is weighted by the time elapsed since its neighbouring
    row, expressed as a fraction of the target interval it falls in; the
    weighted rows are then summed per target interval.

    :param x: Series/DataFrame indexed by timestamps.
    :param target_index: DatetimeIndex with a ``freq`` set; its first and last
        entries are padded by one ``freq`` step before weighting.
    :param closed: Passed to ``resample``. ``"right"`` back-fills values and
        diffs against the previous row; anything else forward-fills and diffs
        against the next row.
    :param label: Passed to ``resample``.
    :return: The weighted sums reindexed to exactly ``target_index``.
    """
    if closed == "right":
        step, fill_method = 1, "bfill"
    else:
        step, fill_method = -1, "ffill"

    freq = target_index.freq

    # Pad one step on each side so the edge intervals have a neighbour to diff against.
    padded_index = target_index.union(
        [target_index[0] - freq, target_index[-1] + freq]
    )
    # Length of every target interval, derived from consecutive index entries.
    bin_lengths = -padded_index.to_series().diff(periods=step)

    # Put the source rows and the interval edges on one shared index.
    merged_index = x.index.union(padded_index)
    filled = x.reindex(index=merged_index, method=fill_method)
    bin_lengths = bin_lengths.reindex(index=merged_index, method=fill_method)

    # Weight = time covered by the row / length of the interval containing it.
    time_covered = -filled.index.to_series().diff(periods=step)
    weighted = filled.mul(time_covered / bin_lengths, axis=0)

    # resample() may emit more bins than requested, hence the final reindex.
    per_bin = weighted.resample(freq, closed=closed, label=label).sum()
    return per_bin.reindex(target_index)

In [217]:
import re
from datetime import timedelta

def applyCountDataRecorded(datetime, timeInterval, originalDF):
    """Count rows of ``originalDF`` recorded in ``(datetime - timeInterval, datetime]``.

    :param datetime: End of the window (inclusive).
    :param timeInterval: Window length; subtracted from ``datetime`` to get the
        exclusive start of the window.
    :param originalDF: Frame with a ``'datetime'`` column to compare against.
    :return: Number of rows whose ``'datetime'`` lies inside the window.
    """
    recordedAt = originalDF['datetime']
    windowStart = datetime - timeInterval
    afterStart = recordedAt > windowStart
    upToEnd = recordedAt <= datetime
    return int((afterStart & upToEnd).sum())

# Duration pattern: optional days, hours, minutes, seconds in that order.
# Minutes accept the pandas-style "T" as well as "min" and "m", so the strings
# shown in the docstring and the strings passed to resample both parse.
regex = re.compile(
    r'^((?P<days>[\.\d]+?)d)?'
    r'((?P<hours>[\.\d]+?)h)?'
    r'((?P<minutes>[\.\d]+?)(?:T|min|m))?'
    r'((?P<seconds>[\.\d]+?)s)?$'
)
def parse_time(time_str):
    """
    Parse a time string e.g. (2h13m or 2h13T) into a timedelta object.

    Modified from virhilo's answer at https://stackoverflow.com/a/4628148/851699

    :param time_str: A string identifying a duration, built from optional
        ``<n>d``, ``<n>h``, ``<n>T``/``<n>min``/``<n>m`` and ``<n>s`` parts in
        that order (eg. 2h13m, 15T, 15min)
    :return datetime.timedelta: A datetime.timedelta object
    :raises AssertionError: if ``time_str`` does not match the pattern
    """
    parts = regex.match(time_str)
    if parts is None:
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            "Could not parse any time information from '{}'.  Examples of valid strings: '8h', '15T', '15min', '2d8h5m20s', '2m4s'".format(time_str)
        )
    # Only the units that actually appeared are forwarded to timedelta.
    time_params = {name: float(param) for name, param in parts.groupdict().items() if param}
    return timedelta(**time_params)


#Returns a dataframe object with datetime column converted to datetime objects
#itemName has underscores such as ash_prime_set
def initDF(itemName):
    """Read ``itemDataFrames/<itemName>.csv`` and parse its ``datetime`` column.

    :param itemName: File stem of the CSV, e.g. ``ash_prime_set``.
    :return: DataFrame (first CSV column used as the index) whose ``datetime``
        column has been converted with ``pd.to_datetime``.
    """
    csvPath = f"itemDataFrames/{itemName}.csv"
    rawFrame = pd.read_csv(csvPath, index_col=0)
    return rawFrame.assign(datetime=pd.to_datetime(rawFrame['datetime']))

#converts a dataframe to the timeWeightedAverage dataframe
#timestep is a string of the form "15min" or "30T"
def convertTWA(df, timeStep):
    """Convert a sample frame into its time-weighted-average frame.

    :param df: Frame with a ``datetime`` column; it becomes the index for
        resampling.
    :param timeStep: Resample frequency string such as "15min" or "30T".
    :return: Frame with one row per right-closed, right-labelled interval and
        the interval label restored as a ``datetime`` column.
    """
    binning = {"closed": "right", "label": "right"}
    indexed = df.set_index("datetime")

    # The resample group keys are the interval labels; freq="infer" gives the
    # resulting index the .freq that resample_time_weighted_mean reads.
    binLabels = indexed.resample(timeStep, **binning).groups.keys()
    targetIndex = pd.DatetimeIndex(binLabels, freq="infer")

    averaged = resample_time_weighted_mean(indexed, targetIndex, **binning)
    # The target index is unnamed, so reset_index() yields a column called "index".
    return averaged.reset_index().rename(columns={"index": "datetime"})

#initializes from scratch a time weighted average dataframe according to
#a timestep to group by - timestep is a string of the form "15min" or "30T"
def initGroupedDF(itemName, timeStep):
    """Load an item's CSV and return its time-weighted-average frame.

    Previously the body returned an undefined local ``df`` (NameError, or a
    stale global if one happened to exist); it now does what the comment above
    describes by chaining ``initDF`` and ``convertTWA``.

    :param itemName: File stem of the CSV, e.g. ``ash_prime_set``.
    :param timeStep: Resample frequency string such as "15min" or "30T".
    :return: The frame produced by ``convertTWA`` for that item.
    """
    return convertTWA(initDF(itemName), timeStep)

#timeInterval is a string that can be parsed into a time_delta object by parse_time

def clearMissingData(originalDF, TWADF, timeInterval):
    """Blank the prices of averaged rows that have no raw samples behind them.

    For each row of ``TWADF`` the raw samples recorded in the window
    ``(row.datetime - timeInterval, row.datetime]`` are counted; where the
    count is zero, ``BuyerPrice`` and ``SellerPrice`` are set to missing.

    :param originalDF: Raw sample frame with a ``datetime`` column.
    :param TWADF: Time-weighted-average frame with ``datetime``,
        ``BuyerPrice`` and ``SellerPrice`` columns. It is not modified.
    :param timeInterval: Window length as a string understood by ``parse_time``.
    :return: A copy of ``TWADF`` with the unsupported prices blanked.
    """
    # The window length is the same for every row, so parse it once.
    window = parse_time(timeInterval)

    # Counts are kept in a local Series rather than a temporary column, so a
    # caller column that happens to be named "DataPresent" is left untouched.
    samplesInWindow = TWADF["datetime"].apply(
        lambda stamp: applyCountDataRecorded(stamp, window, originalDF)
    )

    filteredDF = TWADF.copy()
    filteredDF.loc[samplesInWindow == 0, ["BuyerPrice", "SellerPrice"]] = None
    return filteredDF

# Execute PrimeItemParser.ipynb inside this kernel.
# NOTE(review): presumably this is what defines primeSetNames, which the
# commented loop here and the widget cell rely on — confirm.
%run PrimeItemParser.ipynb
# Disabled batch example: build and display the filtered frame for every set.
# NOTE(review): clearMissingData takes a third timeInterval argument; the call
# in this disabled loop passes only two and needs updating before re-enabling.
#for name in primeSetNames:
    #timeStep = "15min"
    #df = initDF(name)
    #TWAdf = convertTWA(df, timeStep)
    #TWAdf = TWAdf.iloc[0:-1]
    #filteredDF = clearMissingData(df, TWAdf)
    #print(name)
    #display(filteredDF)

In [218]:
import ipywidgets as widgets
from IPython.display import display

# Maps the unit code chosen in the dropdown to the word used in the plot title.
timeOpDic = {"s" : "second", "T": "minute", "h": "hour"}

# Item picker: free typing is allowed, but only names from primeSetNames are accepted.
x = widgets.Combobox(
    description='Item Name:',
    placeholder='Choose A Warframe',
    options=primeSetNames,
    ensure_option=True,
    disabled=False,
    style={'description_width': 'initial'},
)

# Numeric part of the averaging interval, entered as text.
y = widgets.Text(
    description='Time Interval:',
    value='15',
    placeholder='15',
    disabled=False,
    style={'description_width': 'initial'},
)

# Unit of the averaging interval; the option values are the frequency suffixes.
z = widgets.Dropdown(
    description='Time Unit:',
    options=[("Seconds", 's'), ("Minutes", 'T'), ("Hours", 'h')],
    value='T',
    disabled=False,
    style={'description_width': 'initial'},
)

print(
    "Uses data sampled 3 times per second (total, not per item)\n"
    "and displays the last 20 rows of a dataframe created by taking\n"
    "the time-weighted average of the full dataframe of samples\n"
    "to show trends in prices over time."
)
display(x, y, z)

Uses data sampled 3 times per second (total, not per item)
and displays the last 20 rows of a dataframe created by taking
the time-weighted average of the full dataframe of samples
to show trends in prices over time.


Combobox(value='', description='Item Name:', ensure_option=True, options=('ash_prime_set', 'atlas_prime_set', …

Text(value='15', description='Time Interval:', placeholder='15', style=TextStyle(description_width='initial'))

Dropdown(description='Time Unit:', index=1, options=(('Seconds', 's'), ('Minutes', 'T'), ('Hours', 'h')), styl…

In [226]:
from IPython.display import clear_output
import plotly.express as px
from plotly_resampler import register_plotly_resampler
import time

# Route DataFrame.plot(...) through plotly.
pd.set_option("plotting.backend", "plotly")

# Registering once is enough: afterwards every Figure/FigureWidget is wrapped
# by plotly-resampler according to the `mode` argument.
register_plotly_resampler(mode='auto')

# Trigger button plus the output area that the click handler writes into.
output = widgets.Output()
dataframeButton = widgets.Button(description="Show Data!")
display(dataframeButton, output)

def _readIntervalText(rawText):
    """Return ``rawText`` stripped if it is a plain positive decimal, else None."""
    text = rawText.strip()
    if not text or any(ch not in "0123456789." for ch in text):
        return None
    try:
        value = float(text)
    except ValueError:  # e.g. "." or "1.2.3"
        return None
    return text if value > 0 else None


def on_button_clicked(b):
    """Click handler: plot the averaged and the raw price history of the chosen item.

    Reads the item name from ``x`` and the interval from ``y`` + ``z``, builds
    the time-weighted-average frame, blanks intervals without samples and shows
    both plots inside ``output``. On success the intermediate results are
    cached on ``on_button_clicked.data`` as
    ``(DF, TWADF, filteredDF, timeStep)`` for later cells.

    :param b: The object passed by the button callback (unused).
    :return: ``on_button_clicked.data`` on success, otherwise ``None``.
    """
    with output:
        clear_output(wait=True)

        # The interval is free text; reject anything that is not a positive
        # number here instead of letting resample/parse_time raise an
        # unhandled error into the output area.
        intervalText = _readIntervalText(y.value)
        if intervalText is None:
            print("Enter a positive number for the time interval.")
            return None

        try:
            timeStep = intervalText + z.value
            DF = initDF(x.value)
            TWADF = convertTWA(DF, timeStep)
            # Drop the last averaged row before filtering and plotting.
            TWADF = TWADF.iloc[:-1]
            filteredDF = clearMissingData(DF, TWADF, timeStep)
            fig = filteredDF.plot(
                x='datetime',
                y=['BuyerPrice', 'SellerPrice'],
                title=f"Price Vs Time (Time Weighted Average)<br><sup>Averaged by {intervalText} {timeOpDic[z.value]} intervals</sup>",
            )
            fig.show()
            on_button_clicked.data = (DF, TWADF, filteredDF, timeStep)
            print("*Gaps imply no data was collected during that time interval.")

            # Time how long the raw plot takes to build and show; printed in seconds.
            started = time.time()
            fig = DF.plot(
                x='datetime',
                y=['BuyerPrice', 'SellerPrice'],
                title="Price Vs Time (Raw)",
            )
            fig.show()
            finished = time.time()
            print(finished - started)
            return on_button_clicked.data

        except FileNotFoundError:
            # initDF could not find itemDataFrames/<name>.csv.
            print("Enter A Valid Warframe Name")
        except TypeError:
            print("Enter a smaller time interval, \nthere isn't enough data to support this interval yet.")
        


# Run on_button_clicked whenever the "Show Data!" button is clicked.
dataframeButton.on_click(on_button_clicked)

Button(description='Show Data!', style=ButtonStyle())

Output()

In [220]:
# Unpack the results cached by the last successful button click.
# on_button_clicked.data is only set after the handler has plotted without
# error, so on a fresh kernel (before any click) this raises AttributeError.
# timeInterval receives the frequency string (e.g. "15T"), not a timedelta.
DF, TWADF, filteredDF, timeInterval = on_button_clicked.data

In [146]:
# Count the raw samples that fall inside each averaged interval.
# applyFilterMissing no longer exists in this notebook; the counter is now
# applyCountDataRecorded. timeInterval is a frequency string (e.g. "15T"), so
# it is converted to a timedelta once before being used as the window length.
sampleWindow = parse_time(timeInterval)
TWADF['Filtered'] = TWADF.apply(lambda row : applyCountDataRecorded(row['datetime'], sampleWindow, DF), axis = 1)

In [147]:
# Show the averaged frame together with the per-interval sample counts.
TWADF

Unnamed: 0,datetime,BuyerPrice,SellerPrice,Filtered
0,2023-03-03 04:00:00,140.0,151.773333,6
1,2023-03-03 04:15:00,140.0,160.000000,11
2,2023-03-03 04:30:00,140.0,160.000000,10
3,2023-03-03 04:45:00,140.0,164.472222,11
4,2023-03-03 05:00:00,140.0,161.661111,13
...,...,...,...,...
78,2023-03-03 23:30:00,160.0,164.000000,14
79,2023-03-03 23:45:00,160.0,162.217778,16
80,2023-03-04 00:00:00,160.0,162.000000,17
81,2023-03-04 00:15:00,160.0,163.802222,15


In [187]:
# Small hand-made fixture: irregularly spaced samples followed by a one-hour
# gap before the final reading, used to exercise the averaging and gap filter.
samples = [
    ("2017-01-01 2:05:00", 32.9),
    ("2017-01-01 2:07:30", 29.83),
    ("2017-01-01 2:10:00", 45.76),
    ("2017-01-01 2:15:00", 16.22),
    ("2017-01-01 2:20:00", 17.33),
    ("2017-01-01 2:25:00", 23.4),
    ("2017-01-01 2:28:45", 150.12),
    ("2017-01-01 2:30:00", 100.29),
    ("2017-01-01 2:35:00", 38.45),
    ("2017-01-01 2:40:00", 67.12),
    ("2017-01-01 2:45:00", 20.0),
    ("2017-01-01 2:50:00", 58.41),
    ("2017-01-01 2:55:00", 58.32),
    ("2017-01-01 3:00:00", 59.89),
    ("2017-01-01 4:00:00", 100),
]
test = pd.DataFrame(
    {
        "datetime": pd.to_datetime([stamp for stamp, _ in samples]),
        0: [value for _, value in samples],
    }
)

# Time-weighted average of the fixture in 15-minute intervals ("T" = minutes).
testFilter = convertTWA(test, "15T")
testFilter

Unnamed: 0,datetime,0
0,2017-01-01 02:15:00,28.971667
1,2017-01-01 02:30:00,59.464167
2,2017-01-01 02:45:00,41.856667
3,2017-01-01 03:00:00,58.873333
4,2017-01-01 03:15:00,100.0
5,2017-01-01 03:30:00,100.0
6,2017-01-01 03:45:00,100.0
7,2017-01-01 04:00:00,100.0


In [188]:
# Count the fixture samples inside each 15-minute interval.
# applyFilterMissing no longer exists in this notebook; the counter is now
# applyCountDataRecorded, which takes the same (end, window, frame) arguments.
testFilter["DataPresent"] = testFilter.apply(lambda row : applyCountDataRecorded(row['datetime'], parse_time("15T"), test), axis = 1)

In [208]:
# For intervals whose sample count is 0, blank both the averaged value
# (column 0) and the count itself.
testFilter.loc[testFilter["DataPresent"] == 0, [0, "DataPresent"]] = None

In [209]:
# Show the fixture after the empty intervals have been blanked.
testFilter

Unnamed: 0,datetime,0,DataPresent
0,2017-01-01 02:15:00,28.971667,4.0
1,2017-01-01 02:30:00,59.464167,4.0
2,2017-01-01 02:45:00,41.856667,3.0
3,2017-01-01 03:00:00,58.873333,3.0
4,2017-01-01 03:15:00,,
5,2017-01-01 03:30:00,,
6,2017-01-01 03:45:00,,
7,2017-01-01 04:00:00,100.0,1.0
