Skip to content

Commit

Permalink
Move trim_dataframe from history to converter
Browse files Browse the repository at this point in the history
  • Loading branch information
xmatthias committed Dec 28, 2019
1 parent 9d8ea2f commit 416517b
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 77 deletions.
19 changes: 19 additions & 0 deletions freqtrade/data/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Functions to convert data from one format to another
"""
import logging
from datetime import datetime, timezone

import pandas as pd
from pandas import DataFrame, to_datetime

from freqtrade.configuration.timerange import TimeRange

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -112,6 +114,23 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
return df


def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
"""
Trim dataframe based on given timerange
:param df: Dataframe to trim
:param timerange: timerange (use start and end date if available)
:param: df_date_col: Column in the dataframe to use as Date column
:return: trimmed dataframe
"""
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
df = df.loc[df[df_date_col] >= start, :]
if timerange.stoptype == 'date':
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df[df_date_col] <= stop, :]
return df


def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
"""
Gets order book list, returns dataframe with below format per suggested by creslin
Expand Down
3 changes: 1 addition & 2 deletions freqtrade/data/datahandlers/jsondatahandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@

from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.data.converter import clean_ohlcv_dataframe
from freqtrade.data.history import trim_dataframe
from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe

from .idatahandler import IDataHandler

Expand Down
26 changes: 0 additions & 26 deletions freqtrade/data/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,6 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
return tickerlist[start_index:stop_index]


def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
"""
Trim dataframe based on given timerange
:param df: Dataframe to trim
:param timerange: timerange (use start and end date if available)
:param: df_date_col: Column in the dataframe to use as Date column
:return: trimmed dataframe
"""
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
df = df.loc[df[df_date_col] >= start, :]
if timerange.stoptype == 'date':
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df[df_date_col] <= stop, :]
return df


def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None) -> List[Dict]:
"""
Expand Down Expand Up @@ -115,15 +98,6 @@ def store_trades_file(datadir: Path, pair: str,
misc.file_dump_json(filename, data, is_zip=is_zip)


def _validate_pairdata(pair, pairdata, timerange: TimeRange):
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
logger.warning('Missing data at start for pair %s, data starts at %s',
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))


def load_pair_history(pair: str,
timeframe: str,
datadir: Path, *,
Expand Down
3 changes: 2 additions & 1 deletion freqtrade/optimize/backtesting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from freqtrade.configuration import (TimeRange, remove_credentials,
validate_config_consistency)
from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
from freqtrade.misc import file_dump_json
Expand Down Expand Up @@ -482,7 +483,7 @@ def start(self) -> None:

# Trim startup period from analyzed dataframe
for pair, df in preprocessed.items():
preprocessed[pair] = history.trim_dataframe(df, timerange)
preprocessed[pair] = trim_dataframe(df, timerange)
min_date, max_date = history.get_timerange(preprocessed)

logger.info(
Expand Down
3 changes: 2 additions & 1 deletion freqtrade/optimize/hyperopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from pandas import DataFrame

from freqtrade import OperationalException
from freqtrade.data.history import get_timerange, trim_dataframe
from freqtrade.data.history import get_timerange
from freqtrade.data.converter import trim_dataframe
from freqtrade.misc import plural, round_dict
from freqtrade.optimize.backtesting import Backtesting
# Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules
Expand Down
3 changes: 2 additions & 1 deletion freqtrade/plot/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
from freqtrade.configuration import TimeRange
from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.btanalysis import (combine_tickers_with_mean,
create_cum_profit,
extract_trades_of_period, load_trades)
Expand Down Expand Up @@ -47,7 +48,7 @@ def init_plotscript(config):
db_url=config.get('db_url'),
exportfilename=config.get('exportfilename'),
)
trades = history.trim_dataframe(trades, timerange, 'open_time')
trades = trim_dataframe(trades, timerange, 'open_time')
return {"tickers": tickers,
"trades": trades,
"pairs": pairs,
Expand Down
47 changes: 45 additions & 2 deletions tests/data/test_converter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# pragma pylint: disable=missing-docstring, C0103
import logging

from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data
from freqtrade.data.history import load_pair_history, validate_backtest_data, get_timerange
from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (ohlcv_fill_up_missing_data,
parse_ticker_dataframe, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data,
load_pair_history, validate_backtest_data)
from tests.conftest import log_has


Expand Down Expand Up @@ -145,3 +148,43 @@ def test_ohlcv_drop_incomplete(caplog):
assert len(data) == 3

assert log_has("Dropping last candle", caplog)


def test_trim_dataframe(testdatadir) -> None:
data = load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)['UNITTEST/BTC']
min_date = int(data.iloc[0]['date'].timestamp())
max_date = int(data.iloc[-1]['date'].timestamp())
data_modify = data.copy()

# Remove first 30 minutes (1800 s)
tr = TimeRange('date', None, min_date + 1800, 0)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[-1] == data.iloc[-1])
assert all(data_modify.iloc[0] == data.iloc[30])

data_modify = data.copy()
# Remove last 30 minutes (1800 s)
tr = TimeRange(None, 'date', 0, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[0] == data.iloc[0])
assert all(data_modify.iloc[-1] == data.iloc[-31])

data_modify = data.copy()
# Remove first 25 and last 30 minutes (1800 s)
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 55
# first row matches 25th original row
assert all(data_modify.iloc[0] == data.iloc[25])
46 changes: 2 additions & 44 deletions tests/data/test_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@
load_tickerdata_file, pair_data_filename,
pair_trades_filename,
refresh_backtest_ohlcv_data,
refresh_backtest_trades_data,
refresh_data,
trim_dataframe, trim_tickerlist,
validate_backtest_data)
refresh_backtest_trades_data, refresh_data,
trim_tickerlist, validate_backtest_data)
from freqtrade.exchange import timeframe_to_minutes
from freqtrade.misc import file_dump_json
from freqtrade.strategy.default_strategy import DefaultStrategy
Expand Down Expand Up @@ -444,46 +442,6 @@ def test_trim_tickerlist(testdatadir) -> None:
assert not ticker


def test_trim_dataframe(testdatadir) -> None:
data = load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)['UNITTEST/BTC']
min_date = int(data.iloc[0]['date'].timestamp())
max_date = int(data.iloc[-1]['date'].timestamp())
data_modify = data.copy()

# Remove first 30 minutes (1800 s)
tr = TimeRange('date', None, min_date + 1800, 0)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[-1] == data.iloc[-1])
assert all(data_modify.iloc[0] == data.iloc[30])

data_modify = data.copy()
# Remove last 30 minutes (1800 s)
tr = TimeRange(None, 'date', 0, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[0] == data.iloc[0])
assert all(data_modify.iloc[-1] == data.iloc[-31])

data_modify = data.copy()
# Remove first 25 and last 30 minutes (1800 s)
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 55
# first row matches 25th original row
assert all(data_modify.iloc[0] == data.iloc[25])


def test_file_dump_json_tofile(testdatadir) -> None:
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
data = {'bar': 'foo'}
Expand Down

0 comments on commit 416517b

Please sign in to comment.