Merge pull request #2138 from freqtrade/history_docstrings
Refactorings to history
xmatthias committed Aug 20, 2019
2 parents 491d742 + 09286d4 commit 9e8ca8d
Showing 6 changed files with 53 additions and 62 deletions.
43 changes: 25 additions & 18 deletions freqtrade/data/history.py
@@ -43,7 +43,7 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
start_index += 1

if timerange.stoptype == 'line':
start_index = len(tickerlist) + timerange.stopts
start_index = max(len(tickerlist) + timerange.stopts, 0)
if timerange.stoptype == 'index':
stop_index = timerange.stopts
elif timerange.stoptype == 'date':
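The new `max(..., 0)` clamp matters for the 'line' stop type, where `stopts` is a negative offset meaning "the last N candles": if more candles are requested than the list holds, the unclamped sum becomes a negative index and Python's slicing silently drops rows instead of returning the whole list. A minimal sketch of the failure mode, with hypothetical data:

```python
tickerlist = [[1000], [2000], [3000]]  # three candles, timestamps only
stopts = -5                            # "give me the last 5 lines"

unclamped = len(tickerlist) + stopts        # -2, a negative index
print(tickerlist[unclamped:])               # [[2000], [3000]], only 2 of the 3 candles

clamped = max(len(tickerlist) + stopts, 0)  # 0
print(tickerlist[clamped:])                 # all three candles, as expected
```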
@@ -57,24 +57,31 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
return tickerlist[start_index:stop_index]


def load_tickerdata_file(
datadir: Optional[Path], pair: str,
ticker_interval: str,
timerange: Optional[TimeRange] = None) -> Optional[list]:
def load_tickerdata_file(datadir: Optional[Path], pair: str, ticker_interval: str,
timerange: Optional[TimeRange] = None) -> Optional[list]:
"""
Load a pair from file, either .json.gz or .json
:return: tickerlist, or an empty list if unsuccessful
"""
filename = pair_data_filename(datadir, pair, ticker_interval)
pairdata = misc.file_load_json(filename)
if not pairdata:
return None
return []

if timerange:
pairdata = trim_tickerlist(pairdata, timerange)
return pairdata


def store_tickerdata_file(datadir: Optional[Path], pair: str,
ticker_interval: str, data: list, is_zip: bool = False):
"""
Stores tickerdata to file
"""
filename = pair_data_filename(datadir, pair, ticker_interval)
misc.file_dump_json(filename, data, is_zip=is_zip)


def load_pair_history(pair: str,
ticker_interval: str,
datadir: Optional[Path],
@@ -177,11 +184,14 @@ def pair_data_filename(datadir: Optional[Path], pair: str, ticker_interval: str)
return filename


def load_cached_data_for_updating(filename: Path, ticker_interval: str,
def load_cached_data_for_updating(datadir: Optional[Path], pair: str, ticker_interval: str,
timerange: Optional[TimeRange]) -> Tuple[List[Any],
Optional[int]]:
"""
Load cached data and choose what part of the data should be updated
Load cached data to download more data.
If timerange is passed in, checks whether data from before the stored data will be downloaded.
If that's the case, what's available should be completely overwritten.
Only used by download_pair_history().
"""

since_ms = None
@@ -195,12 +205,11 @@ def load_cached_data_for_updating(filename: Path, ticker_interval: str,
since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000

# read the cached file
if filename.is_file():
with open(filename, "rt") as file:
data = misc.json_load(file)
# remove the last item, could be incomplete candle
if data:
data.pop()
# Intentionally don't pass timerange in - since we need to load the full dataset.
data = load_tickerdata_file(datadir, pair, ticker_interval)
# remove the last item, could be incomplete candle
if data:
data.pop()
else:
data = []

@@ -239,14 +248,12 @@ def download_pair_history(datadir: Optional[Path],
)

try:
filename = pair_data_filename(datadir, pair, ticker_interval)

logger.info(
f'Download history data for pair: "{pair}", interval: {ticker_interval} '
f'and store in {datadir}.'
)

data, since_ms = load_cached_data_for_updating(filename, ticker_interval, timerange)
data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)

logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')
logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')
@@ -262,7 +269,7 @@ def download_pair_history(datadir: Optional[Path],
logger.debug("New Start: %s", misc.format_ms_time(data[0][0]))
logger.debug("New End: %s", misc.format_ms_time(data[-1][0]))

misc.file_dump_json(filename, data)
store_tickerdata_file(datadir, pair, ticker_interval, data=data)
return True

except Exception as e:
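The net effect in history.py is that filename handling is centralized: the new `store_tickerdata_file()` and the reshaped `load_tickerdata_file()` both resolve their path through `pair_data_filename()`, so `download_pair_history()` no longer builds filenames at all. A hedged round-trip sketch of the two helpers (datadir and pair are illustrative):

```python
from pathlib import Path

from freqtrade.data.history import load_tickerdata_file, store_tickerdata_file

datadir = Path("user_data/data/binance")  # illustrative location
pair, ticker_interval = "ETH/BTC", "5m"

# Returns an empty list when no data file exists (the new behaviour above).
data = load_tickerdata_file(datadir, pair, ticker_interval)

# Writes back to the same location; the path is derived internally.
store_tickerdata_file(datadir, pair, ticker_interval, data=data, is_zip=False)
```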
8 changes: 4 additions & 4 deletions freqtrade/misc.py
@@ -5,11 +5,11 @@
import logging
import re
from datetime import datetime
from pathlib import Path

import numpy as np
import rapidjson


logger = logging.getLogger(__name__)


@@ -39,7 +39,7 @@ def datesarray_to_datetimearray(dates: np.ndarray) -> np.ndarray:
return dates.dt.to_pydatetime()


def file_dump_json(filename, data, is_zip=False) -> None:
def file_dump_json(filename: Path, data, is_zip=False) -> None:
"""
Dump JSON data into a file
:param filename: file to create
@@ -49,8 +49,8 @@ def file_dump_json(filename: Path, data, is_zip=False) -> None:
logger.info(f'dumping json to "{filename}"')

if is_zip:
if not filename.endswith('.gz'):
filename = filename + '.gz'
if filename.suffix != '.gz':
filename = filename.with_suffix('.gz')
with gzip.open(filename, 'w') as fp:
rapidjson.dump(data, fp, default=str, number_mode=rapidjson.NM_NATIVE)
else:
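One subtlety of the pathlib port: `with_suffix()` replaces the final suffix instead of appending to it, so a `.json` file would now be written as `.gz` rather than the `.json.gz` the old string concatenation produced; whether that rename is intended is not visible from this diff. Illustrated with plain pathlib:

```python
from pathlib import Path

"pair-1m.json" + ".gz"                        # 'pair-1m.json.gz' (old behaviour)
Path("pair-1m.json").with_suffix(".gz")       # Path('pair-1m.gz') (new behaviour)
Path("pair-1m.json").with_suffix(".json.gz")  # Path('pair-1m.json.gz'), keeps both suffixes
```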
12 changes: 6 additions & 6 deletions freqtrade/optimize/backtesting.py
@@ -190,7 +190,7 @@ def _generate_text_table_strategy(self, all_results: dict) -> str:
return tabulate(tabular_data, headers=headers, # type: ignore
floatfmt=floatfmt, tablefmt="pipe")

def _store_backtest_result(self, recordfilename: str, results: DataFrame,
def _store_backtest_result(self, recordfilename: Path, results: DataFrame,
strategyname: Optional[str] = None) -> None:

records = [(t.pair, t.profit_percent, t.open_time.timestamp(),
@@ -201,10 +201,10 @@ def _store_backtest_result(self, recordfilename: str, results: DataFrame,
if records:
if strategyname:
# Inject strategyname to filename
recname = Path(recordfilename)
recordfilename = str(Path.joinpath(
recname.parent, f'{recname.stem}-{strategyname}').with_suffix(recname.suffix))
logger.info('Dumping backtest results to %s', recordfilename)
recordfilename = Path.joinpath(
recordfilename.parent,
f'{recordfilename.stem}-{strategyname}').with_suffix(recordfilename.suffix)
logger.info(f'Dumping backtest results to {recordfilename}')
file_dump_json(recordfilename, records)

def _get_ticker_list(self, processed) -> Dict[str, DataFrame]:
@@ -458,7 +458,7 @@ def start(self) -> None:
for strategy, results in all_results.items():

if self.config.get('export', False):
self._store_backtest_result(self.config['exportfilename'], results,
self._store_backtest_result(Path(self.config['exportfilename']), results,
strategy if len(self.strategylist) > 1 else None)

print(f"Result for strategy {strategy}")
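The strategy-name injection now stays within pathlib from end to end: for a multi-strategy backtest, the configured export file is split into one file per strategy, e.g. `backtest-result.json` becomes `backtest-result-DefStrat.json`. The same recipe in isolation:

```python
from pathlib import Path

recordfilename = Path("user_data/backtest-result.json")
strategyname = "DefStrat"

# Inject the strategy name between stem and suffix, exactly as the diff does.
recordfilename = Path.joinpath(
    recordfilename.parent,
    f'{recordfilename.stem}-{strategyname}').with_suffix(recordfilename.suffix)

print(recordfilename)  # user_data/backtest-result-DefStrat.json
```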
42 changes: 12 additions & 30 deletions freqtrade/tests/data/test_history.py
@@ -178,89 +178,71 @@ def test_load_cached_data_for_updating(mocker) -> None:
# timeframe starts earlier than the cached data
# should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == []
assert start_ts == test_data[0][0] - 1000

# same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
TimeRange(None, 'line', 0, -num_lines))
assert data == []
assert start_ts < test_data[0][0] - 1

# timeframe starts in the center of the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]

# same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]

# timeframe starts after the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]

# same with 'line' timeframe
# Try loading last 30 lines.
# Not supported by load_cached_data_for_updating, we always need to get the full data.
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]

# no timeframe is set
# should return the cached data w/o the last item
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(test_filename,
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]

# no datafile exists
# should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == []
assert start_ts == (now_ts - 10000) * 1000

# same with 'line' timeframe
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
'1m',
timerange)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == []
assert start_ts == (now_ts - num_lines * 60) * 1000

# no datafile exists, no timeframe is set
# should return an empty array and None
data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
'1m',
None)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
assert data == []
assert start_ts is None

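All of these cases hinge on the two stop conventions `TimeRange` supports: 'date' bounds are unix timestamps in seconds, while 'line' bounds count candles back from the end of the data. The constructor calls used above, annotated (the import path is an assumption for this revision):

```python
from freqtrade.configuration import TimeRange  # import path is an assumption

# Start at a unix timestamp (in seconds); the stop side is unused.
timerange = TimeRange('date', None, 1565000000, 0)

# Stop type 'line' with a negative offset: "the last 30 candles".
timerange = TimeRange(None, 'line', 0, -30)
```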
5 changes: 3 additions & 2 deletions freqtrade/tests/optimize/test_backtesting.py
@@ -2,6 +2,7 @@

import math
import random
from pathlib import Path
from unittest.mock import MagicMock

import numpy as np
@@ -785,10 +786,10 @@ def test_backtest_record(default_conf, fee, mocker):
# reset test to test with strategy name
names = []
records = []
backtesting._store_backtest_result("backtest-result.json", results, "DefStrat")
backtesting._store_backtest_result(Path("backtest-result.json"), results, "DefStrat")
assert len(results) == 4
# Assert file_dump_json was only called once
assert names == ['backtest-result-DefStrat.json']
assert names == [Path('backtest-result-DefStrat.json')]
records = records[0]
# Ensure records are of correct type
assert len(records) == 4
5 changes: 3 additions & 2 deletions freqtrade/tests/test_misc.py
@@ -1,6 +1,7 @@
# pragma pylint: disable=missing-docstring,C0103

import datetime
from pathlib import Path
from unittest.mock import MagicMock

from freqtrade.data.converter import parse_ticker_dataframe
@@ -34,12 +35,12 @@ def test_datesarray_to_datetimearray(ticker_history_list):
def test_file_dump_json(mocker) -> None:
file_open = mocker.patch('freqtrade.misc.open', MagicMock())
json_dump = mocker.patch('rapidjson.dump', MagicMock())
file_dump_json('somefile', [1, 2, 3])
file_dump_json(Path('somefile'), [1, 2, 3])
assert file_open.call_count == 1
assert json_dump.call_count == 1
file_open = mocker.patch('freqtrade.misc.gzip.open', MagicMock())
json_dump = mocker.patch('rapidjson.dump', MagicMock())
file_dump_json('somefile', [1, 2, 3], True)
file_dump_json(Path('somefile'), [1, 2, 3], True)
assert file_open.call_count == 1
assert json_dump.call_count == 1

