From fd8bbb20cd74d8d5578cac82b03474576d41d356 Mon Sep 17 00:00:00 2001 From: Boreas514 Date: Wed, 23 Sep 2020 19:04:54 +0800 Subject: [PATCH 1/6] add most method --- .gitignore | 3 + python/fastquant/data/forex/__init__.py | 0 python/fastquant/data/forex/forextester.py | 132 +++++++++++++++++++++ python/fastquant/data/forex/forey.py | 44 +++++++ 4 files changed, 179 insertions(+) create mode 100644 python/fastquant/data/forex/__init__.py create mode 100644 python/fastquant/data/forex/forextester.py create mode 100644 python/fastquant/data/forex/forey.py diff --git a/.gitignore b/.gitignore index 94d18c2a..721d52d0 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,6 @@ fastquant_0.0.0.9000.tar.gz # VSCode Workspace files *.code-workspace + +# PyCharm Workspace files +*.idea/ \ No newline at end of file diff --git a/python/fastquant/data/forex/__init__.py b/python/fastquant/data/forex/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/fastquant/data/forex/forextester.py b/python/fastquant/data/forex/forextester.py new file mode 100644 index 00000000..455fce55 --- /dev/null +++ b/python/fastquant/data/forex/forextester.py @@ -0,0 +1,132 @@ +import os +from pathlib import Path +import zipfile + +import requests +import pandas as pd +from pandas import DataFrame + +from fastquant.config import DATA_PATH + +def get_forextester_data(symbol, start_date, end_date, time_frame): + allowed_forex_symbol_list = [ + 'AUDJPY', + 'AUDUSD', + 'CHFJPY', + 'EURCAD', + 'EURCHF', + 'EURGBP', + 'EURJPY', + 'EURUSD', + 'GBPCHF', + 'GBPJPY', + 'GBPUSD', + 'NZDJPY', + 'NZDUSD', + 'USDCAD', + 'USDJPY', + 'USDCHF', + 'XAGUSD', + 'XAUUSD' + ] + if symbol not in allowed_forex_symbol_list: + raise ValueError('your symbol is not supported by forextester') + + print('start downloading forex data zip file...') + res = requests.get(f'http://www.forextester.com/templates/data/files/{symbol}.zip', stream=True) + with open(Path(DATA_PATH, f'{symbol}.zip'), 'wb') as forex_zip_file: + for chunk in res.iter_content(chunk_size=1024): + forex_zip_file.write(chunk) + + zip_file = zipfile.ZipFile(Path(DATA_PATH, f'{symbol}.zip')) + zip_file.extractall(Path(DATA_PATH)) + + tmp_dict = { + 'dt':[], + 'minute':[], + 'open':[], + 'high':[], + 'low':[], + 'close':[] + } + with open(Path(DATA_PATH, f'{symbol}.txt')) as forex_txt_file: + for line in forex_txt_file: + symbol, data, minute, open_price, high, low, close, vol = line.strip().split(',') + if data == '': + continue + tmp_dict['dt'].append(data+minute) + tmp_dict['open'].append(open_price) + tmp_dict['high'].append(high) + tmp_dict['low'].append(low) + tmp_dict['close'].append(close) + + forex_dataframe = DataFrame(tmp_dict, columns=list(tmp_dict.keys())) + forex_dataframe = forex_dataframe.reset_index() + forex_dataframe["dt"] = pd.to_datetime(forex_dataframe.dt) + return forex_dataframe.set_index('dt') + + + +def shape_data_from_1min_to_other(df, time_frame): + if time_frame not in ["M1", "M5", "M15", "H1", "D1", "W1"]: + raise ValueError('time_frame must in this list:["M1", "M5", "M15", "H1", "D1", "W1"]') + + tmp_dict = { + 'dt': [], + 'minute': [], + 'open': [], + 'high': [], + 'low': [], + 'close': [] + } + + if time_frame == 'M1': + return df + elif time_frame == 'M5': + pass + elif time_frame == 'M15': + tmp_datetime = None + tmp_hour = None + tmp_minute = None + tmp_open = None + tmp_high = None + tmp_low = None + + for index, row in df.iterrows(): + if tmp_open is None: + tmp_open = row['open'] + tmp_high = row['high'] + tmp_low = row['low'] + tmp_datetime = row['dt'] + tmp_hour = row['dt'].hour + tmp_minute = row['dt'].minute + else: + if row['high']>tmp_high: + tmp_high = row['high'] + if row['low']=15 and df[index+1:index+2]['dt'].values[0].hour == tmp_hour: + tmp_dict['dt'] = tmp_datetime + tmp_dict['open'] = tmp_open + tmp_dict['high'] = tmp_high + tmp_dict['low'] = tmp_low + tmp_dict['close'] = row['close'] + tmp_datetime = None + tmp_hour = None + tmp_minute = None + tmp_open = None + tmp_high = None + tmp_low = None + elif 1545: + pass + else: + pass + + else: + pass \ No newline at end of file diff --git a/python/fastquant/data/forex/forey.py b/python/fastquant/data/forex/forey.py new file mode 100644 index 00000000..2f2fc86b --- /dev/null +++ b/python/fastquant/data/forex/forey.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import numpy as np + +# Import from config +from fastquant.config import DATA_FORMAT_COLS + +# Import package +from fastquant.data.forex.forextester import get_forextester_data + + +def get_forex_data(symbol, start_date=None, end_date=None, source="forextester", time_frame='day'): + """Returns pricing data for a specified forex pair. + + Parameters + ---------- + symbol : str + Symbol of the forex in the forextester. + https://forextester.com/data/datasources + start_date : str + Starting date (YYYY-MM-DD) of the period that you want to get data on + in most cases we need more wide time period to test strategy, so keep this to None + end_date : str + Ending date (YYYY-MM-DD) of the period you want to get data on + in most cases we need more wide time period to test strategy, so keep this to None + source : str + Source of forex history data + time_frame : str + time frame you want, support 1 minute,15 minutes,1 hour,1 day,1 week + this parameter must one of them:["M1", "M5", "M15", "H1", "D1", "W1"] + + Returns + ------- + pandas.DataFrame + Forex data (in the specified `format`) for the specified company and date range + """ + + if source == "forextester": + df = get_forextester_data(symbol, start_date, end_date, time_frame) + else: + raise Exception("Source must be either 'phisix' or 'yahoo'") + + return df From 491e86b66e698405cee57113d61a15ca07dc688e Mon Sep 17 00:00:00 2001 From: Boreas514 Date: Thu, 1 Oct 2020 14:27:45 +0800 Subject: [PATCH 2/6] get forex data method done --- .gitignore | 9 +- .../data/forex/{forey.py => forex.py} | 17 +- python/fastquant/data/forex/forextester.py | 359 +++++++++++++++--- python/fastquant/data/forex/tests.py | 44 +++ 4 files changed, 372 insertions(+), 57 deletions(-) rename python/fastquant/data/forex/{forey.py => forex.py} (65%) create mode 100644 python/fastquant/data/forex/tests.py diff --git a/.gitignore b/.gitignore index 721d52d0..854da391 100644 --- a/.gitignore +++ b/.gitignore @@ -137,4 +137,11 @@ fastquant_0.0.0.9000.tar.gz *.code-workspace # PyCharm Workspace files -*.idea/ \ No newline at end of file +*.idea/ + +# tmp pickle test files +*.pickle + +# tmp forex data +/python/fastquant/data/*.txt +/python/fastquant/data/*.zip diff --git a/python/fastquant/data/forex/forey.py b/python/fastquant/data/forex/forex.py similarity index 65% rename from python/fastquant/data/forex/forey.py rename to python/fastquant/data/forex/forex.py index 2f2fc86b..e27afcd0 100644 --- a/python/fastquant/data/forex/forey.py +++ b/python/fastquant/data/forex/forex.py @@ -7,10 +7,10 @@ from fastquant.config import DATA_FORMAT_COLS # Import package -from fastquant.data.forex.forextester import get_forextester_data +from fastquant.data.forex.forextester import get_forextester_data, get_local_data -def get_forex_data(symbol, start_date=None, end_date=None, source="forextester", time_frame='day'): +def get_forex_data(symbol, start_date=None, end_date=None, source="forextester", time_frame='D1', read_from_local=False): """Returns pricing data for a specified forex pair. Parameters @@ -28,17 +28,22 @@ def get_forex_data(symbol, start_date=None, end_date=None, source="forextester", Source of forex history data time_frame : str time frame you want, support 1 minute,15 minutes,1 hour,1 day,1 week - this parameter must one of them:["M1", "M5", "M15", "H1", "D1", "W1"] + this parameter must one of them:["M1", "M15", "H1", "D1", "W1"] + read_from_local : bull + if this parameter set False, method get data from online + if set it to True, method get data from local pickle file, faster than set it to False Returns ------- pandas.DataFrame - Forex data (in the specified `format`) for the specified company and date range """ if source == "forextester": - df = get_forextester_data(symbol, start_date, end_date, time_frame) + if read_from_local is False: + df = get_forextester_data(symbol, start_date, end_date, time_frame) + else: + df = get_local_data(symbol, start_date, end_date, time_frame) else: - raise Exception("Source must be either 'phisix' or 'yahoo'") + raise Exception("Source must be forextester") return df diff --git a/python/fastquant/data/forex/forextester.py b/python/fastquant/data/forex/forextester.py index 455fce55..98c84ba4 100644 --- a/python/fastquant/data/forex/forextester.py +++ b/python/fastquant/data/forex/forextester.py @@ -1,14 +1,25 @@ import os from pathlib import Path import zipfile +import logging +import pickle import requests import pandas as pd from pandas import DataFrame from fastquant.config import DATA_PATH +from fastquant.data.stocks.pse import datestring_to_datetime -def get_forextester_data(symbol, start_date, end_date, time_frame): +handler_format = logging.Formatter('%(asctime)s-%(name)s-%(levelname)s-%(message)s',datefmt='%Y-%m-%d %H:%M:%S') +logger = logging.getLogger('forex_logger') +logger_handler = logging.StreamHandler() +logger_handler.setFormatter(handler_format) +logger.setLevel(logging.INFO) +logger.addHandler(logger_handler) + + +def get_forextester_data(symbol, start_date=None, end_date=None, time_frame='D1'): allowed_forex_symbol_list = [ 'AUDJPY', 'AUDUSD', @@ -32,48 +43,92 @@ def get_forextester_data(symbol, start_date, end_date, time_frame): if symbol not in allowed_forex_symbol_list: raise ValueError('your symbol is not supported by forextester') - print('start downloading forex data zip file...') + if start_date is not None: + start_date = datestring_to_datetime(start_date) + end_date = datestring_to_datetime(end_date) + + logger.info(f'start downloading forex {symbol} data zip file...') + file_size = 0 res = requests.get(f'http://www.forextester.com/templates/data/files/{symbol}.zip', stream=True) with open(Path(DATA_PATH, f'{symbol}.zip'), 'wb') as forex_zip_file: - for chunk in res.iter_content(chunk_size=1024): + for chunk in res.iter_content(chunk_size=4096): + file_size+=4096 + if file_size<1048576: + logger.info(f'download {round(file_size/1024)}KB') + else: + logger.info(f'download {round(file_size/1048576)}MB') forex_zip_file.write(chunk) - + logger.info('download success') zip_file = zipfile.ZipFile(Path(DATA_PATH, f'{symbol}.zip')) zip_file.extractall(Path(DATA_PATH)) + logger.info('unzip success') - tmp_dict = { - 'dt':[], - 'minute':[], - 'open':[], - 'high':[], - 'low':[], - 'close':[] + forex_dataframe = pd.read_csv( + Path(DATA_PATH, f'{symbol}.txt'), + dtype={'': str, '