Skip to content

Commit

Permalink
Improved freeform ticker queries and fixed bugs with ticker searches
Browse files Browse the repository at this point in the history
  • Loading branch information
saeedamen committed Jul 28, 2021
1 parent 94851e3 commit 4f0ac72
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 23 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ individual data providers)

# Coding log

* 28 Jul 2021
* Improved freeform ticker queries and fixed bug with downloading whole categories
* 22 Jul 2021
* Fixed S3 credentials management and added S3 file copy method
* Added roll costs
Expand Down
2 changes: 1 addition & 1 deletion findatapy/market/datavendorweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2434,7 +2434,7 @@ def load_ticker(self, market_data_request, index_col=0, max_workers=1):
def download_data_frame(data_source):
if data_engine is not None:

logger.info("Request " + market_data_request.data_source + " data via " + data_engine)
logger.info("Request " + str(market_data_request.data_source) + " data via " + str(data_engine))

# If a file path has been specified
if '*' in data_engine:
Expand Down
37 changes: 27 additions & 10 deletions findatapy/market/ioengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None
for fname_single in fname:
logger.debug("Reading " + fname_single + "..")

if engine == 'parquet' and '.gzip' not in fname_single and '.parquet' not in fname_single:
if engine == 'parquet' and '.gzip' not in fname_single and '.parquet' not in fname_single:
fname_single = fname_single + '.parquet'

if (engine == 'bcolz'):
Expand Down Expand Up @@ -716,6 +716,9 @@ def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None

data_frame_list.append(data_frame)

if len(data_frame_list) == 0:
return None

if len(data_frame_list) == 1:
return data_frame_list[0]

Expand Down Expand Up @@ -1232,19 +1235,33 @@ def read_csv(self, path, cloud_credentials=None, encoding='utf-8', encoding_erro
else:
return pd.read_csv(path, encoding=encoding)

def to_csv(self, df, path, filename=None, cloud_credentials=None):
    """Write a DataFrame as CSV to one or several destinations, local or on S3.

    Parameters
    ----------
    df : DataFrame
        Data to be written; its own ``to_csv`` method does the serialisation
    path : str or list/tuple of str
        Destination path(s). Any entry containing "s3://" is written through
        the S3 filesystem, every other entry is handed straight to ``df.to_csv``
    filename : str (optional)
        If given, it is joined onto every entry of ``path`` with
        ``self.path_join`` (so ``path`` is then treated as a folder)
    cloud_credentials : (optional)
        Passed to ``self._create_cloud_filesystem``; defaults to
        ``constants.cloud_credentials`` when None

    Returns
    -------
    None
    """
    if cloud_credentials is None:
        cloud_credentials = constants.cloud_credentials

    # Normalise to a list of destinations (a bare tuple used to be wrapped as
    # a single, invalid, path)
    paths = list(path) if isinstance(path, (list, tuple)) else [path]

    if filename is not None:
        paths = [self.path_join(p, filename) for p in paths]

    # Created on first use and shared by all S3 destinations, rather than
    # being rebuilt for every path in the loop
    s3 = None

    for p in paths:
        if "s3://" in p:
            if s3 is None:
                s3 = self._create_cloud_filesystem(cloud_credentials, 's3_filesystem')

            path_in_s3 = self.sanitize_path(p).replace("s3://", "")

            # Use 'w' for py3, 'wb' for py2
            with s3.open(path_in_s3, 'w') as f:
                df.to_csv(f)
        else:
            df.to_csv(p)

def path_exists(self, path, cloud_credentials=None):
if cloud_credentials is None: cloud_credentials = constants.cloud_credentials
Expand Down
115 changes: 103 additions & 12 deletions findatapy/market/market.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def __init__(self, market_data_generator=None, md_request=None):
self._calculations = Calculations()
self.md_request = md_request

def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None, start_date=None, finish_date=None):
def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None, tickers=None,
start_date=None, finish_date=None, best_match_only=False, **kwargs):
"""Fetches market data for specific tickers
The user does not need to know how the low level API for each data provider works. The MarketDataRequest
Expand Down Expand Up @@ -91,23 +92,34 @@ def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None,
if self.md_request is not None:
md_request = self.md_request

# Any kwargs are assumed to be to set MarketDataRequest attributes
if kwargs != {}:
md_request = self._kwargs_to_md_request(kwargs, md_request)

# When we have specified a string
if md_request_str is not None:
md_request = self.create_md_request_from_str(md_request_str, md_request=md_request,
start_date=start_date, finish_date=finish_date)
start_date=start_date, finish_date=finish_date, best_match_only=best_match_only, **kwargs)

return self.fetch_market(md_request)

# When we have specified predefined tickers
if tickers is not None:
md_request = self.create_md_request_from_tickers(tickers, md_request=md_request,
start_date=start_date, finish_date=finish_date, best_match_only=best_match_only, **kwargs)

return self.fetch_market(md_request)

# When we have specified a DataFrame with tickers
if md_request_df is not None:
md_request = self.create_md_request_from_dataframe(md_request_df, md_request=md_request,
start_date=start_date, finish_date=finish_date)
start_date=start_date, finish_date=finish_date, best_match_only=best_match_only, **kwargs)

return self.fetch_market(md_request)

# Or directly as a string
if isinstance(md_request, str):
md_request = self.create_md_request_from_str(md_request, start_date=start_date, finish_date=finish_date)
md_request = self.create_md_request_from_str(md_request, start_date=start_date, finish_date=finish_date, best_match_only=best_match_only, **kwargs)

return self.fetch_market(md_request)

Expand All @@ -119,7 +131,7 @@ def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None,

# Or directly as a DataFrame
if isinstance(md_request, pd.DataFrame):
md_request = self.create_md_request_from_dataframe(md_request, start_date=start_date, finish_date=finish_date)
md_request = self.create_md_request_from_dataframe(md_request, start_date=start_date, finish_date=finish_date, **kwargs)

return self.fetch_market(md_request)

Expand Down Expand Up @@ -352,6 +364,9 @@ def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None,
# the idea is that we do all the market data downloading here, rather than elsewhere

# By default: pass the market data request to MarketDataGenerator
if data_frame is not None:
data_frame = None

if data_frame is None:
data_frame = self._market_data_generator.fetch_market_data(md_request)

Expand All @@ -365,7 +380,8 @@ def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None,

return data_frame

def create_md_request_from_dataframe(self, md_request_df, md_request=None, start_date=None, finish_date=None, smart_group=True):
def create_md_request_from_dataframe(self, md_request_df, md_request=None, start_date=None, finish_date=None, smart_group=True,
keep_initial_md_request_att_cols=['tickers', 'fields', 'freq'], **kwargs):

md_list = []

Expand All @@ -383,19 +399,33 @@ def create_md_request_from_dataframe(self, md_request_df, md_request=None, start
md_request_copy = MarketDataRequest(md_request=md_request)

for col, val in row.items():

try:
getattr(type(md_request_copy), col).fset(md_request_copy, val)
if ',' in val:
val = val.split(',')
except:
pass

# Only override the initial setting in md_request if they are None, or the user has specified this
if getattr(md_request_copy, col) is None or col not in keep_initial_md_request_att_cols:
try:
if isinstance(val, list):
val = self.flatten_list_of_lists(val)

getattr(type(md_request_copy), col).fset(md_request_copy, val)
except:
pass

if start_date is not None: md_request_copy.start_date = start_date
if finish_date is not None: md_request_copy.finish_date = finish_date

md_request_copy = self._kwargs_to_md_request(kwargs, md_request_copy)

md_list.append(md_request_copy)

return md_list

def create_md_request_from_dict(self, md_request_dict, md_request=None, start_date=None, finish_date=None):
def create_md_request_from_dict(self, md_request_dict, md_request=None, start_date=None, finish_date=None, **kwargs):

if md_request is None:
md_request = MarketDataRequest()
Expand All @@ -406,9 +436,59 @@ def create_md_request_from_dict(self, md_request_dict, md_request=None, start_da
if start_date is not None: md_request.start_date = start_date
if finish_date is not None: md_request.finish_date = finish_date

md_request = self._kwargs_to_md_request(kwargs, md_request)

return md_request

def create_md_request_from_str(self, md_request_str, md_request=None, start_date=None, finish_date=None, best_match_only=False, smart_group=True):
def create_md_request_from_tickers(self, tickers, md_request=None, start_date=None, finish_date=None,
                                   best_match_only=False, smart_group=True, **kwargs):
    """Builds market data requests from one or more ticker strings.

    Each ticker is prefixed with '_.' and passed to ``create_md_request_from_str``;
    any extra keyword arguments are then applied to the result with
    ``_kwargs_to_md_request``.

    Parameters
    ----------
    tickers : str or iterable of str
        A single ticker or several tickers
    md_request : MarketDataRequest (optional)
        Forwarded unchanged to ``create_md_request_from_str``
    start_date, finish_date : (optional)
        Forwarded unchanged to ``create_md_request_from_str``
    best_match_only : bool
        Forwarded unchanged to ``create_md_request_from_str``
    smart_group : bool
        Forwarded unchanged to ``create_md_request_from_str``
    kwargs
        Attribute overrides applied to every request which is created

    Returns
    -------
    Whatever ``flatten_list_of_lists`` returns for the collected requests
    """
    ticker_seq = [tickers] if isinstance(tickers, str) else tickers

    collected = []

    for ticker in ticker_seq:
        request = self.create_md_request_from_str(
            '_.' + ticker, md_request=md_request, start_date=start_date, finish_date=finish_date,
            best_match_only=best_match_only, smart_group=smart_group)

        collected.append(self._kwargs_to_md_request(kwargs, request))

    return self.flatten_list_of_lists(collected)

def _kwargs_to_md_request(self, kw, md_request):
    """Sets keyword overrides as attributes on one or several market data requests.

    Parameters
    ----------
    kw : dict or None
        Attribute names mapped to the values to set. When empty (or None)
        ``md_request`` is returned untouched
    md_request : MarketDataRequest, list or None
        A single request, or a list of requests. A None request (on its own
        or inside the list) is replaced by a fresh ``MarketDataRequest()``
        before the attributes are set

    Returns
    -------
    The same request object with attributes set (single request), or a new
    list of the modified requests (list input)
    """
    # Nothing to apply: hand back exactly what we were given. This also covers
    # kw=None, which would otherwise fail on kw.items()
    if not kw:
        return md_request

    def _apply(request):
        # Shared by the single-request and list cases so both behave the same
        if request is None:
            request = MarketDataRequest()

        for attr, value in kw.items():
            setattr(request, attr, value)

        return request

    if isinstance(md_request, list):
        return [_apply(md) for md in md_request]

    return _apply(md_request)

def create_md_request_from_str(self, md_request_str, md_request=None, start_date=None, finish_date=None, best_match_only=False,
smart_group=True, **kwargs):

json_md_request = None

Expand Down Expand Up @@ -463,7 +543,10 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date
if start_date is not None: md_request.start_date = start_date
if finish_date is not None: md_request.finish_date = finish_date

return self.create_md_request_from_freeform(md_request)
md_request = self.create_md_request_from_freeform(md_request)
md_request = self._kwargs_to_md_request(kwargs, md_request)

return md_request

# Otherwise we do a partial match of predefined tickers
elif environment == "_":
Expand All @@ -472,10 +555,16 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date
best_match_only=best_match_only,
smart_group=smart_group)

return self.create_md_request_from_dataframe(md_request_df,
md_request_df = self.create_md_request_from_dataframe(md_request_df,
md_request=md_request, start_date=start_date,
finish_date=finish_date)

md_request_df = self._kwargs_to_md_request(kwargs, md_request_df)

# if best_match_only:
return md_request_df


else:
i = -1
environment = None
Expand Down Expand Up @@ -537,7 +626,7 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date

return md_request

def create_md_request_from_freeform(self, md_request, freeform_md_request=None, return_df=False):
def create_md_request_from_freeform(self, md_request, freeform_md_request=None, return_df=False, **kwargs):

if freeform_md_request is None:
freeform_md_request = md_request.freeform_md_request
Expand Down Expand Up @@ -599,6 +688,8 @@ def create_md_request_from_freeform(self, md_request, freeform_md_request=None,

getattr(type(md_request_temp), k).fset(md_request_temp, lst)

md_request_temp = self._kwargs_to_md_request(kwargs, md_request_temp)

md_request_list.append(md_request_temp)

if len(md_request_list) == 1:
Expand Down
2 changes: 2 additions & 0 deletions findatapy/util/configmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ def free_form_tickers_query(self, free_form_query, best_match_only=False, list_q

if list_query and isinstance(free_form_query, list):
free_form_query = free_form_query
elif ',' in free_form_query:
free_form_query = free_form_query.split(',')
else:
free_form_query = [free_form_query]

Expand Down

0 comments on commit 4f0ac72

Please sign in to comment.