Skip to content

Commit

Permalink
Added more flexible ticker calls
Browse files Browse the repository at this point in the history
  • Loading branch information
saeedamen committed May 26, 2021
1 parent 8f2ad8b commit 1a1e4d0
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 11 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ individual data providers)

# Coding log

* 26 May 2021
* Added more flexible ticker calls (with a '_' prefix)
* 23 May 2021
* Fixed various bugs with reading ECO_RELEASE_DT etc. dates from Bloomberg
* Fixed bugs when predefined ticker is defined differently in different categories
Expand Down
27 changes: 23 additions & 4 deletions findatapy/market/market.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,15 @@ def fetch_market(self, md_request=None, md_request_df=None, md_request_str=None,

return data_frame

def create_md_request_from_dataframe(self, md_request_df, md_request=None, start_date=None, finish_date=None):
def create_md_request_from_dataframe(self, md_request_df, md_request=None, start_date=None, finish_date=None, smart_group=True):

md_list = []

# Aggregate/shrink dataframe grouping it by common attributes
# tickers, vendor_tickers, fields, vendor_fields
md_request_df = ConfigManager().get_instance().smart_group_dataframe_tickers(md_request_df, ret_fields=md_request_df.columns.tolist())
if smart_group:
md_request_df = ConfigManager().get_instance().smart_group_dataframe_tickers(
md_request_df, ret_fields=md_request_df.columns.tolist())

# Now populate MarketDataRequests based on the DataFrame
for index, row in md_request_df.iterrows():
Expand Down Expand Up @@ -406,7 +408,7 @@ def create_md_request_from_dict(self, md_request_dict, md_request=None, start_da

return md_request

def create_md_request_from_str(self, md_request_str, md_request=None, start_date=None, finish_date=None):
def create_md_request_from_str(self, md_request_str, md_request=None, start_date=None, finish_date=None, best_match_only=False, smart_group=True):

json_md_request = None

Expand All @@ -427,7 +429,10 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date
except:
pass

# If we failed to parse as a JSON, let's try as string
if json_md_request is None:

# Create a list of input parameters for our MarketDataRequest
split_lst = []

word = ''
Expand Down Expand Up @@ -457,7 +462,8 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date
if environment in constants.possible_data_environment:
i = 0

elif environment == 'raw':
# Otherwise, what if the user wants to specify each property manually?
elif environment == 'raw' or environment == 'r':
# Here the user can specify any tickers/fields etc. they want, they don't have to be predefined
# eg. raw.data_source.bloomberg.tickers.EURUSD.vendor_tickers.EURUSD Curncy
if md_request is None:
Expand All @@ -480,10 +486,23 @@ def create_md_request_from_str(self, md_request_str, md_request=None, start_date

return self.create_md_request_from_freeform(md_request)

# Otherwise we do a partial match of predefined tickers
elif environment == "_":
# Try a heuristic/approximate match eg. _.quandl.fx
md_request_df = ConfigManager().get_instance().free_form_tickers_query(md_request_params[1:],
best_match_only=best_match_only,
smart_group=smart_group)

return self.create_md_request_from_dataframe(md_request_df,
md_request=md_request, start_date=start_date,
finish_date=finish_date)

else:
i = -1
environment = None

# Otherwise the user has specified the MarketDataRequest str in the form
# category.data_source.freq.cut.tickers.field = fx.bloomberg.daily.NYC.EURUSD.close
category = md_request_params[i + 1]
data_source = md_request_params[i + 2]

Expand Down
117 changes: 111 additions & 6 deletions findatapy/util/configmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
import csv
import pandas as pd


from findatapy.timeseries import Calculations
from findatapy.util.dataconstants import DataConstants
from findatapy.util.singleton import Singleton
from findatapy.util.loggermanager import LoggerManager

from dateutil.parser import parse

import re
Expand Down Expand Up @@ -170,7 +173,14 @@ def populate_time_series_dictionaries(data_constants=None):
else:
ConfigManager._dict_time_series_category_tickers_library_to_library[key] = [tickers]

ConfigManager._data_frame_time_series_tickers = pd.concat(df_tickers)
try:
df_tickers = pd.concat(df_tickers).sort_values(by=['category', 'data_source', 'freq', 'cut'])
except:
pass

df_tickers = df_tickers.reset_index().drop('level_0', axis=1).reset_index()

ConfigManager._data_frame_time_series_tickers = df_tickers

## Populate fields conversions
reader = csv.DictReader(open(data_constants.time_series_fields_list))
Expand Down Expand Up @@ -249,11 +259,96 @@ def free_form_tickers_regex_query(self, category=None, data_source=None, freq=No

return df

def free_form_tickers_query(self, free_form_query, best_match_only=False,
                            ret_fields=['category', 'data_source', 'freq', 'cut', 'tickers', 'vendor_tickers', 'fields'],
                            smart_group=True):
    """Build a DataFrame of predefined tickers which match a free form query.

    The query is a list of keywords (or a '.' delimited string of them). Each keyword is compared against
    every column of the predefined tickers DataFrame, in column order, and the candidate rows are narrowed
    to those where the first matching column equals the keyword. Keywords which match no column are
    ignored. This lets a user "guess" a set of tickers without using the standard query format
    category.data_source.freq.cut.ticker, such as fx.bloomberg.daily.NYC.EURUSD.close

    eg. quandl.fx will match all tickers which are from "quandl" and have a "category" fx

    We must be careful to make sure that categories, data_sources etc. are unique and do not overlap with
    other properties like tickers, because a keyword is matched against whichever column hits first.

    Parameters
    ----------
    free_form_query : str or list(str)
        A query that can be used to generate a MarketDataRequest, eg. quandl.fx or ['quandl', 'fx']

    best_match_only : bool
        When several rows match, only keep the first one (default: False)

    ret_fields : list(str)
        Which properties of a MarketDataRequest to return (passed to smart_group_dataframe_tickers)

    smart_group : bool
        When several rows match, smart group the tickers of a particular category into a single row

    Returns
    -------
    DataFrame
        The matching predefined tickers, or None if the search raised an error (for example, if the
        predefined tickers have not been populated)
    """
    logger = LoggerManager().getLogger(__name__)

    if isinstance(free_form_query, str):
        keywords = free_form_query.split('.')
    else:
        keywords = free_form_query

    logger.info("Finding ticker combination which matches %s", free_form_query)

    df = ConfigManager._data_frame_time_series_tickers

    df_joined = df

    # Search through all the keywords, and see if matches with any columns of our predefined tickers
    try:
        for k in keywords:
            for c in df.columns:
                try:
                    df_temp = df_joined[df_joined[c] == k]
                except Exception:
                    # This column cannot be compared with the keyword, so treat it as "no match"
                    # (KeyboardInterrupt/SystemExit are deliberately no longer trapped here)
                    continue

                if not df_temp.empty:
                    # Only the first column which matches is used to narrow the candidates
                    df_joined = df_temp
                    break

        df = df_joined
    except Exception as e:
        # Keep the historical contract of returning None, but do not hide the reason from the user
        logger.warning("Could not search predefined tickers for %s: %s", free_form_query, e)

        return None

    if len(df.index) > 1:
        logger.info("Found multiple matches for ticker combination, first trying smart group...")

        if smart_group:
            # The default ret_fields list is shared between calls, so pass a copy downstream
            fields = list(ret_fields) if isinstance(ret_fields, list) else ret_fields

            df = self.smart_group_dataframe_tickers(df, ret_fields=fields)

        if best_match_only:
            logger.info("Taking only top match...")
            df = pd.DataFrame(df.head(1))

    return df

@staticmethod
def smart_group_dataframe_tickers(df, ret_fields=['category', 'data_source', 'freq', 'cut']):
"""Groups together a dataframe of metadata associated with assets
"""Groups together a DataFrame of metadata associated with assets, which can be used to create MarketDataRequest
objects
"""

if ret_fields is None:
ret_fields = df.columns.to_list()
elif isinstance(ret_fields, 'str'):
if ret_fields == 'all':
ret_fields = df.columns.to_list()
elif isinstance(ret_fields, list):
if ret_fields == []:
ret_fields = df.columns.to_list()

if set(['category', 'data_source', 'freq', 'cut']).issubset(ret_fields):
group_fields = ret_fields.copy()

Expand All @@ -270,12 +365,22 @@ def smart_group_dataframe_tickers(df, ret_fields=['category', 'data_source', 'fr
group_fields.remove('vendor_tickers')

if agg_dict != {}:
df = df.groupby(group_fields).agg(agg_dict)

for i, g in enumerate(group_fields):
df[g] = df.index.get_level_values(i)
try:
df = df.drop(data_constants.drop_cols_smart_tickers_grouping, axis=1)
except:
pass

df_temp = df.groupby(group_fields).agg(agg_dict)

# If grouping fails (when there aren't multiple elements to group!)
if df_temp.empty:
pass
else:
for i, g in enumerate(group_fields):
df_temp[g] = df_temp.index.get_level_values(i)

df = df.reset_index(drop=True)
df = df_temp.reset_index(drop=True)

return df

Expand Down
3 changes: 3 additions & 0 deletions findatapy/util/dataconstants.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ class DataConstants(object):
events_category = 'events'
events_category_dt = 'events_dt'

# Ignore these columns when doing smart grouping
drop_cols_smart_tickers_grouping = ['level_0']

###### FOR CURRENT VERSION

# which marketdatagenerator type to use?
Expand Down
15 changes: 14 additions & 1 deletion findatapy_examples/freeform_md_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,21 @@
# We can also create MarketDataRequest by passing a str that relates to predefined tickers
md_request = market.create_md_request_from_str('backtest.fx.bloomberg.daily.NYC.EURUSD.close')

print(md_request)

# We can do an approximate match for predefined tickers, and findatapy will "guess" the closest match
md_request = market.create_md_request_from_str('_.bloomberg.EURUSD.NYC')

print(md_request)

# We get *all* the predefined tickers which match in any column with quandl and fx, and these will be smart grouped
# into the smallest number of requests
md_request = market.create_md_request_from_str('_.quandl.fx', best_match_only=False, smart_group=True)

print(md_request)

# We can also create MarketDataRequest by passing a str for an arbitrary Parquet, note, if we have dots in the path
# we need to use {} to denote them
# we need to use {} to denote them (we can use either 'raw' to denote this or just 'r')
md_request = market.create_md_request_from_str('raw.data_source.{c:\parquet_files\dump.parquet}.tickers.EURUSD')

print(md_request)
Expand Down

0 comments on commit 1a1e4d0

Please sign in to comment.