Improved RetStats and comments
saeedamen committed May 6, 2020
1 parent af326aa commit 55eaca5
Showing 7 changed files with 187 additions and 164 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -105,10 +105,14 @@ In findatapy/examples you will find several demos

# Release Notes

- * No formal releases yet
+ * 0.1.12 - findatapy (06 May 2020)

# Coding log

+ * 06 May 2020
+     * Amended function to remove points outside FX hours to exclude 1 Jan every year
+     * RetStats can now resample time series (removed kurtosis)
+     * Tidy up some code comments
* 07 Apr 2020
* Bug fix in constants
* 06 Apr 2020
272 changes: 143 additions & 129 deletions findatapy/market/market.py

Large diffs are not rendered by default.

47 changes: 23 additions & 24 deletions findatapy/market/marketdatagenerator.py
@@ -28,7 +28,6 @@ class MarketDataGenerator(object):
At present it supports Bloomberg (bloomberg), Yahoo (yahoo), Quandl (quandl), FRED (fred) etc. which are implemented
in subclasses of DataVendor class. This provides a common wrapper for all these data sources.
"""

def __init__(self):
@@ -189,28 +188,28 @@ def fetch_market_data(self, market_data_request, kill_session = True):
if('internet_load' in market_data_request.cache_algo):
self.logger.debug("Internet loading.. ")

- # signal to data_vendor template to exit session
+ # Signal to data_vendor template to exit session
# if data_vendor is not None and kill_session == True: data_vendor.kill_session()

if(market_data_request.cache_algo == 'cache_algo'):
self.logger.debug("Only caching data in memory, do not return any time series."); return

- # only return time series if specified in the algo
+ # Only return time series if specified in the algo
if 'return' in market_data_request.cache_algo:
- # special case for events/events-dt which is not indexed like other tables (also same for downloading futures
+ # Special case for events/events-dt which is not indexed like other tables (also same for downloading futures
# contracts dates)
if market_data_request.category is not None:
if 'events' in market_data_request.category:
return data_frame_agg

- # pad columns a second time (is this necessary to do here again?)
+ # Pad columns a second time (is this necessary to do here again?)
# TODO only do this for not daily data?
try:
if data_frame_agg is not None:
data_frame_agg = self.filter.filter_time_series(market_data_request, data_frame_agg, pad_columns=True)\
.dropna(how = 'all')

- # resample data using pandas if specified in the MarketDataRequest
+ # Resample data using pandas if specified in the MarketDataRequest
if market_data_request.resample is not None:
if 'last' in market_data_request.resample_how:
data_frame_agg = data_frame_agg.resample(market_data_request.resample).last()
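The resampling hook above hands off directly to pandas' resample. As a rough standalone sketch (not findatapy code; the ticker and frequencies are invented for illustration), the 'last' branch behaves like this:

```python
import numpy as np
import pandas as pd

# Hypothetical minute-frequency quotes standing in for downloaded market data
index = pd.date_range("2020-05-04 09:00", periods=240, freq="1min")
df = pd.DataFrame({"EURUSD.close": 1.08 + 0.0001 * np.random.randn(240)}, index=index)

# resample_how == 'last' keeps the final observation in each bucket
hourly_last = df.resample("1H").last()

# Other resample_how values would map onto other aggregators, e.g. a mean
hourly_mean = df.resample("1H").mean()
```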
@@ -272,7 +271,7 @@ def download_intraday_tick(self, market_data_request):

data_frame_group = []

- # single threaded version
+ # Single threaded version
# handle intraday ticker calls separately one by one
if len(market_data_request.tickers) == 1 or constants.market_thread_no['other'] == 1:
for ticker in market_data_request.tickers:
@@ -283,7 +282,7 @@
market_data_request_single.vendor_tickers = [market_data_request.vendor_tickers[ticker_cycle]]
ticker_cycle = ticker_cycle + 1

- # we downscale into float32, to avoid memory problems in Python (32 bit)
+ # We downscale into float32, to avoid memory problems in Python (32 bit)
# data is stored on disk as float32 anyway
# old_finish_date = market_data_request_single.finish_date
#
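The float32 downscaling described above halves the footprint of pandas' default float64 columns. A minimal illustration with plain pandas (sizes are approximate):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(1_000_000, 4))  # float64 by default

# Same downcast as the comment above describes
df32 = df.astype("float32")

print(df.memory_usage(deep=True).sum())    # roughly 32 MB of values
print(df32.memory_usage(deep=True).sum())  # roughly 16 MB of values
```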
@@ -300,7 +299,7 @@

data_frame_single = self.fetch_single_time_series(market_data_request)

- # if the vendor doesn't provide any data, don't attempt to append
+ # If the vendor doesn't provide any data, don't attempt to append
if data_frame_single is not None:
if data_frame_single.empty == False:
data_frame_single.index.name = 'Date'
@@ -319,7 +318,7 @@
# self._time_series_cache[fname] = data_frame_agg # cache in memory (disable for intraday)


- # if you call for returning multiple tickers, be careful with memory considerations!
+ # If you call for returning multiple tickers, be careful with memory considerations!
if data_frame_group is not None:
data_frame_agg = calcuations.pandas_outer_join(data_frame_group)

@@ -328,7 +327,7 @@
else:
market_data_request_list = []

- # create a list of MarketDataRequests
+ # Create a list of MarketDataRequests
for ticker in market_data_request.tickers:
market_data_request_single = copy.copy(market_data_request)
market_data_request_single.tickers = ticker
@@ -345,7 +344,7 @@ def fetch_single_time_series(self, market_data_request):

market_data_request = MarketDataRequest(md_request=market_data_request)

- # only includes those tickers have not expired yet!
+ # Only includes those tickers have not expired yet!
start_date = pandas.Timestamp(market_data_request.start_date).date()

import datetime
@@ -361,7 +360,7 @@

config = ConfigManager().get_instance()

- # in many cases no expiry is defined so skip them
+ # In many cases no expiry is defined so skip them
for i in range(0, len(tickers)):
try:
expiry_date = config.get_expiry_for_ticker(market_data_request.data_source, tickers[i])
@@ -371,12 +370,12 @@
if expiry_date is not None:
expiry_date = pandas.Timestamp(expiry_date).date()

- # use pandas Timestamp, a bit more robust with weird dates (can fail if comparing date vs datetime)
+ # Use pandas Timestamp, a bit more robust with weird dates (can fail if comparing date vs datetime)
# if the expiry is before the start date of our download don't bother downloading this ticker
if expiry_date < start_date:
tickers[i] = None

- # special case for futures-contracts which are intraday
+ # Special case for futures-contracts which are intraday
# avoid downloading if the expiry date is very far in the past
# (we need this before there might be odd situations where we run on an expiry date, but still want to get
# data right till expiry time)
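A sketch of the expiry screen described above, with a hypothetical dict standing in for the ConfigManager lookup (the helper name and sample expiries are made up):

```python
import pandas as pd

def drop_expired(tickers, expiry_dates, start_date):
    # Normalise via pandas Timestamp so date vs datetime comparisons don't bite
    start = pd.Timestamp(start_date).date()

    for i, ticker in enumerate(tickers):
        expiry = expiry_dates.get(ticker)

        # Tickers whose expiry predates the download window are blanked out
        if expiry is not None and pd.Timestamp(expiry).date() < start:
            tickers[i] = None

    return tickers

print(drop_expired(["ESM20 Index", "ESU20 Index"],
                   {"ESM20 Index": "19 Jun 2020", "ESU20 Index": "18 Sep 2020"},
                   "01 Jul 2020"))
# ESM20 expired in June, so the result is [None, 'ESU20 Index']
```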
@@ -404,7 +403,7 @@ def fetch_single_time_series(self, market_data_request):
if data_frame_single.empty == False:
data_frame_single.index.name = 'Date'

- # will fail for dataframes which includes dates/strings (eg. futures contract names)
+ # Will fail for DataFrames which includes dates/strings (eg. futures contract names)
try:
data_frame_single = data_frame_single.astype('float32')
except:
@@ -427,7 +426,7 @@ def fetch_group_time_series(self, market_data_request_list):
if thread_no > 0:
pool = SwimPool().create_pool(thread_technique = constants.market_thread_technique, thread_no=thread_no)

- # open the market data downloads in their own threads and return the results
+ # Open the market data downloads in their own threads and return the results
result = pool.map_async(self.fetch_single_time_series, market_data_request_list)
data_frame_group = result.get()

@@ -439,11 +438,11 @@
for md_request in market_data_request_list:
data_frame_group.append(self.fetch_single_time_series(md_request))

- # collect together all the time series
+ # Collect together all the time series
if data_frame_group is not None:
data_frame_group = [i for i in data_frame_group if i is not None]

- # for debugging!
+ # For debugging!
# import pickle
# import datetime
# pickle.dump(data_frame_group, open(str(datetime.datetime.now()).replace(':', '-').replace(' ', '-').replace(".", "-") + ".p", "wb"))
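The pool.map_async pattern above, sketched with the standard library's ThreadPool in place of findatapy's SwimPool (the request list and worker are placeholders, not the real MarketDataRequest objects):

```python
from multiprocessing.pool import ThreadPool

def fetch_single(request):
    # Stand-in for fetch_single_time_series; would return a DataFrame (or None)
    return "downloaded " + request

requests = ["EURUSD", "GBPUSD", "USDJPY"]

pool = ThreadPool(processes=2)
result = pool.map_async(fetch_single, requests)  # same map_async call shape as above
frames = result.get()                            # blocks until every download returns
pool.close()
pool.join()

# Drop failed downloads before joining the results, as the code above does
frames = [f for f in frames if f is not None]
print(frames)
```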
@@ -478,7 +477,7 @@ def download_daily(self, market_data_request):
is_key_overriden = True
break

- # by default use other
+ # By default use other
thread_no = constants.market_thread_no['other']

if market_data_request.data_source in constants.market_thread_no:
@@ -491,12 +490,12 @@
else:
market_data_request_list = []

- # when trying your example 'equitiesdata_example' I had a -1 result so it went out of the comming loop and I had errors in execution
+ # When trying your example 'equitiesdata_example' I had a -1 result so it went out of the comming loop and I had errors in execution
group_size = max(int(len(market_data_request.tickers) / thread_no - 1),0)

if group_size == 0: group_size = 1

- # split up tickers into groups related to number of threads to call
+ # Split up tickers into groups related to number of threads to call
for i in range(0, len(market_data_request.tickers), group_size):
market_data_request_single = copy.copy(market_data_request)
market_data_request_single.tickers = market_data_request.tickers[i:i + group_size]
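The group-size arithmetic above is plain list slicing. A self-contained sketch (ticker names invented) of how the tickers end up chunked per thread:

```python
def split_into_groups(tickers, thread_no):
    # Same arithmetic as above, including the guard against a zero group size
    group_size = max(int(len(tickers) / thread_no - 1), 0)

    if group_size == 0:
        group_size = 1

    return [tickers[i:i + group_size] for i in range(0, len(tickers), group_size)]

print(split_into_groups(["EURUSD", "GBPUSD", "USDJPY", "AUDUSD", "USDCAD"], 2))
# group_size comes out as 1 here, so each ticker gets its own request
```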
@@ -507,7 +506,7 @@

market_data_request_list.append(market_data_request_single)

- # special case where we make smaller calls one after the other
+ # Special case where we make smaller calls one after the other
if is_key_overriden:

data_frame_list = []
@@ -526,7 +525,7 @@

def refine_expiry_date(self, market_data_request):

- # expiry date
+ # Expiry date
if market_data_request.expiry_date is None:
ConfigManager().get_instance().get_expiry_for_ticker(market_data_request.data_source, market_data_request.ticker)

9 changes: 5 additions & 4 deletions findatapy/timeseries/filter.py
@@ -698,7 +698,7 @@ def make_FX_1_min_working_days(self, data_frame):
return data_frame

def remove_out_FX_out_of_hours(self, data_frame):
"""Filtered a time series for FX hours (ie. excludes 22h GMT Fri - 19h GMT Sun)
"""Filtered a time series for FX hours (ie. excludes 22h GMT Fri - 19h GMT Sun and New Year's Day)
Parameters
----------
@@ -715,9 +715,10 @@ def remove_out_FX_out_of_hours(self, data_frame):
# remove Sun before 19:00 GMT

# Monday = 0, ..., Sunday = 6
- data_frame = data_frame.ix[~((data_frame.index.dayofweek == 4) & (data_frame.index.hour > 22))]
- data_frame = data_frame.ix[~((data_frame.index.dayofweek == 5))]
- data_frame = data_frame.ix[~((data_frame.index.dayofweek == 6)& (data_frame.index.hour < 19))]
+ data_frame = data_frame[~((data_frame.index.dayofweek == 4) & (data_frame.index.hour > 22))]
+ data_frame = data_frame[~((data_frame.index.dayofweek == 5))]
+ data_frame = data_frame[~((data_frame.index.dayofweek == 6)& (data_frame.index.hour < 19))]
+ data_frame = data_frame[~((data_frame.index.day == 1) & (data_frame.index.month == 1))]

return data_frame
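The new bracket-indexed masks, applied end to end on a toy index that spans a weekend and New Year (synthetic data, purely to exercise each exclusion):

```python
import numpy as np
import pandas as pd

index = pd.date_range("2019-12-27", "2020-01-02", freq="1min")  # Fri through Thu
df = pd.DataFrame({"EURUSD.close": np.random.randn(len(index))}, index=index)

# Same masks as remove_out_FX_out_of_hours: Fri after 22h GMT, all of Sat,
# Sun before 19h GMT, and now New Year's Day as well
df = df[~((df.index.dayofweek == 4) & (df.index.hour > 22))]
df = df[~(df.index.dayofweek == 5)]
df = df[~((df.index.dayofweek == 6) & (df.index.hour < 19))]
df = df[~((df.index.day == 1) & (df.index.month == 1))]

print(len(index), "->", len(df))  # the 1 Jan 2020 rows are gone
```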

14 changes: 9 additions & 5 deletions findatapy/timeseries/retstats.py
@@ -1,4 +1,4 @@
- __author__ = 'saeedamen' # Saeed Amen
+ __author__ = 'saeedamen'  # Saeed Amen

#
# Copyright 2016 Cuemacro
@@ -24,9 +24,10 @@ class RetStats(object):
"""

- def __init__(self, returns_df = None, ann_factor = None):
+ def __init__(self, returns_df=None, ann_factor=None, resample_freq=None):
self._returns_df = returns_df
self._ann_factor = ann_factor
+ self._resample_freq = resample_freq

self._rets = None
self._vol = None
@@ -81,7 +82,7 @@ def calculate_ret_stats_from_prices(self, prices_df, ann_factor):

self.calculate_ret_stats(calculations.calculate_returns(prices_df), ann_factor)

- def calculate_ret_stats(self, returns_df = None, ann_factor = None):
+ def calculate_ret_stats(self, returns_df=None, ann_factor=None):
"""Calculates return statistics for an asset's returns including IR, vol, ret and drawdowns
Parameters
@@ -99,6 +100,9 @@ def calculate_ret_stats(self, returns_df = None, ann_factor = None):
if returns_df is None: returns_df = self._returns_df
if ann_factor is None: ann_factor = self._ann_factor

+ if self._resample_freq is not None:
+     returns_df = returns_df.resample(self._resample_freq).sum()

# TODO work on optimizing this method
self._rets = returns_df.mean(axis=0) * ann_factor
self._vol = returns_df.std(axis=0) * math.sqrt(ann_factor)
@@ -204,6 +208,6 @@ def summary(self):
+ "% Vol = " + str(round(self._vol[i] * 100, 1))
+ "% IR = " + str(round(self._inforatio[i], 2))
+ " Dr = " + str(round(self._dd[i] * 100, 1))
+ "% Kurt = " + str(round(self._kurtosis[i], 2)))
+ "%") # Kurt = " + str(round(self._kurtosis[i], 2)))

return stat_list
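A guess at how the new resample_freq argument would be used from calling code, based only on the signature above (the import path, column name and sample numbers are assumptions, not taken from this commit):

```python
import numpy as np
import pandas as pd
from findatapy.timeseries import RetStats

# Hypothetical daily returns for a single asset
index = pd.bdate_range("2019-01-01", periods=260)
returns_df = pd.DataFrame({"asset.returns": 0.0001 + 0.01 * np.random.randn(260)},
                          index=index)

# With resample_freq set, calculate_ret_stats sums returns into weekly buckets
# before computing mean/vol, so the annualisation factor drops from 252 to 52
ret_stats = RetStats(returns_df=returns_df, ann_factor=52, resample_freq="W")
ret_stats.calculate_ret_stats()

print(ret_stats.summary())  # e.g. ['asset Ret = ...% Vol = ...% IR = ... Dr = ...%']
```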
1 change: 1 addition & 0 deletions findatapy/util/loggermanager.py
@@ -14,6 +14,7 @@

import logging
import logging.config

from findatapy.util.dataconstants import DataConstants
from findatapy.util.singleton import Singleton

2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@
tickers, using configuration files. There is also functionality which is particularly useful for those downloading FX market data."""

setup(name='findatapy',
- version='0.1.11',
+ version='0.1.12',
description='Market data library',
author='Saeed Amen',
author_email='saeed@cuemacro.com',
