Skip to content

Commit

Permalink
Fixing various new Pandas issues
Browse files Browse the repository at this point in the history
  • Loading branch information
saeedamen committed Aug 25, 2020
1 parent 8515260 commit c033b3f
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 91 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,15 @@ In findatapy/examples you will find several demos

# Release Notes

* 0.1.14 - findatapy (25 Aug 2020)
* 0.1.13 - findatapy (24 Aug 2020)
* 0.1.12 - findatapy (06 May 2020)

# Coding log

* 25 Aug 2020
* Fixes for newer Pandas
* Fixes for ALFRED downloading of economic data
* 24 Aug 2020
* Removed .ix references (to work with newer Pandas)
* 06 May 2020
Expand Down
110 changes: 61 additions & 49 deletions findatapy/market/datavendorbbg.py

Large diffs are not rendered by default.

29 changes: 16 additions & 13 deletions findatapy/market/datavendorweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,9 @@ def download_daily(self, market_data_request):
observation_start=market_data_request.start_date,
observation_end=market_data_request.finish_date)

data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
market_data_request.tickers[i] + '.close']
data_frame = data_frame.rename(columns={"realtime_start": market_data_request.tickers[i] + '.release-date-time-full',
"date": "Date",
"value" : market_data_request.tickers[i] + '.close'})

data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
data_frame = data_frame.drop_duplicates(subset=['Date'], keep='last')
Expand All @@ -234,17 +235,18 @@ def download_daily(self, market_data_request):
observation_start=market_data_request.start_date,
observation_end=market_data_request.finish_date)

data_frame = pandas.DataFrame(data_frame)
data_frame.columns = [market_data_request.tickers[i] + '.close']
data_frame = data_frame.to_frame(name=market_data_request.tickers[i] + '.close')
data_frame.index.name = 'Date'

data_frame_list.append(data_frame)

if 'first-revision' in market_data_request.fields:
data_frame = fred.get_series_first_revision(market_data_request.tickers[i],
observation_start=market_data_request.start_date,
observation_end=market_data_request.finish_date)

data_frame = pandas.DataFrame(data_frame)
data_frame.columns = [market_data_request.tickers[i] + '.first-revision']
data_frame = data_frame.to_frame(name=market_data_request.tickers[i] + '.first-revision')
data_frame.index.name = 'Date'

filter = Filter()
data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
Expand All @@ -257,8 +259,9 @@ def download_daily(self, market_data_request):
observation_start=market_data_request.start_date,
observation_end=market_data_request.finish_date)

data_frame.columns = ['Date', market_data_request.tickers[i] + '.release-date-time-full',
market_data_request.tickers[i] + '.actual-release']
data_frame = data_frame.rename(columns={"realtime_start": market_data_request.tickers[i] + '.release-date-time-full',
"date": "Date",
"value" : market_data_request.tickers[i] + '.actual-release'})

data_frame = data_frame.sort_values(by=['Date', market_data_request.tickers[i] + '.release-date-time-full'])
data_frame = data_frame.drop_duplicates(subset=['Date'], keep='first')
Expand All @@ -275,8 +278,9 @@ def download_daily(self, market_data_request):
observation_start=market_data_request.start_date,
observation_end=market_data_request.finish_date)

data_frame = pandas.DataFrame(data_frame)
data_frame.columns = [market_data_request.tickers[i] + '.actual-release']
data_frame = data_frame.to_frame(name=market_data_request.tickers[i] + '.actual-release')
data_frame.index.name = 'Date'
# data_frame = data_frame.rename(columns={"value" : market_data_request.tickers[i] + '.actual-release'})

filter = Filter()
data_frame = filter.filter_time_series_by_date(market_data_request.start_date,
Expand All @@ -291,11 +295,10 @@ def download_daily(self, market_data_request):

data_frame = data_frame['realtime_start']

data_frame = pandas.DataFrame(data_frame)
data_frame.columns = [market_data_request.tickers[i] + '.release-date-time-full']
data_frame = data_frame.to_frame(name=market_data_request.tickers[i] + '.release-date-time-full')

data_frame.index = data_frame[market_data_request.tickers[i] + '.release-date-time-full']
data_frame = data_frame.sort()
data_frame = data_frame.sort_index()
data_frame = data_frame.drop_duplicates()

filter = Filter()
Expand Down
82 changes: 61 additions & 21 deletions findatapy/timeseries/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,26 +1158,41 @@ def strip_linear_regression_output(self, indices, ols_list, var):

return df

##### various methods for averaging time series by hours, mins and days (or specific columns) to create summary time series
##### Various methods for averaging time series by hours, mins and days (or specific columns) to create summary time series
def average_by_columns_list(self, data_frame, columns):
    """Group the rows by the given column(s) and return the mean of each group.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Data to be summarised.
    columns : str or list
        Column label(s) to group by.

    Returns
    -------
    pandas.DataFrame
        Per-group averages.
    """
    grouped = data_frame.groupby(columns)

    return grouped.mean()

def average_by_hour_min_of_day(self, data_frame):
    """Average a time series by (hour, minute) of day across all dates.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Time series indexed by a DatetimeIndex.

    Returns
    -------
    pandas.DataFrame
        One row per (hour, minute) combination, values averaged.
    """
    try:
        # Newer pandas: the index components support .rename, which gives
        # the grouped output readable 'hour'/'minute' index level names
        return data_frame.groupby(
            [data_frame.index.hour.rename('hour'),
             data_frame.index.minute.rename('minute')]).mean()
    except Exception:
        # Older pandas: index components are plain arrays without .rename,
        # so group on them directly (bare except replaced with Exception so
        # KeyboardInterrupt/SystemExit are not swallowed)
        return data_frame.groupby(
            [data_frame.index.hour, data_frame.index.minute]).mean()

def average_by_hour_min_of_day_pretty_output(self, data_frame):
    """Average by (hour, minute) of day and re-index with datetime.time.

    Same aggregation as average_by_hour_min_of_day, but the resulting
    (hour, minute) MultiIndex is converted to datetime.time objects for
    nicer display/plotting.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Time series indexed by a DatetimeIndex.

    Returns
    -------
    pandas.DataFrame
        One row per time of day, indexed by datetime.time.
    """
    try:
        # Newer pandas: name the grouped index levels 'hour'/'minute'
        data_frame = data_frame.groupby(
            [data_frame.index.hour.rename('hour'),
             data_frame.index.minute.rename('minute')]).mean()
    except Exception:
        # Older pandas: index components lack .rename (bare except replaced
        # with Exception so KeyboardInterrupt/SystemExit are not swallowed)
        data_frame = data_frame.groupby(
            [data_frame.index.hour, data_frame.index.minute]).mean()

    # Convert each (hour, minute) tuple into a datetime.time object
    data_frame.index = data_frame.index.map(lambda t: datetime.time(*t))

    return data_frame

def average_by_hour_min_sec_of_day_pretty_output(self, data_frame):
data_frame = data_frame. \
groupby([data_frame.index.hour.rename('hour'), data_frame.index.minute.rename('minute'), data_frame.index.minute.rename('second')]).mean()
# Older Pandas
try:
data_frame = data_frame. \
groupby([data_frame.index.hour.rename('hour'), data_frame.index.minute.rename('minute'), data_frame.index.minute.rename('second')]).mean()
except:
data_frame = data_frame. \
groupby([data_frame.index.hour, data_frame.index.minute, data_frame.index.minute]).mean()

data_frame.index = data_frame.index.map(lambda t: datetime.time(*t))

Expand Down Expand Up @@ -1205,9 +1220,16 @@ def average_by_year_hour_min_of_day_pretty_output(self, data_frame):
# time_of_day.append(temp.groupby(temp.index.time).mean())
#
# data_frame = pandas.concat(time_of_day, axis=1, keys = years)
data_frame = data_frame. \
groupby([data_frame.index.year.rename('year'), data_frame.index.hour.rename('hour'),
data_frame.index.minute.rename('minute')]).mean()

# Older Pandas
try:
data_frame = data_frame. \
groupby([data_frame.index.year.rename('year'), data_frame.index.hour.rename('hour'),
data_frame.index.minute.rename('minute')]).mean()
except:
data_frame = data_frame. \
groupby([data_frame.index.year, data_frame.index.hour,
data_frame.index.minute]).mean()

data_frame = data_frame.unstack(0)

Expand Down Expand Up @@ -1241,18 +1263,30 @@ def average_by_cal_day(self, data_frame):
def average_by_month_day_hour_min_by_bus_day(self, data_frame, cal="FX"):
    """Average by (month, business day of month, hour, minute).

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Time series indexed by a DatetimeIndex.
    cal : str
        Holiday calendar name passed to Calendar().get_bus_day_of_month
        (default "FX").

    Returns
    -------
    pandas.DataFrame
        Averages grouped by month / business day / hour / minute.
    """
    date_index = data_frame.index

    try:
        # Newer pandas: rename the group keys for readable index level names
        return data_frame.groupby(
            [date_index.month.rename('month'),
             Calendar().get_bus_day_of_month(date_index, cal).rename('day'),
             date_index.hour.rename('hour'),
             date_index.minute.rename('minute')]).mean()
    except Exception:
        # Older pandas: group keys lack .rename (bare except replaced with
        # Exception so KeyboardInterrupt/SystemExit are not swallowed)
        return data_frame.groupby(
            [date_index.month,
             Calendar().get_bus_day_of_month(date_index, cal),
             date_index.hour, date_index.minute]).mean()

def average_by_month_day_by_bus_day(self, data_frame, cal="FX"):
    """Average by (month, business day of month).

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Time series indexed by a DatetimeIndex.
    cal : str
        Holiday calendar name passed to Calendar().get_bus_day_of_month
        (default "FX").

    Returns
    -------
    pandas.DataFrame
        Averages grouped by month / business day of month.
    """
    date_index = data_frame.index

    try:
        # Newer pandas: rename the group keys for readable index level names
        return data_frame.groupby(
            [date_index.month.rename('month'),
             Calendar().get_bus_day_of_month(date_index, cal).rename('day')]).mean()
    except Exception:
        # Older pandas: group keys lack .rename (bare except replaced with
        # Exception so KeyboardInterrupt/SystemExit are not swallowed)
        return data_frame.groupby(
            [date_index.month,
             Calendar().get_bus_day_of_month(date_index, cal)]).mean()
def average_by_month_day_by_day(self, data_frame):
date_index = data_frame.index

Expand All @@ -1268,9 +1302,15 @@ def group_by_year(self, data_frame):
def average_by_day_hour_min_by_bus_day(self, data_frame):
    """Average by (business day of month, hour, minute).

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Time series indexed by a DatetimeIndex.

    Returns
    -------
    pandas.DataFrame
        Averages grouped by business day of month / hour / minute.
    """
    date_index = data_frame.index

    try:
        # Newer pandas: rename the group keys for readable index level names
        return data_frame.groupby(
            [Calendar().get_bus_day_of_month(date_index).rename('day'),
             date_index.hour.rename('hour'),
             date_index.minute.rename('minute')]).mean()
    except Exception:
        # Older pandas: group keys lack .rename (bare except replaced with
        # Exception so KeyboardInterrupt/SystemExit are not swallowed)
        return data_frame.groupby(
            [Calendar().get_bus_day_of_month(date_index),
             date_index.hour, date_index.minute]).mean()

def remove_NaN_rows(self, data_frame):
    """Drop every row that contains at least one NaN value."""
    return data_frame.dropna(axis=0, how='any')
Expand Down
8 changes: 8 additions & 0 deletions findatapy/timeseries/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,19 @@ def filter_time_series_by_date_offset(self, start_date, finish_date, data_frame,
if hasattr(data_frame.index, 'tz'):
if data_frame.index.tz is not None:

# If the start/finish dates are timezone naive, overwrite with the DataFrame timezone
if not(isinstance(start_date, str)):
start_date = start_date.replace(tzinfo=data_frame.index.tz)

if not(isinstance(finish_date, str)):
finish_date = finish_date.replace(tzinfo=data_frame.index.tz)
else:
# Otherwise remove timezone from start_date/finish_date
if not (isinstance(start_date, str)):
start_date = start_date.replace(tzinfo=None)

if not (isinstance(finish_date, str)):
finish_date = finish_date.replace(tzinfo=None)

if 'int' in str(data_frame.index.dtype):
return data_frame
Expand Down
24 changes: 21 additions & 3 deletions findatapy_examples/alfred_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

market = Market(market_data_generator=MarketDataGenerator())

# get the first release for GDP and also print the release date of that
# Get the first release for GDP and also print the release date of that
md_request = MarketDataRequest(
start_date="01 Jun 2000", # start date (download data over past decade)
data_source='alfred', # use ALFRED/FRED as data source
Expand All @@ -34,7 +34,7 @@

print(df)

# compare the close and actual release of US GDP (and the final)
# Compare the close and actual release of US GDP (and the final)
md_request = MarketDataRequest(
start_date="01 Jun 2000", # start date (download data over past decade)
data_source='alfred', # use ALFRED/FRED as data source
Expand All @@ -51,7 +51,7 @@

Chart().plot(df, style=style)

# get the change NFP SA (need to calculate that from the acutal-release and first-revision)
# Get the change NFP SA (need to calculate that from the actual-release and first-revision)
md_request = MarketDataRequest(
start_date="01 Jun 2000", # start date (download data over past decade)
data_source='alfred', # use ALFRED/FRED as data source
Expand All @@ -75,3 +75,21 @@
df1 = pandas.DataFrame(df['US NFP change'])

Chart().plot(df1, style=style)

# Get release times on their own
# Get the change NFP SA
# (need to calculate that from the actual-release and first-revision)
md_request = MarketDataRequest(
start_date="01 Aug 2013",
finish_date="30 Nov 2019",
data_source='alfred',
tickers=['US NFP'],
fields=['release-date-time-full'],
vendor_tickers=['PAYEMS'],
vendor_fields=['release-date-time-full'])

market = Market(market_data_generator=MarketDataGenerator())

df_nfp = market.fetch_market(md_request)

print(df_nfp)
5 changes: 3 additions & 2 deletions findatapy_examples/cryptodata_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and limitations under the License.
#

# TODO: Needs fixing, given change in APIs

if __name__ == '__main__':
###### below line CRUCIAL when running Windows, otherwise multiprocessing doesn't work! (not necessary on Linux)
Expand Down Expand Up @@ -44,7 +45,7 @@

md_request = MarketDataRequest(start_date='11 Nov 2015', finish_date='02 Feb 2018', cut='LOC',
freq='tick', data_source='bitcoincharts', category='crypto',
fields=['close','volume'], tickers=['XBTUSD_itbit'])
fields=['close', 'volume'], tickers=['XBTUSD_itbit'])

df = market.fetch_market(md_request)
print(df.head(5))
Expand Down Expand Up @@ -125,7 +126,7 @@

md_request = MarketDataRequest(start_date='19 Feb 2018', finish_date='20 Feb 2018', cut='LOC',
freq='tick', data_source='kraken', category='crypto',
fields=['close','volume','buy-sell','market-limit'],
fields=['close', 'volume', 'buy-sell', 'market-limit'],
tickers=['XBTUSD'])

df = market.fetch_market(md_request)
Expand Down
4 changes: 2 additions & 2 deletions findatapy_examples/freddata_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@

md_request = MarketDataRequest(
start_date="01 Jun 2000", # start date (download data over past decade)
data_source='fred', # use FRED as data source
data_source='alfred', # use ALFRED/FRED as data source
tickers=['US CPI YoY', 'EZ CPI YoY'], # ticker
fields=['close'], # which fields to download
vendor_tickers=['CPIAUCSL', 'CP0000EZ17M086NEST'], # ticker (FRED)
vendor_fields=['close']) # which FRED fields to download
vendor_fields=['close']) # which ALFRED fields to download

df = market.fetch_market(md_request)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
tickers, using configuration files. There is also functionality which is particularly useful for those downloading FX market data."""

setup(name='findatapy',
version='0.1.13',
version='0.1.14',
description='Market data library',
author='Saeed Amen',
author_email='saeed@cuemacro.com',
Expand Down

0 comments on commit c033b3f

Please sign in to comment.