In [1]:
# The function below will provide input features for a prediction model. 
# We calculate information ratios based on daily prices.
# The following information ratios will be calculated:
# Sharpe Ratio
# Information Ratio
# Tracking Error
# Treynor Ratio
# Sortino Ratio
# CAGR % (Compound Annual Growth Rate)

# The functions returns a list of ratios in the order given above.

# Please make sure the following packageS are available: yahoofinancials, numpy, pandas, scipy, datetime, math
from yahoofinancials import YahooFinancials
import numpy as np
import pandas as pd
from scipy import stats
import datetime
import math

# Constants for outlier filtering
q_low = 0.01
q_hi = 0.99

# Function inputs: 
# fund: e.g. 'PDGIX' (T. Rowe Price Dividend Growth I)
# benchmark: e.g. '^GSPC' (S&P 500 Index)
# risk_free_rate: (input for Sharpe Ratio and Treynor Ratio) should be given in percentage (e.g. 1)
# in yearly (annualized) terms
# marr: (minimum acceptable rate of return, an input for Sortino Ratio) should be given in percentage (e.g. 2)
# in yearly (annualized) terms
# start_date and end_date should be formatted as: 'yyyy-mm-dd'
# annualize: True if we want the ratios annualized, False otherwise
# trading_days_in_year: used for annualization (by convention there are 252 trading days in a year)

def information_ratios(fund, benchmark, risk_free_rate, marr, start_date, end_date, annualize, trading_days_in_year):
    
    # get daily pricing for the fund and its benchmark
    yahoo_financials_fund = YahooFinancials(fund)
    yahoo_financials_benchmark = YahooFinancials(benchmark)
    daily_mutualfund_prices = yahoo_financials_fund.get_historical_price_data(start_date, end_date, 'daily')
    daily_benchmark_prices = yahoo_financials_benchmark.get_historical_price_data(start_date, end_date, 'daily')

    # create dataframes and calculate daily (log) returns using adjusted closing price
    fund_df = pd.DataFrame(daily_mutualfund_prices[fund]['prices'])
    fund_df = fund_df[['formatted_date', 'adjclose']].set_index('formatted_date')
    fund_df['daily_return_fund'] = np.log(fund_df['adjclose'] / fund_df['adjclose'].shift(1))
    #fund_df['daily_return_fund'] = fund_df['adjclose'].pct_change(1)

    benchmark_df = pd.DataFrame(daily_benchmark_prices[benchmark]['prices'])
    benchmark_df = benchmark_df[['formatted_date', 'adjclose']].set_index('formatted_date')
    benchmark_df['daily_return_bm'] = np.log(benchmark_df['adjclose'] / benchmark_df['adjclose'].shift(1))
    #benchmark_df['daily_return_bm'] = benchmark_df['adjclose'].pct_change(1)

    df_returns = fund_df.join(benchmark_df, how = 'outer',lsuffix='_fund', rsuffix='_bm')
    df_returns = df_returns[['daily_return_fund', 'daily_return_bm']].sort_values(by = 'formatted_date')
    
    # cleaning data
    # replace faulty data by nan, remove outliers (daily return outside of (q_low, q_hi) quantile range)
    df_returns.replace([np.inf, -np.inf], np.nan, inplace=True)
    q_fund_low = df_returns['daily_return_fund'].quantile(q_low)
    q_fund_hi  = df_returns['daily_return_fund'].quantile(q_hi)
    q_bm_low = df_returns['daily_return_bm'].quantile(q_low)
    q_bm_hi  = df_returns['daily_return_bm'].quantile(q_hi)

    df_returns = df_returns[(df_returns['daily_return_fund'] < q_fund_hi) 
                            & (df_returns['daily_return_fund'] > q_fund_low) 
                            & (df_returns['daily_return_bm'] < q_bm_hi)
                            & (df_returns['daily_return_bm'] > q_bm_low)]
    
    # convert risk_free_rate and marr to same frequency as returns (daily)
    rf = np.power(1 + risk_free_rate/100, 1. / 252) - 1.
    mr = np.power(1 + marr/100, 1. / 252) - 1.
    
    # calculate excess daily return, return above benchmark and MARR
    df_returns['daily_excess_return'] = df_returns['daily_return_fund'] - rf
    df_returns['daily_return_over_bm'] = df_returns['daily_return_fund'] - df_returns['daily_return_bm']
    df_returns['daily_return_above_marr'] = df_returns['daily_return_fund'] - mr

    downside = (df_returns[df_returns['daily_return_above_marr']<0]['daily_return_above_marr'] **2).sum()
    / len(df_returns['daily_return_above_marr'])
   
    # preparing information ratios and some other stats
    beta,_,r_val,_,_, = stats.linregress(df_returns['daily_return_fund'].dropna(), 
                                         df_returns['daily_return_bm'].dropna())

    information_ratio = df_returns['daily_return_over_bm'].mean() / df_returns['daily_return_over_bm'].std()
    sharpe_ratio = df_returns['daily_excess_return'].mean() / df_returns['daily_excess_return'].std()
    sortino_ratio = df_returns['daily_return_above_marr'].mean() / np.sqrt(downside)
    treynor_ratio = df_returns['daily_excess_return'].mean() / beta
    
    # annualizing information ratios
    if annualize:
        ann_factor = 1 if trading_days_in_year is None else np.sqrt(trading_days_in_year)
    else: ann_factor = 1   
        
    information_ratio = information_ratio * ann_factor
    sharpe_ratio = sharpe_ratio * ann_factor
    sortino_ratio = sortino_ratio * ann_factor
    treynor_ratio = treynor_ratio * ann_factor
    tracking_error = df_returns['daily_return_over_bm'].std() * ann_factor
    
    # calculate the compound annual growth rate (CAGR%) of excess returns
    df_returns = df_returns.reset_index()
    end_str = df_returns['formatted_date'].iloc[-1]
    start_str = df_returns['formatted_date'].iloc[0]
    end_obj = datetime.datetime.strptime(end_str, '%Y-%m-%d')
    start_obj = datetime.datetime.strptime(start_str, '%Y-%m-%d')
    years = (end_obj.date() - start_obj.date()).days / 365.
    
    cagr = 100 * (math.exp(df_returns['daily_excess_return'].sum()) ** (1.0 / years) - 1)

    # printing results
    print('Sharpe Ratio for the fund', fund, 
          'with the risk-free rate of {}% is {}.'.format(risk_free_rate, round(sharpe_ratio,2)))
    print('Information Ratio for the fund', fund, 'with benchmark', 
        benchmark, 'is {}.'.format(round(information_ratio,2)))
    print('Coefficient of determination (R-squared) for the fund', fund, 'with benchmark', 
        benchmark, 'is {}.'.format(round(r_val**2,2)))
    print('Tracking Error is {}%.'.format(round(tracking_error*100,2)))
    print('Treynor Ratio with the risk-free rate of {}% is {}.'.format(risk_free_rate, round(treynor_ratio,2)))
    print('Sortino Ratio with MARR of {}% is {}.'.format(marr, round(sortino_ratio,2)))
    print('CAGR with the risk-free rate of {}% is {}%.'.format(risk_free_rate, round(cagr,2)))
 
    return (sharpe_ratio, information_ratio, tracking_error, treynor_ratio, sortino_ratio, cagr)

In [2]:
# Demonstrating the functionality on three funds and their benchmarks.
# Used risk_free_rate = 1, MARR = 2, start_date = '2017-01-31', end_date = '2018-01-31',
# annualize = True, trading_days_in_year = 252

information_ratios('PDGIX', '^GSPC', 1, 2, '2017-01-31', '2018-01-31', True, 252)
# T. Rowe Price Dividend Growth I, S&P 500 Index

information_ratios('QASGX', '^RUO', 1, 2, '2017-01-31', '2018-01-31', True, 252)
# Federated Hermes MDT Small Cap Growth Fund Class A Shares, Russel 2000 Growth Index

information_ratios('HISFX', '^RUT', 1, 2, '2017-01-31', '2018-01-31', True, 252)
# Hennessy Small Cap Financial Fund Institutional Class, Russel 2000 Index

Sharpe Ratio for the fund PDGIX with the risk-free rate of 1% is 3.63.
Information Ratio for the fund PDGIX with benchmark ^GSPC is -0.63.
Coefficient of determination (R-squared) for the fund PDGIX with benchmark ^GSPC is 0.87.
Tracking Error is 2.14%.
Treynor Ratio with the risk-free rate of 1% is 0.01.
Sortino Ratio with MARR of 2% is 0.38.
CAGR with the risk-free rate of 1% is 21.58%.
Sharpe Ratio for the fund QASGX with the risk-free rate of 1% is 2.21.
Information Ratio for the fund QASGX with benchmark ^RUO is 1.35.
Coefficient of determination (R-squared) for the fund QASGX with benchmark ^RUO is 0.92.
Tracking Error is 3.13%.
Treynor Ratio with the risk-free rate of 1% is 0.02.
Sortino Ratio with MARR of 2% is 0.22.
CAGR with the risk-free rate of 1% is 26.69%.
Sharpe Ratio for the fund HISFX with the risk-free rate of 1% is 0.08.
Information Ratio for the fund HISFX with benchmark ^RUT is -1.22.
Coefficient of determination (R-squared) for the fund HISFX with benchmark ^RUT i

(0.07646174340018307,
 -1.2192183080416041,
 0.09963834945517924,
 0.0013175503817332314,
 0.0006980403488195051,
 1.057323911562258)

In [18]:
import datetime
end_obj = datetime.datetime.strptime('1986-12-07', '%Y-%m-%d')
end_obj.date()
start_obj = datetime.datetime.strptime('1985-12-06', '%Y-%m-%d')
start_obj.date()
(end_obj.date() - start_obj.date()).days

366