In [1]:
import pandas as pd
import numpy as np
import os.path
try:
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen

### Helper Functions ###

#### Step 0: Load the helper functions below, you will be using them later to evaluate your logic. 
It is very IMPORTANT that you DONOT change these functions or your final submission will not evaluate correctly. The functions to modify are mentioned below

* Dataset File Download Function

In [2]:
#### LOAD THE HELPER FUNCTIONS BELOW####
#### IMPORTANT: DONOT change these functions or your final submission will not evaluate correctly###

## This downloads your datafile, Do not change this function
def downloadFile(dataSetId):
      fileName = '%s.csv' % (dataSetId)
      url = 'https://raw.githubusercontent.com/Auquan/qq-winter2019/master/' + fileName

      response = urlopen(url)
      status = response.getcode()
      if status == 200:
          print('Downloading the dataset %s' % (fileName))
          with open(fileName, 'w') as f:
              f.write(response.read().decode('utf8'))
          return True
      else:
          logError('File not found. Please ensure you are working with correct data set Id')
          return False

    

* Reward Function

In [3]:
## This calculates the reward, Do not change this function
def getReward(wt, wt_1, ri, l, k):
    if (ri is None) or (wt is None):
        port_returns.append(0)
        sharpe_ratio.append(0)
        port_volatility.append(0)
        reward = 0
    else:
        ri.fillna(0, inplace=True)
        returns = np.dot(wt, ri)
        port_returns.append(returns)
        downside_returns = [x for x in port_returns if x < 0]
        volatility = np.std(downside_returns) #np.sqrt(np.dot(weights.T, np.dot(cov_annual, weights)))
        sharpe = np.sum(port_returns) / volatility if volatility!=0 else 0
        sharpe_ratio.append(sharpe)
        port_volatility.append(volatility)
        if (wt_1 is None):
            phi = 0
        else:
            ix = wt.index | wt_1.index
            tn = wt.reindex(ix) - wt_1.reindex(ix)
            tn[tn.isnull()&tn.index.isin(wt.index)] = wt
            tn[tn.isnull()&tn.index.isin(wt_1.index)] = -wt_1
            phi = k*tn.abs().sum()
        reward = returns - l*volatility - phi
        print('returns, volatility, phi, sharpe, reward')
        print(returns, volatility, phi, sharpe, reward)
    return reward

* Check Constraints Functions

In [4]:
## Do not change this function, this verifies if all constraints are met
def checkConstraints(wt, wt_1, wi, Dt, St, Qt, g, U, t, T, P, delta, chi, eta):
    violated = False
    tol = 0.009
    if np.abs(wt.sum()-1)>tol:
        print(wt.sum())
        print("Fully Invested Constraint Violated: Sum of weights is not 1")
        violated = True
    div_constraint = np.maximum(g, 1/float(Qt.sum()))
    if (wt.abs()-div_constraint>tol).any():
        print(wt[(wt.abs()-div_constraint>tol)])
        print("Diversification Constraint Violated: All weights are not less than parameter %.2f"%div_constraint )
        violated = True
    if wt_1 is None:
        turnover = 0
    else:
        ix = wt.index | wt_1.index
        tn = wt.reindex(ix) - wt_1.reindex(ix)
        tn[tn.isnull()&tn.index.isin(wt.index)] = wt
        tn[tn.isnull()&tn.index.isin(wt_1.index)] = -wt_1
        turnover = (tn).abs().sum()/2
    turnover_list.append(turnover)
    if (np.sum(turnover_list[-12:])>U):
        print("%0.2f Turnover Constraint Violated: Turnover Limit exceeded"%np.sum(turnover_list[-12:]))
        violated = True
    if (wt<t).any():
        print("Shortsell Constraint Violated: all weights are not greater than parameter t")
        violated = True
    if wt[wt<0].sum()<T:
        print("Max Shortsell Constraint Violated: sum of all weights are not greater than parameter T")
        violated = True
    if wt[wt!=0].count() < np.minimum(P, len(wt)):
        print("Min number of positions Constraint Violated: count of all weights <>0 %i are not greater than parameter P%i"%(wt[wt!=0].count(), np.minimum(P, len(wt))))
        violated = True
    if (wt*Dt).sum()/ (wi*Dt).sum() > delta:
        print("Duration Constraint Violated: wt*Dt/ wi*Dt is greater than parameter delta")
        violated = True
    if (wt*St).sum()/ (wi*St).sum() > chi:
        print("Spread Constraint Violated: wt*St/ wi*St is greater than parameter chi")
        violated = True
    if (wt*(1-Qt)).abs().sum()>tol:
        print("Qualification Constraint Violated: wt*(1-qt) is not zero %.2f"%(wt*(1-Qt)).abs().sum())
        violated = True
#     if returns - Rlow/ volatility <= np.sqrt(1-eta):
#         print("Max Risk probability Constraint Violated: returns - Rlow/ volatility <= np.sqrt(1-eta)")
            
    return violated
        
        

In [5]:
## Do not change this function, this verifies if final constraints are met
def checkFinalConstraints(Rmin, volmax):
    violated = False
    if np.sum(port_returns)<Rmin:
        print("Total Return Constraint Violated: Total Return is less than Index Return")
        violated = True
    if port_volatility[-1]>volmax:
        print("Volatility Constraint Violated: Vol is higher than Index Vol")
        violated = True
    if sharpe_ratio[-1]<Rmin/volmax:
        print("Sharpe Ratio Constraint Violated: SR is less than Index SR")
        violated = True
        return violated

## Functions to Modify

Now you can fill your logic in functions below. Once you are satisfied, copy the body of the functions below in the template file

**NOTE: When copying functions to template file, only copy the body of the function. DONOT change the function name or signature in template file else submission will not evaluate**

`def getSymbolsToTrade():`  --> This is function name and signature. <br/>
`  return 'G2'`    --> This is function body

#### DONOT change the function name or signature in template file else submission will not evaluate

### Step 1: Fill the asset group you want to model for

In [6]:
#### FILL THE FUNCTIONS BELOW ####

## Step 1: Fill the asset group you want to model for
## This can be 'G1' or 'G2'
def getSymbolsToTrade():
    ################################################
    ####   COPY FROM BELOW INTO TEMPLATE FILE   ####
    ################################################
    
    return 'G1'


### Step 2: Fill the logic to generate weights below

Do not change the inputs to the function. If you want any extra inputs, specify them in `**kwargs`
You can lookup this tutorial on how to use `**kwargs` https://www.geeksforgeeks.org/args-kwargs-python/

In [7]:
## Step 2: Fill the logic to generate weights

## This function takes in the following inputs:
## identifiers: asset identifiers
## reward: reward at time t (based on w(t-1))
## wi: weights to initialize from, if you want to use
## Dt: value of column 'd' per asset
## St: value of column 'S' per asset
## Qt: value of column 'q' per asset
## g: value of constant gamma, read problem descrption for details
## U: value of constant U, read problem descrption for details
## t: value of constant t, read problem descrption for details
## T: value of constant T, read problem descrption for details
## P: value of constant P, read problem descrption for details
## delta: value of constant delta, read problem descrption for details
## chi: value of constant chi, read problem descrption for details
## eta: value of constant eta, read problem descrption for details
## **kwargs: any additional params you want to add can be specified here. kwargs is a dictionary

### do not change the inputs to the function. If you want any extra inputs, specify them in **kwargs
### you can lookup this tutorial on how to use **kwargs https://www.geeksforgeeks.org/args-kwargs-python/

def getWeights(identifiers, reward, wi, Dt, St, Qt, g, U, t, T, P, delta, chi, eta, df, trr, **kwargs):
    ################################################
    ####   COPY FROM BELOW INTO TEMPLATE FILE   ####
    ################################################
    
    ## trr column doesn't exist in df, last period trr is separately provided to you
    if trr is not None:
        try:
            print(df['trr'].iloc[0])
        except KeyError:
            print('The right way to call TRR is: ', trr.iloc[0])
    ## to use kwargs
    var = kwargs['var']
    test = kwargs['func_test'](2)
    print('Test Function:', var, test)
    if trr is None:
        weights = wi.copy()
    else:
        weights = pd.Series(np.random.random(len(identifiers)), index=identifiers)
    weights[Qt==0] = 0
    weights = weights/weights.sum()
    div_constraint = np.maximum(g, 1/float(Qt.sum()))
    if (weights>div_constraint).any():
        weights[weights>div_constraint] = div_constraint
        weights = weights/weights.sum()
    ## You can call new features like below:
    CustomFeaturesCls = CustomFeatures()
    test = CustomFeaturesCls.newFeature1()
    return weights
    
    

### Step 2b. If you are using any extra arguments in kwargs, specify them below

Return a dictionary with keys as variableKeys and values as the variable you want to be passed

In [8]:
## Step 2b: Fill extra arguments below. See sample below

def getKwargs():   
    return {'var': 2, 'func_test': lambda x: 2*x}

### Step 3: Optional: Fill in the logic to return predictions for return on asset

**Note TRR: Total Return at the end of the period - VERY IMPORTANT!! Don’t use TRR for timestamp t in making predictions for time t. You can use it for time t+1 and later**


In [9]:
## Step 3: Optional: Fill in the logic to return predictions for return on asset
## This function takes in the same inputs as getWeights()

### do not change the inputs to the function. If you want any extra inputs, specify them in **kwargs
### you can lookup this tutorial on how to use **kwargs https://www.geeksforgeeks.org/args-kwargs-python/

def getPrediction(identifiers, wi, Dt, St, Qt, g, U, t, T, P, delta, chi, eta, **kwargs):
    ################################################
    ####   COPY FROM BELOW INTO TEMPLATE FILE   ####
    ################################################
    
    return np.zeros(len(identifiers))

### Step 4: Optional: If your code uses any other helper functions, fill them here

use this in functions above in the following manner: </br>

```CustomFeatures = CustomFeatures()
z = CustomFeatures.newFeature1()```

In [10]:
## Step 4: Optional: If your code uses any other helper functions, fill them here

### again do not change the inputs to the function. If you want any extra inputs, specify them in **kwargs
### you can lookup this tutorial on how to use **kwargs https://www.geeksforgeeks.org/args-kwargs-python/
class CustomFeatures():

    def newFeature1(self, **kwargs):
    ################################################
    ####   COPY FROM BELOW INTO TEMPLATE FILE   ####
    ################################################
    
        return None

### Step 5: Run the cells below to evaluate. 

The snippet below downloads the correct dataset and lets you explore the dataset.

**Note Remember TRR: Total Return at the end of the period - VERY IMPORTANT!! Don’t use TRR for timestamp t in making predictions for time t. You can use it for time t+1 and later**


In [11]:
## This loads asset data and lets you explore the dataset
## Make sure the worksheet and data are in the same folder

index = getSymbolsToTrade()
if not os.path.isfile('%s.csv'%index):
    downloadFile(index)
idx_data = pd.read_csv('%s.csv'%index, index_col='TimeStamp')
idx_data.sort_index(axis=0, level=None, ascending=True, inplace=True)
print(idx_data.size)
idx_data=idx_data[~idx_data.duplicated()]
print(idx_data.size)
print("Data Column Names:")
print(idx_data.columns)
print(index, idx_data.index[0], idx_data.index[-1])
print(idx_data.head(10))

3416422
3411322
Data Column Names:
Index([u'AssetGroup', u'Identifier', u'F2', u'F3', u'F4', u'F5', u'q', u'F7',
       u'F8', u'd', u'S', u'F11', u'F12', u'F13', u'F14', u'F15', u'F16',
       u'F17', u'wI', u'F19', u'TRR', u'F21', u'F22', u'F23', u'F24', u'F25',
       u'F26', u'F27', u'F28', u'F29', u'F30', u'F31', u'F32', u'F33'],
      dtype='object')
('G1', '1998-12-31', '2018-11-30')
           AssetGroup Identifier        F2                  F3  \
TimeStamp                                                        
1998-12-31         G1   86a1f1ee  bc068363       Capital Goods   
1998-12-31         G1   de5b9bca  e5fb34b9  Telecommunications   
1998-12-31         G1   2a0a4ee3  62835655      Basic Industry   
1998-12-31         G1   a67fb965  81f811c5      Transportation   
1998-12-31         G1   52f697cc  2becce58      Basic Industry   
1998-12-31         G1   bf46c4bb  57847ea5  Telecommunications   
1998-12-31         G1   e39dffda  9c69fc3a  Telecommunications   
1998-12-31  

**The cells below evaluate your functions. DONOT CHANGE ANYTHING BELOW THIS!!**

In [12]:
## DONOT CHANGE ANYTHING BELOW THIS

# empty lists to store returns, volatility and weights of imaginary portfolios
port_returns = []
port_volatility = []
sharpe_ratio = []
asset_weights = []
reward_list = []
turnover_list = []
idx_returns = []

#port_returns ,port_volatility ,sharpe_ratio ,asset_weights ,reward_list ,turnover_list ,idx_returns 

#empty df to store previous period returns
ri = None

#empty dict to store values by date
dict_metrics_by_date = {}


In [13]:
### Evaluator to getweights at everytime t and calcuate reward + check if constraints are met

## specifying all the constants
counter = 0
l = 0.1
k = 0.03
g = 0.03
U = 5
t= 0
T = 0
P = 80
delta = 0.5
chi = 0.1
eta = 0.95

## initializing arrays
dates = idx_data.index.unique()
wi = None

## looping over all dates
while counter < len(dates):
    date = dates[counter]
    
    print('################# \nDATE %s'%date)
    
    ## load all the data for a date
    date_data = idx_data[idx_data.index == date]
    
    ## get all the identifiers for a date
    
    date_data.set_index( date_data['Identifier'], inplace=True)
    date_data = date_data[~date_data.index.duplicated()]
    cusips = date_data['Identifier']
    
    ## old weights
    wt_1 = asset_weights[-1] if len(asset_weights)>0 else None
    wt_2 = asset_weights[-2] if len(asset_weights)>1 else None
    
    ##old index weights
    wi_t_1 = None if wi is None else wi.copy()
    
    ## calculate reward at start of time t from weights allocated at time t-1
    reward = getReward(wt_1, wt_2, ri, l, k)
    
    ## calculate index return at start of time t from weights allocated at time t-1
    if ri is None or wi_t_1 is None:
        idx_returns.append(0)
    else:
        idx_returns.append(np.dot(wi_t_1, ri))
    
    #load specific feature info for time t
    wi = pd.Series(date_data['wI']/100, index = date_data['Identifier'])
    Dt = pd.Series(date_data['d'], index = date_data['Identifier'])
    St = pd.Series(date_data['S'], index = date_data['Identifier'])
    qt = pd.Series(date_data['q'], index = date_data['Identifier'])
    
    ## get new weights
    wt = getWeights(cusips, reward, wi, Dt, St, qt, g, U, t, T, P, delta, chi, eta, \
                        date_data.drop(columns=['TRR']), ri, **getKwargs()) 

    
    ## store relevant info in their lists
    asset_weights.append(wt)
    reward_list.append(reward)
    
    ## verify if all constraints are met
    if checkConstraints(wt, wt_1, wi, Dt, St, qt, g, U, t, T, P, delta, chi, eta):
        print("ERROR!!!! weights don't meet contraints, exiting")
        break
    
    dict_metrics_by_date[date] = {'returns': port_returns[-1],
                                  'volatility': port_volatility[-1] ,
                                  'Sharpe Ratio': sharpe_ratio[-1],
                                  'Index Returns': idx_returns[-1],
                                  'Reward': reward_list[-1],
                                  '12m turnover' : turnover_list[-1],
                                  'weights' :asset_weights[-1]}
    
    counter += 1
    
    ## Store end of month returns to calculate reward in next period
    del ri
    ri = pd.Series(date_data['TRR'], index = date_data['Identifier'])

## Calculate returns for last period    
if ri is not None:
    reward = getReward(wt, wt_1, ri, l, k)
    reward_list.append(reward)
    idx_returns.append(np.dot(wi, ri))
## check if contraints on total return and risk are met        
Rmin = np.sum(idx_returns)
volmax = np.std(idx_returns)
if checkFinalConstraints(Rmin, volmax):
    print("ERROR!!!! weights don't meet return/risk limit contraints, exiting")
else:
    print("Portfolio Metrics:")
    print("Total Return: %.2f"%np.sum(port_returns))
    print("Standard Deviation: %.2f"%port_volatility[-1])
    print("Sharpe Ratio: %.2f"%sharpe_ratio[-1])
    

################# 
DATE 1998-12-31
('Test Function:', 2, 4)
Duration Constraint Violated: wt*Dt/ wi*Dt is greater than parameter delta
Spread Constraint Violated: wt*St/ wi*St is greater than parameter chi
ERROR!!!! weights don't meet contraints, exiting
Portfolio Metrics:
Total Return: 0.00
Standard Deviation: 0.00
Sharpe Ratio: 0.00


  # Remove the CWD from sys.path while we load stuff.
