In [3]:
from portfolio import PortfolioGenerator
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
import math
import matplotlib.pyplot as plt

In [4]:
def read_stock_data():
    '''
    Description:
        Reads in simulated stock data from stock_data.csv
    Returns:
        stock_df (DataFrame): standardized ticker/factor data in pandas df
    Raises:
        AssertionError: ticker_data.csv/factor_data.csv has an invalid format
    '''
    ticker_df = pd.read_csv('stock_data/ticker_data.csv')
    factor_df = pd.read_csv('stock_data/factor_data.csv')
    assert 'timestep' in ticker_df.columns, "ticker_data.csv has an invalid format"
    assert 'ticker' in ticker_df.columns, "ticker_data.csv has an invalid format"
    assert 'returns' in ticker_df.columns, "ticker_data.csv has an invalid format"
    assert 'timestep' in factor_df.columns, "factor_data.csv has an invalid format"
    ticker_df.set_index('timestep', inplace=True)
    factor_df.set_index('timestep', inplace=True)
    stock_df = ticker_df.join(factor_df, how='left')
    return stock_df

In [5]:
df = read_stock_data()
#ticker_df = pd.read_csv('stock_data/ticker_data.csv')

In [6]:
df = df.rename(columns={'3M_R':'THREEMR'})
df.assign(rf_rate = (np.power(1+ df.THREEMR,1/90) -1))
;

''

In [7]:
df_piv = df.pivot(columns = 'ticker', values = 'returns')

In [8]:
df.head()

Unnamed: 0_level_0,index,industry,market_cap,pb,returns,ticker,VIX,COPP,THREEMR,US_TRY,BIG_IX,SMALL_IX,SENTI,TEMP,RAIN,OIL
timestep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1,TECH,108653100000.0,0.910461,,0,15.0,90.0,0.06,0.27,100.0,100.0,80.0,74.819656,0.639413,65.0
0,2,TECH,176760000000.0,1.941031,,1,15.0,90.0,0.06,0.27,100.0,100.0,80.0,74.819656,0.639413,65.0
0,3,TECH,57875640000.0,3.705055,,2,15.0,90.0,0.06,0.27,100.0,100.0,80.0,74.819656,0.639413,65.0
0,4,TECH,78960000000.0,3.868566,,3,15.0,90.0,0.06,0.27,100.0,100.0,80.0,74.819656,0.639413,65.0
0,5,TECH,136218400000.0,4.46685,,4,15.0,90.0,0.06,0.27,100.0,100.0,80.0,74.819656,0.639413,65.0


In [9]:
#df_piv.iloc[:, 1]

In [10]:
try_list = []

In [11]:
count = 0
for tick in df_piv.columns:
    count = count + 1
    tick = 1
    short_window = 20
    long_window = 40

    ex_1_s = df_piv.iloc[:, tick]

    signals_df = pd.DataFrame(index = df_piv.index)
    signals_df['signal'] = 0

    signals_df['short_mavg'] = ex_1_s.rolling(window = short_window, min_periods = 1, center = False).mean()
    signals_df['long_mavg'] = ex_1_s.rolling(window = long_window, min_periods = 1, center = False).mean()

    signals_df['signal'][short_window:] = np.where(signals_df['short_mavg'][short_window:] > signals_df['long_mavg'][short_window:], 1.0, 0.0)   

    signals_df['positions'] = signals_df['signal'].diff()
    try_list.append(signals_df['positions'])
    #signals_tot_df.append(signals_df['positions'])
    #signals_tot_df = pd.concat([signals_tot_df, signals_df.iloc[:, 0]], ignore_index=True)
    #pd.concat(signals_tot_df.iloc[tick] = signals_df.iloc[:, 0]
    #print(signals_df)
    #signals_tot_df.concat(signals_df.iloc[:, 0])
#print(str(count))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [12]:
full_df = pd.DataFrame(try_list)

In [13]:
stock_ind = []
for i in range(1000):
    stock_ind.append(i)
full_df.index = stock_ind

In [16]:
full_df.index.name = 'stock_ind'
full_df.columns.name = 'timestep'

In [21]:
full_df_t = full_df.transpose()

In [24]:
full_df_t.head(100)

stock_ind,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
timestep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,,,,,,,,,,
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
def simulate_portfolio(self):
    '''
    Description:
        Simulates performance of the portfolio on historical data
    Return:
        sharpe (int) - sharpe ratio for the portfolio
    '''
    daily_returns = []
    stock_df = self.read_stock_data()
    for idx in stock_df.index.unique():
        print("timestep", idx)
        if idx < MAX_LOOKBACK:
            continue
        stock_features = stock_df.loc[idx-MAX_LOOKBACK:idx-1]
        returns = stock_df.loc[idx:idx].set_index('ticker')['returns']
        signal = self.build_signal(stock_features)
        signal_return = returns * signal
        daily_returns.append(np.mean(signal_return))
    sharpe_ratio = np.sqrt(252) * (np.mean(daily_returns) / np.std(daily_returns))
    return sharpe_ratio