# Portfolio Construction Using Autoencoder Embeddings

In [None]:
import sys
# Add the directory two levels up to the module search path so the
# project-local packages imported in later cells (data_loading, ml_models)
# can be found. The path is relative, so it is resolved against the current
# working directory -- presumably the notebook's own folder; confirm if the
# notebook is launched from elsewhere.
sys.path.append('../../')

In [None]:
# Quandl API key handed to get_stock_data in the data-loading cell.
# The value here is a placeholder: replace it with a real key before running.
QUANDL_API_KEY = 'YOUR-QUANDL-API-HERE'

## Load S&P500 Stock Prices And Compute Returns

In [None]:
import pandas as pd

from data_loading.stocks_data_loaders import get_stock_attribute_data
from data_loading.stocks_data_loaders import get_stock_data
from data_loading.stocks_data_loaders import compute_stock_returns

# Constituent list: the 'Symbol' column of this CSV drives the download.
sp_file = '../../data/SP500_companies.csv'
sp_df = pd.read_csv(sp_file)

# Dataset codes requested from Quandl: each symbol prefixed with 'WIKI/'.
quandl_tickers = [f'WIKI/{ticker}' for ticker in sp_df['Symbol']]

# Date range passed to get_stock_data.
start_date = '2017-1-1'
end_date = '2017-12-31'

stocks_df = get_stock_data(quandl_tickers, start_date, end_date, QUANDL_API_KEY)

# Keep only the 'Adj. Close' attribute of the downloaded data.
prices_df = get_stock_attribute_data(stocks_df=stocks_df, attribute='Adj. Close')

# Relabel each column with its bare stock symbol: take the text before the
# first space, then the part that follows the first '/'.
renaming_map = {
    column: column.split(' ')[0].split('/')[1]
    for column in prices_df.columns
}
prices_df = prices_df.rename(columns=renaming_map)

# Discard columns that contain no data at all (every value is NaN).
prices_df = prices_df.dropna(axis=1, how='all')

# Symbols of the stocks that survived the filter above.
symbols = prices_df.columns.tolist()

returns_df = compute_stock_returns(prices_df)

# Fill any remaining gaps in a column with that column's mean return.
returns_df = returns_df.fillna(returns_df.mean())

## Stock Returns Autoencoder Embedding

In [None]:
import tensorflow as tf

from ml_models.neural_networks import create_autoencoder

# Transpose the returns table so that every row of `samples` is one column of
# returns_df and every feature is one row of returns_df.
# NOTE(review): presumably that means one sample per stock symbol and one
# feature per trading day -- confirm against compute_stock_returns.
samples = returns_df.values.T

# The input width is the number of features per sample.
# NOTE(review): the last entry of encoding_layers_sizes (20) is presumably the
# embedding width -- confirm against create_autoencoder.
encoder, autoencoder = create_autoencoder(
    input_dim=samples.shape[1],
    encoding_layers_sizes=[150, 50, 20],
    layers_activation='relu',
    embedding_activation='tanh',
    output_activation='linear',
)

# Pass the optimizer instance (not the string 'adam') to compile() so that the
# learning rate configured here is the one actually used during training.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
autoencoder.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the network to reconstruct its own input (inputs double as targets).
training = autoencoder.fit(
    samples,
    samples,
    epochs=1000,
    batch_size=16,
    shuffle=True,
)

# Run every sample through the encoder model to obtain its embedding.
encoded = encoder.predict(samples)

## Compute Stocks' Closest Neighbors In Embedding Space

In [None]:
# Yet to be implemented