In [23]:
import pandas as pd
import json
import random
import numpy as np
from pandas_datareader.data import DataReader
from dimod import Integer, Binary
from dimod import quicksum
from dimod import ConstrainedQuadraticModel, DiscreteQuadraticModel
from dwave.system import LeapHybridDQMSampler, LeapHybridCQMSampler

In [24]:
class SinglePeriod:
    """Define and solve a single-period portfolio optimization problem.

    The constructor stores the problem configuration; :meth:`load_data` reads
    a price history and derives from it the per-stock purchase grid, the
    average monthly returns and the covariance matrix of monthly returns.

    Args:
        stocks (iterable of str): Ticker symbols to consider.
        budget (int or float): Amount available for purchases. Also stored
            unchanged as ``init_budget``.
        bin_size (int, optional): Maximum number of candidate share counts
            kept per stock in ``shares_intervals``. Falls back to 10 when
            falsy.
        gamma (int, float, list or tuple, optional): Stored as ``gamma``.
            For a list/tuple the LAST element is used and the whole sequence
            is kept in ``gamma_list``. Falls back to 100 when falsy.
        file_path (str): CSV file read by :meth:`load_data` when neither a
            DataFrame nor dates are available.
        dates (sequence of str, optional): ``[start, end]`` dates used when
            downloading prices.
        model_type (str): Model selector; the class keeps one slot each for
            'CQM' and 'DQM' in ``self.model``.
        alpha (int, float, list or tuple): Stored as ``alpha``. For a
            list/tuple the FIRST element is used and the whole sequence is
            kept in ``alpha_list``.
        baseline (str): Symbol of the baseline series; stored wrapped in a
            one-element list.
        sampler_args (str or dict, optional): Sampler keyword arguments,
            either as a JSON object string or as an already-parsed dict.
        t_cost (float): Stored as ``t_cost``.
            NOTE(review): presumably a fractional transaction cost - confirm
            against the solver code, which is not part of this block.
        verbose (bool): When true, :meth:`load_data` prints the maximum
            number of affordable shares per stock.

    Raises:
        TypeError: If ``alpha`` or a truthy ``gamma`` is not a number or a
            list/tuple.
    """
    def __init__(self, stocks=('AAPL', 'MSFT', 'AAL', 'WMT'), budget=1000,
                 bin_size=None, gamma=None, file_path='data/basic_data.csv',
                 dates=None, model_type='CQM', alpha=0.005, baseline='^GSPC',
                 sampler_args=None, t_cost=0.01, verbose=True):
        self.stocks = list(stocks)
        self.budget = budget
        self.init_budget = budget
        self.gamma_list = []
        self.file_path = file_path
        self.dates = dates
        self.model_type = model_type
        self.alpha_list = []
        # Wrapped in a list so it can be iterated and handed to the data
        # reader the same way as ``self.stocks``.
        self.baseline = [baseline]
        self.verbose = verbose
        self.t_cost = t_cost
        self.init_holdings = {s: 0 for s in self.stocks}

        if isinstance(alpha, (list, tuple)):
            self.alpha = alpha[0]
            self.alpha_list = list(alpha)
        elif isinstance(alpha, (int, float)):
            self.alpha = alpha
        else:
            raise TypeError("Use integer or float for alpha, or a list or tuple of either type.")

        if gamma:
            if isinstance(gamma, (list, tuple)):
                self.gamma = gamma[-1]
                self.gamma_list = list(gamma)
            elif isinstance(gamma, (int, float)):
                self.gamma = gamma
            else:
                raise TypeError("Use integer or float for gamma, or a list or tuple of either type.")
        else:
            self.gamma = 100

        if bin_size:
            self.bin_size = bin_size
        else:
            self.bin_size = 10

        self.model = {'CQM': None, 'DQM': None}

        self.sample_set = {}
        if not sampler_args:
            self.sampler_args = {}
        elif isinstance(sampler_args, dict):
            # Callers frequently hold the arguments as a dict already;
            # json.loads() would raise TypeError on it. Copy so later
            # mutation of the caller's dict does not leak in.
            self.sampler_args = dict(sampler_args)
        else:
            self.sampler_args = json.loads(sampler_args)

        # Sampler construction is currently disabled in this notebook:
        # self.sampler = {'CQM': LeapHybridCQMSampler(**self.sampler_args),
        #         'DQM': LeapHybridDQMSampler(**self.sampler_args)}

        self.solution = {}
        self.precision = 2

    def load_data(self, file_path='', dates=None, df=None, num=0):
        """Load price data and derive the quantities used by the model.

        The data source is chosen in this order:

        1. ``df`` - an in-memory DataFrame; its columns replace
           ``self.stocks``.
        2. ``dates`` (or ``self.dates``) - daily data is downloaded through
           ``DataReader`` from the 'yahoo' source for the stocks and the
           baseline, and resampled with ``resample('BM').last()``.
        3. Otherwise - the CSV at ``file_path`` (or ``self.file_path``),
           read with its first column as the index.

        After loading, the method sets ``init_holdings`` (all zero),
        ``max_num_shares``, ``shares_intervals``, ``price`` (last row of the
        data), ``monthly_returns``, ``avg_monthly_returns`` and
        ``covariance_matrix``.

        Args:
            file_path (str): Overrides ``self.file_path`` when non-empty.
            dates (sequence of str, optional): Overrides ``self.dates`` when
                truthy.
            df (pandas.DataFrame, optional): Price table, one column per
                stock.
            num (int): When positive (download path only), ``self.stocks`` is
                replaced by ``num`` symbols sampled at random from
                'data/stocks_symbols.csv'.

        Raises:
            Exception: If ``num > 0`` and the start date is before
                '2010-01-01'.
        """
        if df is not None:
            print("\nLoading data from DataFrame...")
            self.df = df
            self.stocks = df.columns.tolist()
        elif dates or self.dates:
            if dates:
                self.dates = dates

            print(f"\nLoading live data from the web from Yahoo! finance", f"from {self.dates[0]} to {self.dates[1]}...")

            # Generating random list of stocks
            if num > 0:
                # Plain string comparison: relies on ISO 'YYYY-MM-DD' dates.
                if (self.dates[0] < '2010-01-01'):
                    raise Exception("Start date must be >= '2010-01-01' " "when using option 'num'.")
                symbols_df = pd.read_csv('data/stocks_symbols.csv')
                self.stocks = random.sample(list(symbols_df.loc[:,'Symbol']), num)

            # Read in daily data; resample to monthly
            panel_data = DataReader(self.stocks, 'yahoo',
                                    self.dates[0], self.dates[1])
            panel_data = panel_data.resample('BM').last()
            self.df_all = pd.DataFrame(index=panel_data.index,
                                       columns=self.stocks)
            for i in self.stocks:
                self.df_all[i] = panel_data[[('Adj Close',  i)]]

            # Read in baseline data; resample to monthly
            index_df = DataReader(self.baseline, 'yahoo',
                                  self.dates[0], self.dates[1])
            index_df = index_df.resample('BM').last()
            self.df_baseline = pd.DataFrame(index=index_df.index,
                                            columns=self.baseline)
            for i in self.baseline:
                self.df_baseline[i] = index_df[[('Adj Close',  i)]]

            self.df = self.df_all
        else:
            print("\nLoading data from provided CSV file...")
            if file_path:
                self.file_path = file_path

            self.df = pd.read_csv(self.file_path, index_col=0)

        # self.stocks may have changed above, so rebuild the zero holdings.
        self.init_holdings = {s: 0 for s in self.stocks}

        # Whole shares affordable at the most recent price (truncated).
        self.max_num_shares = (self.budget/self.df.iloc[-1]).astype(int)
        if self.verbose:
            print("\nMax shares we can afford with a budget of", self.budget)
            print(self.max_num_shares.to_string())

        # Candidate share counts per stock: every count from 0 up to the
        # maximum when that fits in bin_size entries, otherwise bin_size
        # evenly spaced counts starting at 0.
        self.shares_intervals = {}
        for stock in self.stocks:
            if self.max_num_shares[stock]+1 <= self.bin_size:
                self.shares_intervals[stock] = list(range(self.max_num_shares[stock] + 1))
            else:
                span = (self.max_num_shares[stock]+1) / self.bin_size
                self.shares_intervals[stock] = [int(i*span)
                                                for i in range(self.bin_size)]

        self.price = self.df.iloc[-1]
        # The first row of pct_change() is NaN by construction; drop it.
        self.monthly_returns = self.df[list(self.stocks)].pct_change().iloc[1:]
        self.avg_monthly_returns = self.monthly_returns.mean(axis=0)
        self.covariance_matrix = self.monthly_returns.cov()


In [39]:
class MultiPeriod(SinglePeriod):
    """Solve the multi-period (dynamic) portfolio optimization problem.

    Takes the same constructor arguments as ``SinglePeriod`` (only the
    ``file_path`` default differs: ``None``) and adds :meth:`run`, which walks
    through the downloaded price history one month at a time.
    """
    def __init__(self, stocks=('AAPL', 'MSFT', 'AAL', 'WMT'), budget=1000,
                 bin_size=None, gamma=None, file_path=None,
                 dates=None, model_type='CQM', alpha=0.005, baseline='^GSPC',
                 sampler_args=None, t_cost=0.01, verbose=True):
        super().__init__(stocks=stocks, budget=budget, t_cost=t_cost,
                         bin_size=bin_size, gamma=gamma, file_path=file_path,
                         dates=dates, model_type=model_type, alpha=alpha,
                         baseline=baseline, sampler_args=sampler_args, verbose=verbose)

    def run(self, max_risk=0, min_return=0, num=0, init_holdings=None):
        """Step through the price history month by month.

        Downloads the data for ``self.dates`` (defaulting to
        2010-01-01 .. 2010-06-01), then for every month index ``i`` from 3
        onwards re-loads the data restricted to rows ``0..i`` and records the
        portfolio for that month.

        In its current debugging state the method does not call a solver:
        the CQM branch reads a previously saved result from
        ``result_df_{i}.pkl`` in the working directory, and the DQM branch is
        not available.

        Side effects: sets ``baseline_values``, ``update_values``,
        ``opt_results_df`` and ``price_df`` on the instance, and overwrites
        ``self.budget`` while iterating.

        Args:
            max_risk: Only referenced by the disabled solver call.
            min_return: Only referenced by the disabled solver call.
            num (int): Forwarded to ``load_data``; when positive, that many
                randomly chosen stocks replace ``self.stocks``.
            init_holdings: Only referenced by the disabled solver call; it is
                overwritten with the loaded result's 'stocks' entry on every
                CQM iteration.

        Raises:
            NotImplementedError: If ``model_type`` is 'DQM'.
        """
        if not self.dates:
            #self.dates = ['2010-01-01', '2012-12-31']
            #self.dates = ['2019-01-01', '2022-05-01']
            self.dates = ['2010-01-01', '2010-06-01']
        # Forward `num`; it was previously accepted but silently ignored.
        self.load_data(num=num)
        num_months = len(self.df_all)
        first_purchase = True
        result = {}
        baseline_result = {}
        self.baseline_values = [0]
        self.update_values = [0]
        months = []
        # Define dataframe to save output data
        headers = ['Date', 'Value'] + self.stocks + ['Variance', 'Returns']
        self.opt_results_df = pd.DataFrame(columns=headers)
        row = []

        self.price_df = pd.DataFrame(columns=self.stocks)

        for i in range(3, num_months):

            # Look at just the data up to the current month
            print(f'i: {i}')
            df = self.df_all.iloc[0:i+1,:].copy()
            print(f'df: {df}')
            baseline_df_current = self.df_baseline.iloc[0:i+1,:]
            print("\nDate:", df.last_valid_index())
            months.append(df.last_valid_index().date())

            if first_purchase:
                budget = self.budget
                initial_budget = self.budget
                # NOTE: dividing by a DataFrame row yields a one-element
                # Series (indexed by the baseline symbol), not a scalar, so
                # the baseline values computed below are Series as well.
                baseline_shares = (budget / baseline_df_current.iloc[-1])
                print(f'baseline_shares: {baseline_shares}')
                baseline_result = {self.baseline[0]: baseline_shares}
            else:
                # Compute profit of current portfolio
                print('pre-budget')
                print(f'{[df.iloc[-1][s] for s in self.stocks]}')
                print(f'{[result["stocks"][s] for s in self.stocks]}')

                # Value of last month's holdings at this month's prices.
                budget = sum([df.iloc[-1][s]*result['stocks'][s] for s in self.stocks])
                print(f'budget: {budget}')
                print(f'initial_budget: {initial_budget}')
                self.update_values.append(budget - initial_budget)

                # Compute profit of fund portfolio
                print('pre-baseline')
                print(self.baseline)
                print(self.df_baseline)
                print(baseline_df_current)
                print([baseline_df_current.iloc[-1][s] for s in self.baseline])
                print([baseline_result[s] for s in self.baseline])
                fund_value = sum([baseline_df_current.iloc[-1][s]*baseline_result[s]
                                  for s in self.baseline])
                print(f'fund_value: {fund_value}')
                self.baseline_values.append(fund_value - initial_budget)

                self.budget = budget

            # Recompute prices, returns and purchase grid for the truncated
            # history (this also resets self.stocks to df's columns).
            self.load_data(df=df)
            self.price_df.loc[i-2] = list(self.price.values)
            # dtype=object because the entries may be pandas Series (see the
            # note on baseline_shares above).
            update_values = np.array(self.update_values, dtype=object)
            baseline_values = np.array(self.baseline_values, dtype=object)
            print(f'update_values: {update_values}')
            print(f'baseline_values: {baseline_values}')

            # Making solve run
            if self.model_type == 'DQM':
                print(f"\nMulti-Period DQM Run...")

                # self.build_dqm()
                # self.solution['DQM'] = self.solve_dqm()
                # result = self.solution['DQM']

                # The DQM solve above is disabled, so no result exists for
                # this month. Fail explicitly instead of with a KeyError on
                # result['stocks'] further down.
                raise NotImplementedError(
                    "The DQM solve is disabled in this notebook; "
                    "use model_type='CQM'.")
            else:
                print(f"\nMulti-Period CQM Run...")
                if self.t_cost and not first_purchase:
                    self.budget = 0

                # self.solution['CQM'] = self.solve_cqm(max_risk=max_risk,
                #                                       min_return=min_return,
                #                                       init_holdings=init_holdings)
                # result = self.solution['CQM']

                # Stand-in for the solver: load a result saved earlier.
                # SECURITY: unpickling executes arbitrary code; only load
                # files you created yourself.
                result = pd.read_pickle(f'result_df_{i}.pkl')
                print(f'result: {result}')
                init_holdings = result['stocks']
            value = sum([self.price[s]*result['stocks'][s] for s in self.stocks])
            returns = result['return']
            variance = result['risk']
            row = [months[-1].strftime('%Y-%m-%d'), value] + \
                  [result['stocks'][s] for s in self.stocks] + \
                  [variance, returns]
            # Rows are labelled from 1 (i starts at 3).
            self.opt_results_df.loc[i-2] = row
            first_purchase = False
        #print(self.opt_results_df)
        print(f'\nRun completed.\n')

In [40]:
# --- Universe and data source ------------------------------------------
stocks = ['AAPL', 'MSFT', 'AAL', 'WMT']
baseline = '^GSPC'
dates = ['2010-01-01', '2010-06-01']
file_path = './basic_data.csv'

# --- Model settings -----------------------------------------------------
model_type = 'CQM'
budget = 1000
bin_size = 10
alpha = [0.005]
gamma = None
t_cost = 0.01

# --- Sampler / logging --------------------------------------------------
params = {}
verbose = False

# --- Arguments for run() ------------------------------------------------
min_return = 0.0
max_risk = 0.0
num = 0

# Build the portfolio object from the settings above, then execute the
# month-by-month run.
my_portfolio = MultiPeriod(
    stocks=stocks,
    baseline=baseline,
    dates=dates,
    file_path=file_path,
    model_type=model_type,
    budget=budget,
    bin_size=bin_size,
    alpha=alpha,
    gamma=gamma,
    t_cost=t_cost,
    sampler_args=params,
    verbose=verbose,
)
my_portfolio.run(min_return=min_return, max_risk=max_risk, num=num)


Loading live data from the web from Yahoo! finance from 2010-01-01 to 2010-06-01...
i: 3
df:                 AAPL       MSFT       AAL        WMT
Date                                                
2010-01-29  5.864813  21.670124  5.005956  40.058235
2010-02-26  6.248350  22.150024  6.910295  40.538067
2010-03-31  7.176042  22.629028  6.929151  41.920116
2010-04-30  7.972737  23.594755  6.665183  40.442356

Date: 2010-04-30 00:00:00
baseline_shares: ^GSPC    0.84268
Name: 2010-04-30 00:00:00, dtype: float64

Loading data from DataFrame...
update_values: [0]
baseline_values: [0]

Multi-Period CQM Run...
result:       stocks  return    risk
AAL       22  109.25  363.75
AAPL     107  109.25  363.75
MSFT       0  109.25  363.75
WMT        0  109.25  363.75
i: 4
df:                 AAPL       MSFT       AAL        WMT
Date                                                
2010-01-29  5.864813  21.670124  5.005956  40.058235
2010-02-26  6.248350  22.150024  6.910295  40.538067
2010-03-31  7.

In [None]:
# Load two pickled objects from the working directory and render them.
# NOTE(review): presumably these are the per-month frames for i=3 and i=4
# saved during an earlier debugging session - confirm where they are written.
# SECURITY: pd.read_pickle executes arbitrary code on load; only open
# pickle files you created yourself.
df_3 = pd.read_pickle('df_3.pkl')
display(df_3)
df_4 = pd.read_pickle('df_4.pkl')
display(df_4)

In [None]:
# Stand-alone replay of the first iteration (i = 3) of MultiPeriod.run,
# using plain variables instead of instance attributes.
#
# NOTE(review): `df_all` and `df_baseline` are not defined in any cell shown
# here - presumably they are my_portfolio.df_all / my_portfolio.df_baseline;
# define them explicitly before this cell so it runs on a fresh kernel.
stocks = ['AAPL', 'MSFT', 'AAL', 'WMT']
budget = 1000
baseline='^GSPC'
# The class stores the baseline as a one-element list. Indexing or iterating
# the bare string would yield single characters ('^', 'G', ...), so use a
# list of symbols for lookups below.
baseline_symbols = [baseline]

initial_budget = budget
num_months = len(df_all)
print(f'num_month: {num_months}')
first_purchase = True
result = {}
baseline_result = {}
baseline_values = [0]
update_values = [0]
months = []

# Define dataframe to save output data
headers = ['Date', 'Value'] + stocks + ['Variance', 'Returns']
opt_results_df = pd.DataFrame(columns=headers)
row = []

price_df = pd.DataFrame(columns=stocks)

# Look at just the data up to the current month
i = 3
df = df_all.iloc[0:i+1,:].copy()
baseline_df_current = df_baseline.iloc[0:i+1,:]
print("\nDate:", df.last_valid_index())
months.append(df.last_valid_index().date())

if first_purchase:
    print(f'budget: {budget}')
    display(baseline_df_current.iloc[-1])
    # Dividing by a DataFrame row gives a Series indexed by baseline symbol.
    baseline_shares = (budget / baseline_df_current.iloc[-1])
    print(baseline_shares)
    baseline_result.update({baseline_symbols[0]: baseline_shares})
else:
    # Compute profit of current portfolio
    budget = sum([df.iloc[-1][s]*result['stocks'][s] for s in stocks])
    update_values.append(budget - initial_budget)

    # Compute profit of fund portfolio
    fund_value = sum([baseline_df_current.iloc[-1][s]*baseline_result[s] for s in baseline_symbols])
    baseline_values.append(fund_value - initial_budget)

first_purchase = False


