# Replicate the ARK Invest ARKK Fund
1) Get the holdings weightings
2) Get the account data from Alpaca
3) Calculate the target amount to invest and target number of shares in each constituent holding based on portfolio data
4) Calculate trades required to balance portfolio
5) Book the trades
6) Display orders to check what has been booked

## Get the holdings weightings for ARKK

In [1]:
import io
import pandas as pd
import requests


# Download the CSV containing the fund constituents and weightings from ARK
url=r"https://ark-funds.com/wp-content/uploads/funds-etf-csv/ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
# A timeout stops the notebook hanging forever if the server does not answer
response = requests.get(url, headers=headers, timeout=30)

# Fail here on an HTTP error instead of trying to parse an error page as CSV
response.raise_for_status()

# Load it into pandas
arkk_data = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Remove any irrelevant rows, i.e. where fund or ticker is NaN.
# A NaN fund is the fund risk statement, a NaN ticker is not a stock.
arkk_data = arkk_data.dropna(subset=['fund', 'ticker'])

# Convert weight to a number. Only a trailing '%' is removed, so a value without one keeps all its digits.
arkk_data['weight'] = arkk_data['weight (%)'].str.rstrip('%').astype('float')

arkk_data.head()

Unnamed: 0,date,fund,company,ticker,cusip,shares,market value ($),weight (%),weight
0,04/11/2022,ARKK,TESLA INC,TSLA,88160R101,1063294,"$1,090,397,364.06",10.19%,10.19
1,04/11/2022,ARKK,TELADOC HEALTH INC,TDOC,87918A105,10564463,"$697,888,425.78",6.52%,6.52
2,04/11/2022,ARKK,ROKU INC,ROKU,77543R102,6050785,"$694,267,070.90",6.49%,6.49
3,04/11/2022,ARKK,ZOOM VIDEO COMMUNICATIONS-A,ZM,98980L101,6209236,"$688,604,272.40",6.44%,6.44
4,04/11/2022,ARKK,COINBASE GLOBAL INC -CLASS A,COIN,19260Q107,4146506,"$667,338,675.64",6.24%,6.24


## Get the account data from Alpaca

### Define a class to wrap the Alpaca API to provide data in dicts and dataframes, with type conversion

In [87]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import alpaca_trade_api as tradeapi

class Alpaca:
    """
    A wrapper around the Alpaca API that returns dicts and pandas DataFrames instead of Alpaca native types,
    with the values converted to the correct Python / pandas data types.
    """
    # REST client, assigned per instance in __init__
    __api = None

    def __init__(self, vault_path: str = ".alpaca-paper.vault"):
        """
        Connect to Alpaca using the connection params contained in the vault file.

        The file holds one ``name=value`` pair per line and must define ``key``, ``secret-key`` and ``url``.

        :param vault_path: path of the file to read the connection params from
        """
        con_params = self._read_connection_params(vault_path)

        # Connect
        self.__api = tradeapi.REST(con_params['key'], con_params['secret-key'], base_url=con_params['url'])

    @staticmethod
    def _read_connection_params(path: str) -> dict:
        """
        Parse a file of ``name=value`` lines into a dict. Blank lines are skipped.

        :raises ValueError: if a non-blank line contains no '='
        """
        con_params = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate empty lines, e.g. a trailing newline at the end of the file
                    continue
                if "=" not in line:
                    raise ValueError(f"Malformed line in {path}: expected name=value")
                # Split on the first '=' only, so values may themselves contain '='
                key, val = line.split("=", 1)
                con_params[key.strip()] = val.strip()

        return con_params

    @property
    def account_data(self) -> dict:
        """
        Returns the account data as a dict.

        Known numeric, boolean and timestamp fields are converted to float / int / bool / datetime.
        Fields that are absent or null in the response are left as they are.
        """
        account_obj = self.__api.get_account()

        # Get Account object vars as dict. Copied so the conversions below do not alter the Account object's payload.
        account_data = dict(vars(account_obj)['_raw'])

        float_items = ['buying_power', 'regt_buying_power', 'daytrading_buying_power', 'non_marginable_buying_power', 'cash', 'accrued_fees',
                       'pending_transfer_in', 'portfolio_value', 'multiplier', 'equity', 'last_equity', 'long_market_value', 'short_market_value',
                       'initial_margin', 'maintenance_margin', 'last_maintenance_margin', 'sma']
        int_items = ['daytrade_count']
        bool_items = ['pattern_day_trader', 'trading_blocked', 'transfers_blocked', 'account_blocked', 'trade_suspended_by_user', 'shorting_enabled']
        datetime_items = ['created_at']

        # Convert items to correct data type
        for item in float_items:
            if account_data.get(item) is not None:
                account_data[item] = float(account_data[item])

        for item in int_items:
            if account_data.get(item) is not None:
                account_data[item] = int(account_data[item])

        for item in bool_items:
            value = account_data.get(item)
            if isinstance(value, str):
                # bool() of any non-empty string is True, so "false" has to be compared explicitly
                account_data[item] = value.strip().lower() == 'true'
            elif value is not None:
                account_data[item] = bool(value)

        for item in datetime_items:
            value = account_data.get(item)
            if value is not None:
                # Drop a trailing 'Z' (rejected by fromisoformat before Python 3.11); the result is then a naive datetime
                account_data[item] = datetime.fromisoformat(value[:-1] if value.endswith('Z') else value)

        return account_data

    @staticmethod
    def _entities_to_frame(entities, columns: list) -> pd.DataFrame:
        """
        Build a DataFrame with exactly the given columns from the ``_raw`` dicts of the entities, with None replaced by NaN.
        An empty entity list gives an empty DataFrame that still has all the columns.
        """
        frame = pd.DataFrame([entity._raw for entity in entities], columns=columns)

        # Replace None with NaN (np.nan rather than np.NaN, which NumPy 2.0 removed)
        return frame.fillna(value=np.nan)

    @property
    def positions(self) -> pd.DataFrame:
        """
        Returns the positions as a pandas DataFrame, one row per position.

        Price, value and P&L columns are float64 and qty is int32.
        """
        columns = ['asset_class', 'asset_id', 'avg_entry_price', 'change_today', 'cost_basis', 'current_price', 'exchange', 'lastday_price', 'market_value',
                   'qty', 'side', 'symbol', 'unrealized_intraday_pl', 'unrealized_intraday_plpc', 'unrealized_pl', 'unrealized_plpc']

        positions_data = self._entities_to_frame(self.__api.list_positions(), columns)

        # Convert items to correct data type
        # NOTE(review): the int32 cast presumably fails for fractional quantities - confirm only whole shares are held
        positions_data = positions_data.astype({'avg_entry_price': 'float64', 'change_today': 'float64', 'current_price': 'float64', 'lastday_price': 'float64',
                                                'market_value': 'float64', 'qty': 'int32', 'unrealized_intraday_pl': 'float64',
                                                'unrealized_intraday_plpc': 'float64', 'unrealized_pl': 'float64', 'unrealized_plpc': 'float64'})

        return positions_data

    @property
    def orders(self) -> pd.DataFrame:
        """
        Returns the orders as a pandas DataFrame, one row per order (at most 500 are requested).

        qty and filled_qty are int32 and the ``*_at`` timestamp columns are datetime64[ns].
        """
        columns = ['id', 'client_order_id', 'created_at', 'updated_at', 'submitted_at',
                   'filled_at', 'expired_at', 'canceled_at', 'failed_at', 'replaced_at',
                   'replaced_by', 'replaces', 'asset_id', 'symbol', 'asset_class',
                   'notional', 'qty', 'filled_qty', 'filled_avg_price', 'order_class',
                   'order_type', 'type', 'side', 'time_in_force', 'limit_price',
                   'stop_price', 'status', 'extended_hours', 'legs', 'trail_percent',
                   'trail_price', 'hwm']

        orders_data = self._entities_to_frame(self.__api.list_orders(limit=500), columns)

        # Convert items to correct data type
        # NOTE(review): an order without a qty would end up as NaN here and break the int32 cast - confirm all orders carry a qty
        orders_data = orders_data.astype({'qty': 'int32', 'filled_qty': 'int32', 'created_at': 'datetime64[ns]', 'updated_at': 'datetime64[ns]',
                                          'submitted_at': 'datetime64[ns]', 'filled_at': 'datetime64[ns]', 'expired_at': 'datetime64[ns]',
                                          'canceled_at': 'datetime64[ns]', 'failed_at': 'datetime64[ns]', 'replaced_at': 'datetime64[ns]'})

        return orders_data

    def last_price(self, symbol: str) -> float:
        """
        Get a recent close price for the specified symbol.

        A single minute bar (limit=1) is requested from the window starting three days ago and ending two days ago,
        so this is not a live price (see the TODO below).

        :raises LookupError: if no bar is returned for the window
        """
        # TODO update times to now when alpaca subscription is upgraded
        today = pd.Timestamp('now').date()
        bars = self.__api.get_bars(symbol, tradeapi.TimeFrame.Minute, today - timedelta(days=3), today - timedelta(days=2),
                                   adjustment='raw', limit=1).df

        if bars.empty:
            # Say which symbol has no data instead of failing on the indexing below
            raise LookupError(f"No price bars returned for {symbol}")

        return float(bars['close'].values[0])

    def place_order(self, symbol: str, direction: str, qty: int):
        """
        Submits a good-till-cancelled market order and prints the response.

        Existing open orders are not cancelled, edited or netted against (see the TODO below), so callers must
        account for unfilled orders themselves.

        :param symbol: symbol to trade
        :param direction: order side, passed to the API as ``side`` (the notebook uses 'buy' and 'sell')
        :param qty: number of shares
        :return: the response of the submit_order call
        """
        # TODO cancel or edit orders first before booking new trades
        response = self.__api.submit_order(symbol=symbol, qty=qty, side=direction, type='market', time_in_force='gtc')

        print(f"{symbol} {qty} Response: {response}")

        return response
        

# Shared wrapper instance used throughout the notebook.
# Constructing it reads the connection params from .alpaca-paper.vault and creates the REST client.
al = Alpaca()

### Test the output

In [73]:
# Fetch the account data and display it as a dict with converted value types
al.account_data

{'id': 'c8b48627-6c21-48f5-92cd-1a6cdcc3c077',
 'account_number': 'PA3LCDS5GZ05',
 'status': 'ACTIVE',
 'crypto_status': 'ACTIVE',
 'currency': 'USD',
 'buying_power': 3601.88,
 'regt_buying_power': 3601.88,
 'daytrading_buying_power': 0.0,
 'non_marginable_buying_power': 0.0,
 'cash': -96690.01,
 'accrued_fees': 0.0,
 'pending_transfer_in': 0.0,
 'portfolio_value': 100291.89,
 'pattern_day_trader': False,
 'trading_blocked': False,
 'transfers_blocked': False,
 'account_blocked': False,
 'created_at': datetime.datetime(2022, 3, 9, 14, 54, 49, 878543),
 'trade_suspended_by_user': False,
 'multiplier': 2.0,
 'shorting_enabled': True,
 'equity': 100291.89,
 'last_equity': 100000.0,
 'long_market_value': 196981.9,
 'short_market_value': 0.0,
 'initial_margin': 98490.95,
 'maintenance_margin': 59094.57,
 'last_maintenance_margin': 0.0,
 'sma': 100000.0,
 'daytrade_count': 35}

In [88]:
# Display the first rows of the positions, limited to the columns of interest
al.positions[['symbol','side', 'qty', 'avg_entry_price', 'current_price']].head()

Unnamed: 0,symbol,side,qty,avg_entry_price,current_price
0,ZM,long,112,108.181875,110.0
1,VCYT,long,86,25.585116,25.32
2,U,long,102,89.375,90.06
3,TXG,long,42,71.46,72.55
4,TWST,long,52,45.498654,43.95


In [89]:
# Display the first rows of the orders, with the ordered and filled quantities
al.orders[['symbol','side', 'qty', 'filled_qty']].head()

Unnamed: 0,symbol,side,qty,filled_qty
0,CGEN,sell,96,0
1,BLI,sell,83,0
2,CERS,sell,178,0
3,TWOU,sell,49,0
4,MTLS,sell,34,0


In [90]:
# Add positions and unfilled orders to the holdings dataframe

# Get unfilled orders. Calculate the unfilled quantity, signed by trade direction (buy positive, anything else negative).
orders = al.orders
# .copy() so that the new column is added to an independent frame rather than to a slice of `orders`
unfilled_orders = orders[orders['filled_qty'] < orders['qty']].copy()
remaining_qty = unfilled_orders['qty'] - unfilled_orders['filled_qty']
# Vectorised sign; also works unchanged when there are no unfilled orders
unfilled_orders['unfilled_qty'] = remaining_qty.where(unfilled_orders['side'] == 'buy', -remaining_qty)

# Net the unfilled quantity per symbol
unfilled_orders = unfilled_orders[['symbol', 'unfilled_qty']]
unfilled_orders = unfilled_orders.groupby(['symbol']).sum().reset_index()

# Get positions, rename columns ready for merge
positions = al.positions.rename(columns={'qty': 'position_qty', 'side': 'position_side'})

# Rename ARK data ticker column to symbol ready for merge
arkk_data.rename({'ticker': 'symbol'}, axis='columns', inplace=True)

# Merge the data frames
# NOTE(review): both merges are left joins on the fund holdings, so a position or open order in a symbol
# that is not in the fund is dropped here and never rebalanced - confirm that is intended
data = pd.merge(arkk_data, unfilled_orders, how='left', on='symbol')
data = pd.merge(data, positions, how='left', on='symbol')

# position_qty and unfilled_qty should be 0 if NaN. Cast back to whole numbers, as the NaNs made the columns float.
data['position_qty'] = data['position_qty'].fillna(0).astype('int64')
data['unfilled_qty'] = data['unfilled_qty'].fillna(0).astype('int64')

# Remove unneeded columns
data = data[['symbol', 'company', 'weight', 'position_qty', 'unfilled_qty']]

data.head()

Unnamed: 0,symbol,company,weight,position_qty,unfilled_qty
0,TSLA,TESLA INC,10.19,27,-18
1,TDOC,TELADOC HEALTH INC,6.52,216,-123
2,ROKU,ROKU INC,6.49,108,-53
3,ZM,ZOOM VIDEO COMMUNICATIONS-A,6.44,112,-56
4,COIN,COINBASE GLOBAL INC -CLASS A,6.24,76,-39


## Calculate the target amount to invest and target number of shares in each constituent holding based on portfolio data

In [91]:
# Target investment amount to maintain weighting (weight is a percentage of the portfolio value).
# The shared `al` instance is reused rather than opening a second connection.
data['target_invest_amt'] = al.account_data['portfolio_value'] / 100 * data['weight']

# Look up the last price of each constituent
data['last_price'] = data['symbol'].map(al.last_price)

# Target number of shares: the max number of whole shares that can be purchased with target_invest_amt.
# Calculated once for the whole frame, after all the prices are known.
data['target_shares'] = np.floor(data['target_invest_amt'] / data['last_price'])

data = data.astype({'target_shares': 'int32'})
data.head()

Unnamed: 0,symbol,company,weight,position_qty,unfilled_qty,target_invest_amt,last_price,target_shares
0,TSLA,TESLA INC,10.19,27,-18,10207.056022,1075.1,9
1,TDOC,TELADOC HEALTH INC,6.52,216,-123,6530.913176,70.0,93
2,ROKU,ROKU INC,6.49,108,-53,6500.862962,117.18,55
3,ZM,ZOOM VIDEO COMMUNICATIONS-A,6.44,112,-56,6450.779272,114.68,56
4,COIN,COINBASE GLOBAL INC -CLASS A,6.24,76,-39,6250.444512,167.0,37


## Calculate trades required to balance portfolio

In [96]:
# Shares still to trade: the target, less what is already held, less what is already on order
held_or_pending = data['position_qty'] + data['unfilled_qty']
data['rebalance_qty'] = data['target_shares'] - held_or_pending

# Keep only the holdings that need a trade, with just the columns relevant to it
trade_columns = ['symbol', 'company', 'target_invest_amt', 'last_price', 'target_shares', 'position_qty', 'unfilled_qty', 'rebalance_qty']
needs_trade = data['rebalance_qty'].ne(0)
trades = data.loc[needs_trade, trade_columns]

trades

Unnamed: 0,symbol,company,target_invest_amt,last_price,target_shares,position_qty,unfilled_qty,rebalance_qty


## Book the trades

In [97]:
# Walk the trades and book an order for each rebalance_qty
for symbol, shares in zip(trades['symbol'], trades['rebalance_qty']):
    # Book the trade: the sign gives the direction, the magnitude the number of shares.
    # rebalance_qty can be a float column, so it is cast to the int that place_order expects.
    if shares != 0:
        al.place_order(symbol=symbol, qty=int(abs(shares)), direction='buy' if shares > 0 else 'sell')

## Display the orders to check what has been booked

In [98]:
# Re-fetch the orders to check what has been booked
al.orders[['symbol','side', 'qty']]

Unnamed: 0,symbol,side,qty
0,CGEN,sell,96
1,BLI,sell,83
2,CERS,sell,178
3,TWOU,sell,49
4,MTLS,sell,34
5,TSP,sell,64
6,SSYS,sell,40
7,PACB,sell,98
8,NVTA,sell,145
9,VCYT,sell,42
