In [1]:
import pandas as pd
import numpy as np
import urllib.request
import csv
import requests
from datetime import datetime
from os import listdir

In [40]:
# function to download the ARK .csv file
def download_file(etf_ticker,url,data_folder,today_date):
    """Download one holdings CSV and save it inside ``data_folder``.

    Parameters
    ----------
    etf_ticker : str
        Fund ticker; used as the file-name prefix and in the progress message.
    url : str
        Address the CSV is fetched from.
    data_folder : str
        Destination directory. It is concatenated directly with the file
        name, so it must already end with a path separator.
    today_date : str
        Date stamp embedded in the file name.

    Returns
    -------
    str
        Path of the file that was written
        (``<data_folder><etf_ticker>_<today_date>.csv``).

    Raises
    ------
    requests.RequestException
        If the request times out, cannot be completed, or the server
        answers with an HTTP error status.
    """
    file_name = data_folder + etf_ticker + '_' + today_date +'.csv'
    print('Downloading ' + etf_ticker)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, timeout=30)
    # Check the status before opening the file so that an HTTP error page is
    # never stored as if it were holdings data.
    r.raise_for_status()
    with open(file_name, 'wb') as f:
        f.write(r.content)
    return file_name

In [42]:
# function to read and process the ARK .csv file
def process_df(input_file):
    """Load a holdings CSV and return a cleaned DataFrame.

    The ``date``, ``fund`` and ``cusip`` columns are removed, missing
    ``ticker`` values are replaced by the string ``'NA'``, and every row that
    still contains a missing value afterwards is dropped.

    Parameters
    ----------
    input_file : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame; surviving rows keep their original index labels.

    Raises
    ------
    KeyError
        If the file lacks one of the ``date``, ``fund``, ``cusip`` or
        ``ticker`` columns.
    """
    df = pd.read_csv(input_file)
    df = df.drop(columns=['date','fund','cusip'])
    # Assign the filled column back instead of calling
    # df['ticker'].fillna(..., inplace=True): that form mutates an
    # intermediate object and, with pandas Copy-on-Write, never updates df,
    # so rows without a ticker would be removed by dropna() below.
    df['ticker'] = df['ticker'].fillna('NA')
    return df.dropna()

In [9]:
# find all CSV files for a given prefix and suffix
def find_csv_filenames(path_to_dir, preffix, suffix=".csv"):
    """List the entries of ``path_to_dir`` that match a prefix and a suffix.

    Parameters
    ----------
    path_to_dir : str
        Directory to scan (not recursive).
    preffix : str
        Required start of the file name.
    suffix : str, optional
        Required end of the file name; defaults to ``".csv"``.

    Returns
    -------
    list of str
        Matching bare file names (no directory part), sorted alphabetically
        so the result does not depend on the order ``listdir`` happens to use.
    """
    return sorted(
        filename
        for filename in listdir(path_to_dir)
        # Logical `and` is the idiomatic, short-circuiting operator for
        # combining two booleans (the bitwise `&` is meant for integers/masks).
        if filename.startswith(preffix) and filename.endswith(suffix)
    )

In [10]:
# function to find the most recent file except for today's file
def find_most_recent_file(today_date,preffix,file_list):
    """Build the name of the newest dated file that is older than today.

    Each entry of ``file_list`` is expected to look like
    ``<something>_<Mon-DD-YYYY>.csv``; the date is read from the text between
    the first underscore and the following dot.

    Parameters
    ----------
    today_date : str
        Today's date formatted as ``"%b-%d-%Y"`` (e.g. ``"Jan-05-2021"``).
    preffix : str
        Prefix used to build the returned file name. Entries of
        ``file_list`` are NOT checked against it, so the caller must pass a
        list that belongs to this prefix.
    file_list : iterable of str
        Candidate file names.

    Returns
    -------
    str
        ``<preffix>_<date>.csv`` for the latest date strictly before
        ``today_date``. When no entry qualifies, the placeholder date
        ``Jan-01-1900`` is used, so the returned name will not refer to an
        existing file.

    Raises
    ------
    ValueError
        If ``today_date`` itself does not match ``"%b-%d-%Y"``.
    """
    date_format = "%b-%d-%Y"
    most_recent_date = datetime.strptime("Jan-01-1900", date_format)
    today = datetime.strptime(today_date, date_format)
    for file_name in file_list:
        try:
            file_date = datetime.strptime(file_name.split('_')[1].split('.')[0], date_format)
        except (IndexError, ValueError):
            # Not a "<prefix>_<date>.csv" name (no underscore, or the stamp is
            # not a date): ignore it instead of aborting the whole search.
            continue
        if most_recent_date < file_date < today:
            most_recent_date = file_date
    return preffix+'_' + most_recent_date.strftime(date_format) + '.csv'

In [56]:
# function to merge the two dataframes and calculate the change
def merge_df(df_today,df_recent):
    """Outer-join today's and the earlier holdings and compute the deltas.

    Parameters
    ----------
    df_today : pandas.DataFrame
        Must provide ``company``, ``ticker``, ``shares``,
        ``market value($)`` and ``weight(%)``.
    df_recent : pandas.DataFrame
        Must provide ``company``, ``ticker``, ``shares_old``,
        ``market value_old($)`` and ``weight_old(%)`` (the caller renames the
        columns before calling).

    Returns
    -------
    pandas.DataFrame
        One row per (company, ticker) pair found in either input. Values
        missing on one side are replaced by 0 before subtracting, and the
        columns are ordered: keys, changes, today's values, old values.
    """
    key_cols = ['company','ticker']
    # (change column, today's column, old column) for every tracked metric
    change_spec = (
        ('shares_change', 'shares', 'shares_old'),
        ('market value_change($)', 'market value($)', 'market value_old($)'),
        ('weight_change(%)', 'weight(%)', 'weight_old(%)'),
    )

    combined = df_today.merge(df_recent, how='outer', on=key_cols).fillna(0)
    for change_col, today_col, old_col in change_spec:
        combined[change_col] = combined[today_col] - combined[old_col]

    ordered_cols = (
        key_cols
        + [spec[0] for spec in change_spec]
        + [spec[1] for spec in change_spec]
        + [spec[2] for spec in change_spec]
    )
    return combined[ordered_cols]

In [38]:
# Folder that holds the downloaded holdings files; the trailing slash is
# required because file names are appended to it by plain concatenation.
# NOTE(review): this is an absolute, machine-specific path.
data_folder = '/Users/zhensun/Google Drive/SelfStudy/ARK_index_tracking/data/'

# All five holdings CSVs are published under the same base address.
ark_csv_base_url = 'https://ark-funds.com/wp-content/fundsiteliterature/csv/'
arkk_url = ark_csv_base_url + 'ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv'
arkg_url = ark_csv_base_url + 'ARK_GENOMIC_REVOLUTION_MULTISECTOR_ETF_ARKG_HOLDINGS.csv'
arkq_url = ark_csv_base_url + 'ARK_AUTONOMOUS_TECHNOLOGY_&_ROBOTICS_ETF_ARKQ_HOLDINGS.csv'
arkw_url = ark_csv_base_url + 'ARK_NEXT_GENERATION_INTERNET_ETF_ARKW_HOLDINGS.csv'
arkf_url = ark_csv_base_url + 'ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv'

In [3]:
# Current local timestamp, plus its "Mon-DD-YYYY" text form that is used to
# stamp the names of today's downloaded files.
today = datetime.today()
today_date = f"{today:%b-%d-%Y}"

In [41]:
# Fetch today's holdings file for every fund (ARKK, ARKG, ARKQ, ARKW, ARKF,
# in that order) and remember where each one was saved.
today_files = {
    ticker: download_file(ticker, url, data_folder, today_date)
    for ticker, url in (
        ('ARKK', arkk_url),
        ('ARKG', arkg_url),
        ('ARKQ', arkq_url),
        ('ARKW', arkw_url),
        ('ARKF', arkf_url),
    )
}
file_name_arkk_today = today_files['ARKK']
file_name_arkg_today = today_files['ARKG']
file_name_arkq_today = today_files['ARKQ']
file_name_arkw_today = today_files['ARKW']
file_name_arkf_today = today_files['ARKF']

DownloadingARKK
DownloadingARKG
DownloadingARKQ
DownloadingARKW
DownloadingARKF


In [44]:
# Load and clean today's file of each fund, in download order.
(
    df_arkk_today,
    df_arkg_today,
    df_arkq_today,
    df_arkw_today,
    df_arkf_today,
) = [
    process_df(today_path)
    for today_path in (
        file_name_arkk_today,
        file_name_arkg_today,
        file_name_arkq_today,
        file_name_arkw_today,
        file_name_arkf_today,
    )
]

In [63]:
# ARKK: compare today's holdings with the most recent earlier download
arkk_list = find_csv_filenames(data_folder,'ARKK')
most_recent_arkk_file = find_most_recent_file(today_date,'ARKK',arkk_list)
# Give the older figures an "_old" name so they do not clash with today's
# columns when the two frames are merged.
df_arkk_recent = process_df(data_folder + most_recent_arkk_file).rename(
    columns={
        "shares": "shares_old",
        "market value($)": "market value_old($)",
        "weight(%)": "weight_old(%)",
    }
)
arkk_merged = merge_df(df_arkk_today,df_arkk_recent)
# Display only the positions whose share count changed.
arkk_merged.loc[arkk_merged['shares_change'].ne(0)]

Unnamed: 0,company,ticker,shares_change,market value_change($),weight_change(%),shares,market value($),weight(%),shares_old,market value_old($),weight_old(%)
1,ROKU INC,ROKU,366.0,23599.0,0.0,3704366.0,1229924000.0,6.95,3704000.0,1229900000.0,6.95
47,EXONE CO/THE,XONE,551336.0,5232178.64,0.03,551336.0,5232179.0,0.03,0.0,0.0,0.0
48,AMAZON,AMZN,-2001.0,-6323552.33,-0.03,0.0,0.0,0.0,2001.0,6323552.0,0.03


In [62]:
# ARKG: compare today's holdings with the most recent earlier download
arkg_list = find_csv_filenames(data_folder,'ARKG')
most_recent_arkg_file = find_most_recent_file(today_date,'ARKG',arkg_list)
# Give the older figures an "_old" name so they do not clash with today's
# columns when the two frames are merged.
df_arkg_recent = process_df(data_folder + most_recent_arkg_file).rename(
    columns={
        "shares": "shares_old",
        "market value($)": "market value_old($)",
        "weight(%)": "weight_old(%)",
    }
)
arkg_merged = merge_df(df_arkg_today,df_arkg_recent)
# Display only the positions whose share count changed.
arkg_merged.loc[arkg_merged['shares_change'].ne(0)]

Unnamed: 0,company,ticker,shares_change,market value_change($),weight_change(%),shares,market value($),weight(%),shares_old,market value_old($),weight_old(%)
1,ROKU INC,ROKU,366.0,23599.0,0.0,3704366.0,1229924000.0,6.95,3704000.0,1229900000.0,6.95
47,EXONE CO/THE,XONE,551336.0,5232178.64,0.03,551336.0,5232179.0,0.03,0.0,0.0,0.0
48,AMAZON,AMZN,-2001.0,-6323552.33,-0.03,0.0,0.0,0.0,2001.0,6323552.0,0.03


In [None]:
# ARKQ: compare today's holdings with the most recent earlier download
arkq_list = find_csv_filenames(data_folder,'ARKQ')
# Search ARKQ's own file list. The ARKK list was passed here before, so the
# "previous" date was taken from ARKK downloads and could point at an ARKQ
# file that does not exist.
most_recent_arkq_file = find_most_recent_file(today_date,'ARKQ',arkq_list)
# Give the older figures an "_old" name so they do not clash with today's
# columns when the two frames are merged.
df_arkq_recent = process_df(data_folder + most_recent_arkq_file).rename(
    columns={
        "shares": "shares_old",
        "market value($)": "market value_old($)",
        "weight(%)": "weight_old(%)",
    }
)
arkq_merged = merge_df(df_arkq_today,df_arkq_recent)
# Display only the positions whose share count changed.
arkq_merged[arkq_merged['shares_change']!=0]

In [None]:
# ARKW: compare today's holdings with the most recent earlier download
arkw_list = find_csv_filenames(data_folder,'ARKW')
# Search ARKW's own file list. The ARKK list was passed here before, so the
# "previous" date was taken from ARKK downloads and could point at an ARKW
# file that does not exist.
most_recent_arkw_file = find_most_recent_file(today_date,'ARKW',arkw_list)
# Give the older figures an "_old" name so they do not clash with today's
# columns when the two frames are merged.
df_arkw_recent = process_df(data_folder + most_recent_arkw_file).rename(
    columns={
        "shares": "shares_old",
        "market value($)": "market value_old($)",
        "weight(%)": "weight_old(%)",
    }
)
arkw_merged = merge_df(df_arkw_today,df_arkw_recent)
# Display only the positions whose share count changed.
arkw_merged[arkw_merged['shares_change']!=0]

In [None]:
# ARKF: compare today's holdings with the most recent earlier download
arkf_list = find_csv_filenames(data_folder,'ARKF')
# Search ARKF's own file list. The ARKK list was passed here before, so the
# "previous" date was taken from ARKK downloads and could point at an ARKF
# file that does not exist.
most_recent_arkf_file = find_most_recent_file(today_date,'ARKF',arkf_list)
# Give the older figures an "_old" name so they do not clash with today's
# columns when the two frames are merged.
df_arkf_recent = process_df(data_folder + most_recent_arkf_file).rename(
    columns={
        "shares": "shares_old",
        "market value($)": "market value_old($)",
        "weight(%)": "weight_old(%)",
    }
)
arkf_merged = merge_df(df_arkf_today,df_arkf_recent)
# Display only the positions whose share count changed.
arkf_merged[arkf_merged['shares_change']!=0]