# Algorithmic Trading Model for Quandl Bulk Fetch
### David Lowe
### August 31, 2021

Main URL (Quandl bulk-export endpoint for a SHARADAR datatable; replace `[Key]` with your own API key): https://www.quandl.com/api/v3/datatables/SHARADAR/SEP?qopts.export=true&api_key=[Key]

In [None]:
!pip install python-dotenv PyMySQL



In [None]:
import os
import sys
import time
import requests
import zipfile
import pandas as pd
from datetime import date, datetime
from dotenv import load_dotenv

In [None]:
# Record the wall-clock start time; the last cell subtracts it from the
# current time to report how long the whole notebook took.
startTimeScript = datetime.now()

In [None]:
# Mount Google Drive: the dotenv file is read from it and the downloaded
# archives are written to it.
from google.colab import drive
drive.mount('/content/gdrive')

# Drive folder that receives the downloaded archives and generated files
gdrivePrefix = '/content/gdrive/My Drive/Quandl_Bulk_Fetch/'

# Drive folder holding the dotenv file, and the full path to that file
env_path = '/content/gdrive/My Drive/Colab Notebooks/'
dotenv_path = f'{env_path}python_script.env'

# Left as the cell's last expression so the notebook shows load_dotenv's result
load_dotenv(dotenv_path=dotenv_path)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


True

## Task 1) Set up and Retrieve Files from Quandl

In [None]:
# API key is taken from the QUANDL_API environment variable (None when it is unset)
api_key = os.getenv('QUANDL_API')

# SHARADAR datatables to download, in fetch order
tables = [
    'SEP',
    'SFP',
    'ACTIONS',
    'INDICATORS',
    'TICKERS',
]

# Date stamp used in the archive file names; defaults to today's local date.
# To work with archives from another day, pin it manually instead:
# as_of_date = date(2021, 8, 31)
as_of_date = date.today()

In [None]:
def bulk_fetch(table_name, poll_seconds=60):
    """Download the bulk-export zip archive of one SHARADAR datatable.

    The export endpoint is polled until it reports a status listed in
    ``valid``; the file link from that response is then streamed to
    ``gdrivePrefix`` as ``SHARADAR_<table_name>_<YYYYMMDD>.zip``, where the
    date stamp comes from the notebook-level ``as_of_date``.

    Args:
        table_name: Datatable code under the SHARADAR database, e.g. 'SEP'.
        poll_seconds: Seconds to wait between status checks while the export
            is not yet downloadable.

    Raises:
        requests.HTTPError: If the status request or the download returns an
            HTTP error status.
        KeyError: If the status response lacks the expected fields.
    """
    dest_file = gdrivePrefix + 'SHARADAR_' + table_name + '_' + as_of_date.strftime('%Y%m%d') + '.zip'
    # Build the URL from the function argument rather than from whatever a
    # notebook-level variable named `table` happens to hold.
    url = 'https://www.quandl.com/api/v3/datatables/SHARADAR/%s.json?qopts.export=true&api_key=%s' % (table_name, api_key)

    # Statuses for which the code goes on to download the file link.
    # NOTE(review): presumably 'generating' means no file is ready yet -- confirm.
    valid = ('fresh', 'regenerating')

    while True:
        # Re-query on every pass; re-parsing a single stale response could
        # never observe a status change and would loop forever.
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        # The raw response is not printed: the status line below already
        # reports what the loop acts on, and the full dict floods the output.
        bulk_info = resp.json()['datatable_bulk_download']
        last_refreshed_time = bulk_info['datatable']['last_refreshed_time']
        status = bulk_info['file']['status']
        file_link = bulk_info['file']['link']
        print('Table', table_name, 'is', status, 'as of', last_refreshed_time)
        if status in valid:
            break
        time.sleep(poll_seconds)

    print('fetching from %s' % file_link)
    # The context manager releases the streamed connection even when writing fails.
    with requests.get(file_link, stream=True, timeout=60) as download:
        download.raise_for_status()
        with open(dest_file, 'wb') as fd:
            # 1 MiB chunks: far fewer write calls than 128-byte chunks
            for chunk in download.iter_content(chunk_size=1024 * 1024):
                fd.write(chunk)
    print(dest_file, 'has been fetched')

In [None]:
# Fetch every configured table in turn; each call returns only after that
# table's archive has been written to Drive.
for table in tables:
    bulk_fetch(table)

{'datatable_bulk_download': {'file': {'link': 'https://quandl-production-datahub.s3.amazonaws.com/export/SHARADAR/SEP/SHARADAR_SEP_2_0bd2000858d1d8d1f48d4cdea5f8c9e2.zip?X-Amz-Expires=1800&X-Amz-Date=20210830T220332Z&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEI7//////////wEaCXVzLWVhc3QtMSJHMEUCIDO1wUVI/gfw%2BbaoESQPFvw5yaA99Bvq%2BpT7njghgB82AiEA0Nm5Rsq%2BkFOBaB9jNGBT0d8te6325TwYBCZX5xOjOiwqgwQIxv//////////ARACGgwzMDYzMjA1MDAyMDQiDBsImA%2B2MTJ4NscCzSrXA95wMoOtLowNp36hO6XhMfw7rW0CYhf7/gsf62foWMBopKEqrJ4ASMXy%2BBhJl1OohylUQQxy1hqWo7aRxN9NN2rOSldImJvc2f/ezoV7PbxWlgQXt/S30yU7UfVk9ta%2BMZQC8VjTHrdjizagK95aSTtPCpLq93LBpt%2BfEzhnJRICJuS5126BIIOnprydLdNA9%2BR%2ByVfw6%2B/mWmuHwDrhUjgJg8vzac1HHGYmsfMhhaMkR3OwKpq3hV38GkfrtM7v/bCILffjG0s7BO2vQrGZHlH4/Xl8mUQZEOg3OIUGixLvD/VGeB3BM6amD3k7X9VNiKb2Os9HYgCf%2B/wAHFw%2B1YHbBFbV6qjAq0a3cAXIAqNxrThG8qHYSsQQYylR4tV8pifUiIDYqSs2nIKzyvlFy0JONMUjQy2TRbIcl0MaBRELbCUpXhdp0%2BU8aqYOnAEFN6b%2B8wmz%2BF87b5k0obkbLFvoX6u5ufx6HhQ2MVZPCChbiwLlDSHokuMpRCK/%2B/t6yuTFTBA9Lqgaho

## Task 2) Transform Data from Quandl for Import into Fund Manager

In [None]:
# Paths of the two price archives that the transformation step needs
SEP_ZIP_FILE, SFP_ZIP_FILE = (
    f"{gdrivePrefix}SHARADAR_{table_code}_{as_of_date.strftime('%Y%m%d')}.zip"
    for table_code in ('SEP', 'SFP')
)

# Stop the run unless both archives are on disk
if not (os.path.exists(SEP_ZIP_FILE) and os.path.exists(SFP_ZIP_FILE)):
    sys.exit('Either SEP or SFP data files is not available. Script processing cannot continue!!!')

print('Retrieved SEP dataset:', SEP_ZIP_FILE)
print('Retrieved SFP dataset:', SFP_ZIP_FILE)

Retrieved SEP dataset: /content/gdrive/My Drive/Quandl_Bulk_Fetch/SHARADAR_SEP_20210830.zip
Retrieved SFP dataset: /content/gdrive/My Drive/Quandl_Bulk_Fetch/SHARADAR_SFP_20210830.zip


In [None]:
!rm -rf '/content/gdrive/My Drive/Quandl_Bulk_Fetch/FM_interface/'
!mkdir '/content/gdrive/My Drive/Quandl_Bulk_Fetch/FM_interface/'

In [None]:
# Working folder that receives the extracted CSV files
FM_INTERFACE_DIR = gdrivePrefix + 'FM_interface/'

# Unpack both archives into the working folder; the `with` block closes each
# archive even when extraction raises.
for zip_path in (SEP_ZIP_FILE, SFP_ZIP_FILE):
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(FM_INTERFACE_DIR)

In [None]:
# The working folder must contain exactly the two extracted files
extracted_names = os.listdir(FM_INTERFACE_DIR)
if len(extracted_names) != 2:
    sys.exit('Not all data files are available. Script processing cannot continue!!!')

raw_file_1 = FM_INTERFACE_DIR + extracted_names[0]
print('Found', raw_file_1, 'for processing...')
raw_file_2 = FM_INTERFACE_DIR + extracted_names[1]
print('Found', raw_file_2, 'for processing...')

# Stack both files into one frame and parse the date column
df_dataset_import = pd.concat(
    [pd.read_csv(raw_file_1), pd.read_csv(raw_file_2)],
    ignore_index=True,
)
df_dataset_import['date'] = pd.to_datetime(df_dataset_import['date'])

# Drop the columns that are not exported, then order rows by date and ticker
df_dataset_import = (
    df_dataset_import
    .drop(columns=['close', 'closeunadj', 'lastupdated'])
    .sort_values(by=['date', 'ticker'], ascending=True, ignore_index=True)
)

# Preview both ends of the merged frame, then its column summary
for preview in (df_dataset_import.head(), df_dataset_import.tail()):
    print()
    print(preview)
print()
df_dataset_import.info()

Found /content/gdrive/My Drive/Quandl_Bulk_Fetch/FM_interface/SHARADAR_SEP_2_0bd2000858d1d8d1f48d4cdea5f8c9e2.csv for processing...
Found /content/gdrive/My Drive/Quandl_Bulk_Fetch/FM_interface/SHARADAR_SFP_2_57b147f453795a51de647236d26a618c.csv for processing...

  ticker       date   open    high     low     volume  closeadj
0      A 2011-01-03  41.56  42.140  41.411  3572300.0    27.710
1   AACC 2011-01-03   5.99   6.250   5.990    20400.0     6.150
2   AACG 2011-01-03   3.67   3.760   3.670     2400.0     1.138
3  AACPF 2011-01-03   9.70   9.700   9.630     1160.0     9.630
4   AADR 2011-01-03  29.98  30.063  29.880     1600.0    28.656

         ticker       date    open   high     low     volume  closeadj
23320767  ZWRKW 2021-08-30   0.650   0.65   0.650      235.0      0.65
23320768     ZY 2021-08-30  13.380  13.49  12.910   627532.0     13.00
23320769   ZYME 2021-08-30  32.860  33.72  32.385   154759.0     32.55
23320770   ZYNE 2021-08-30   4.158   4.20   4.050  2874199.0      

In [None]:
# Column layout of the export file
new_order = ['ticker', 'date', 'closeadj', 'open', 'high', 'low', 'volume']

# Render dates as MM/DD/YY text and arrange the columns in the export layout
df_dataset_import = (
    df_dataset_import
    .assign(date=lambda frame: frame['date'].dt.strftime('%m/%d/%y'))
    .reindex(columns=new_order)
)

print(df_dataset_import.head())
print()
print(df_dataset_import.tail())

  ticker      date  closeadj   open    high     low     volume
0      A  01/03/11    27.710  41.56  42.140  41.411  3572300.0
1   AACC  01/03/11     6.150   5.99   6.250   5.990    20400.0
2   AACG  01/03/11     1.138   3.67   3.760   3.670     2400.0
3  AACPF  01/03/11     9.630   9.70   9.700   9.630     1160.0
4   AADR  01/03/11    28.656  29.98  30.063  29.880     1600.0

         ticker      date  closeadj    open   high     low     volume
23320767  ZWRKW  08/30/21      0.65   0.650   0.65   0.650      235.0
23320768     ZY  08/30/21     13.00  13.380  13.49  12.910   627532.0
23320769   ZYME  08/30/21     32.55  32.860  33.72  32.385   154759.0
23320770   ZYNE  08/30/21      4.11   4.158   4.20   4.050  2874199.0
23320771   ZYXI  08/30/21     13.38  13.400  13.59  13.250   166740.0


In [None]:
# Write the combined frame as a header-less, index-less CSV into the working folder
output_file = FM_INTERFACE_DIR + 'SHARADAR_SEP_SFP.csv'
try:
    df_dataset_import.to_csv(output_file, sep=',', header=False, index=False)
except Exception as exc:
    # Catch Exception rather than everything, so interrupting the kernel during
    # the write is not reported as a conversion failure; the reason is printed
    # so the failure can be diagnosed.
    print('Data frame did not get converted to the output file!', exc)
else:
    print('Data frame converted successfully to the output file.')

In [None]:
# Report the elapsed wall-clock time since the start-time cell ran
print('Total time for the script:', datetime.now() - startTimeScript)

Total time for the script: 0:05:31.048175
