In [2]:
import configparser
import pandas as pd
from create_connection import create_database_connection
import sql_queries as sql
import os
import numpy as np
from iex_api import IEXmanager

In [2]:
conn = create_database_connection()
cur = conn.cursor()

TypeError: create_database_connection() missing 1 required positional argument: 'config_file'

In [8]:
CONFIG_FILE = 'dwh.cfg'

def get_stock_symbols(path):
    METAFILE = 'symbols_valid_meta.csv'
    all_symbols = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file != METAFILE:
                symbol = file.split('.')[0]
                all_symbols.append(symbol)
    return all_symbols

def create_sql_query(table, data):
    columns = []
    values = []
    for column, value in data.items():
        columns.append(column.lower())
        if value is None: # Convert to NULL
            value = 'NULL'
        elif not isinstance(value, int ) and not isinstance(value, float): # Add Quotes to string values
            value = f"'{str(value)}'"
        else:
            value = str(value)
        values.append(value)

    sql_query = f"""INSERT INTO {table} ({','.join(columns)})
                    VALUES ({','.join(values)})"""              
    return sql_query

def load_iex_row(iex_manager, symbol, api_type, table_name, cur=cur, conn=conn):
    try:
        print(f"INFO: Fetching {symbol} {api_type} data")
        data = iex_manager.get_symbol_data(symbol, api_type)
    except Exception as e:
        print(e)
    try:
        print("Loading to database")
        sql_query = create_sql_query(table_name, data)
        cur.execute(sql_query)
        conn.commit()
        print("Done!")
    except Exception as e:
        print(e)
        conn.rollback()

def load_api_data(iex_manager, all_symbols, tables):
    errors = []
    for symbol in all_symbols:
        for api_type, table_name in tables.items():
            load_iex_row(iex_manager, symbol, api_type, table_name)

In [9]:
path = os.path.join('data', 'stock-market-dataset')
all_symbols = get_stock_symbols(path)

In [None]:
iex_manager = IEXmanager(CONFIG_FILE)
data = iex_manager.get_symbol_data(symbol, 'company')

In [11]:
tables = {'company': 'staging_companies', 'stats': 'staging_stats'}
iex_manager = IEXmanager(CONFIG_FILE)
load_api_data(iex_manager, all_symbols, tables)

INFO: Fetching AAAU company data
Loading to database
Done!
INFO: Fetching AAAU stats data
Loading to database
Done!
INFO: Fetching AADR company data
Loading to database
Done!
INFO: Fetching AADR stats data
Loading to database
Done!
INFO: Fetching AAXJ company data
Loading to database
Done!
INFO: Fetching AAXJ stats data
Loading to database
Done!
INFO: Fetching ABEQ company data
Loading to database
Done!
INFO: Fetching ABEQ stats data
Loading to database
Done!
INFO: Fetching ACES company data
Loading to database
Done!
INFO: Fetching ACES stats data
Loading to database
Done!
INFO: Fetching ACIO company data
Loading to database
Done!
INFO: Fetching ACIO stats data
Loading to database
Done!
INFO: Fetching ACSG company data
Loading to database
Done!
INFO: Fetching ACSG stats data
Loading to database
Done!
INFO: Fetching ACSI company data
Loading to database
Done!
INFO: Fetching ACSI stats data
Loading to database
Done!
INFO: Fetching ACT company data
Loading to database
Done!
INFO: Fetching

KeyboardInterrupt: 

In [4]:
from threading import Thread
import time
def threaded_process(nthreads, all_symbols):
    """process the id range in a specified number of threads"""
    store = {}
    threads = []
    # create the threads
    for i in range(nthreads):
        symbols = all_symbols[i::nthreads]
        iex_manager = IEXmanager(CONFIG_FILE)
        tables = {'company': 'staging_companies', 'stats': 'staging_stats'}
        t = Thread(target=load_api_data, args=(iex_manager, symbols, tables))
        threads.append(t)
    
    # start the threads
    [ t.start() for t in threads ]
    # wait for the threads to finish
    [ t.join() for t in threads ]

In [None]:
tic = time.time()
threaded_process(2, all_symbols)
toc = time.time()

INFO: Fetching AAAU company dataINFO: Fetching AADR company data

Loading to database
Done!
INFO: Fetching AAAU stats data
Loading to database
Done!
INFO: Fetching AADR stats data
Loading to databaseLoading to database

Done!Done!
INFO: Fetching ABEQ company data

INFO: Fetching AAXJ company data
Loading to database
Done!
INFO: Fetching ABEQ stats data
Loading to database
Done!
INFO: Fetching AAXJ stats data
Loading to databaseLoading to database

Done!Done!

INFO: Fetching ACIO company data
INFO: Fetching ACES company data
Request failed with the following status code 429Loading to database

Loading to database
local variable 'data' referenced before assignment
Done!INFO: Fetching ACES stats data

INFO: Fetching ACIO stats data
Loading to databaseLoading to database

Done!Done!

INFO: Fetching ACSI company data
INFO: Fetching ACSG company data
Loading to database
Done!
INFO: Fetching ACSI stats data
Loading to database
Done!
INFO: Fetching ACSG stats data
Loading to databaseRequest fa

In [17]:
d = []
errors = []
for symbol in all_symbols:
    try:
        print(f"INFO: Fetching {symbol}")
        data = iex_manager.get_symbol_data(symbol, 'stats')
        d.append(data)
    except Exception as e:
        print(f"Error on {symbol}. Skipping")
        errors.append((symbol, e))
df = pd.DataFrame(d)

INFO: Fetching AAAU
INFO: Fetching AADR
INFO: Fetching AAXJ
INFO: Fetching ABEQ
INFO: Fetching ACES
INFO: Fetching ACIO
INFO: Fetching ACSG
INFO: Fetching ACSI
INFO: Fetching ACT
INFO: Fetching ACWF
INFO: Fetching ACWI
INFO: Fetching ACWV
INFO: Fetching ACWX
INFO: Fetching ADME
INFO: Fetching ADRE
INFO: Fetching AESR
INFO: Fetching AFIF
INFO: Fetching AFK
INFO: Fetching AFLG
INFO: Fetching AFMC
INFO: Fetching AFSM
INFO: Fetching AFTY
INFO: Fetching AGG
INFO: Fetching AGGP
INFO: Fetching AGGY
INFO: Fetching AGND
INFO: Fetching AGQ
INFO: Fetching AGT
INFO: Fetching AGZ
INFO: Fetching AGZD
INFO: Fetching AIA
INFO: Fetching AIEQ
INFO: Fetching AIIQ
INFO: Fetching AIQ
INFO: Fetching AIRR
INFO: Fetching ALFA
INFO: Fetching ALTS
INFO: Fetching ALTY
INFO: Fetching AMCA
INFO: Fetching AMLP
INFO: Fetching AMOM
INFO: Fetching AMZA
INFO: Fetching ANGL
INFO: Fetching AOA
INFO: Fetching AOK
INFO: Fetching AOM
INFO: Fetching AOR
INFO: Fetching ARCM
INFO: Fetching ARGT
INFO: Fetching ARKF
INFO: Fetchi

In [19]:
df.to_csv(os.path.join('data', 'company_stats.csv'), sep=';', index=False)

In [3]:
errors = []
path = os.path.join(os.getcwd(), 'data', 'stock-market-dataset', 'stocks')
for file in os.listdir(path):
    if file.endswith('.csv'):
        try:
            stock = file.split('.')[0]
            file_path = os.path.join(path, file)
            df = pd.read_csv(file_path)
            df['symbol'] = stock
            df['stock_type'] = 'stock'
            df.to_csv(file_path, index=False)
            f_handle = open(os.path.join(path, file), 'r')
            header = f_handle.readline()
            cur.copy_from(f_handle, 'staging_daily_quotes', sep=',', null="")
            conn.commit()
        except Exception as e:
            print(e)
            errors.append((file, e))
            conn.rollback()

In [15]:
stats = os.getcwd(), 'data', 'company_stats.csv'
df = pd.read_csv(companies_path)
df.to_csv(companies_path, sep=';')

In [61]:
stats = os.path.join(os.getcwd(), 'data', 'companies_with_dummy_demographics.csv')
df = pd.read_csv(stats, delimiter=';')
for column in df.columns:
    try:
        df[column] = df[column].str.replace(';', ' ').str.replace('\n', ' ').str.strip()
    except:
        continue
df = df.replace({r'[^\x00-\x7F]+':''}, regex=True)
df.to_csv(stats, index=False, sep=';')

In [60]:
df.loc[4493]

symbol                                                          GPL
companyName                               Great Panther Mining Ltd.
exchange                                              YmrnaNSA iEec
industry                                            eMoPs istclauer
website                                 caahthw.ww:oeep.gnpr//trtmt
description       tndafTsdaheaaonIt- arnn goutAxbnrpiah-salta ur...
CEO                                             rndonebtReHeD . sor
securityName                           nrtntiPe  adiGLMietih gneram
issueType                                                        cs
sector                                                       #NAME?
primarySicCode                                                 1061
employees                                                       849
address                                        enSre0irG20 tte lval
address2                                                        NaN
state                                           

In [48]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
df[df['symbol'] == 'GAIN'].to_csv('inspect.csv')

In [62]:
companies_path = os.path.join(os.getcwd(), 'data', 'companies_with_dummy_demographics.csv')
f_handle = open(companies_path, 'r')
header = f_handle.readline().split(';')
try:
    cur.copy_from(f_handle, 'staging_companies', sep=';', null="", columns=header)
    conn.commit()
except Exception as e:
    print(e)
    conn.rollback()

In [65]:
demographics = os.path.join(os.getcwd(), 'data', 'us-cities-demographics.csv')
f_handle = open(demographics, 'r')
header = f_handle.readline().split(';')
try:
    cur.copy_from(f_handle, 'staging_demographics', sep=';', null="")
    conn.commit()
except Exception as e:
    print(e)
    conn.rollback()

In [73]:
METAFILE = 'symbols_valid_meta'
path = os.path.join(os.getcwd(), 'data', 'stock-market-dataset')
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith('csv') and METAFILE not in file:
            print(root, file)

C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs AAAU.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs AADR.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs AAXJ.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs ABEQ.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs ACES.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs ACIO.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacity_projects\nasdaq_datawarehouse\data\stock-market-dataset\etfs ACSG.csv
C:\Users\jbsab\Desktop\Courses\Data Engineering\Udacity\udacit

In [1]:
import configparser
import pandas as pd
from create_connection import create_database_connection
import sql_queries as sql
import os
import numpy as np
from iex_api import IEXmanager

conn = create_database_connection('dwh.cfg')
cur = conn.cursor()

In [2]:
from sql_queries import insert_queries
from create_tables import create_tables

create_tables(cur, conn)

In [3]:
for query in insert_queries:
    try:
        cur.execute(query)
        conn.commit()
    except Exception as E:
        print(E)
        conn.rollback()