-
Notifications
You must be signed in to change notification settings - Fork 22
/
db_part_two_sp500_stock_loader.py
96 lines (79 loc) · 2.98 KB
/
db_part_two_sp500_stock_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 10 11:16:08 2018
@author: antonio constandinou
"""
from __future__ import print_function
import datetime
import bs4
import psycopg2
import requests
import os
def parse_wiki_snp500():
"""
Download and parse Wikipedia for the current list of S&P500 companies.
return:
list of tuples to add to PostgreSQL.
"""
now = datetime.datetime.utcnow()
# return html of our desired S&P 500 webapge on wikipedia
response = requests.get("http://en.wikipedia.org/wiki/List_of_S%26P_500_companies")
# soup object
soup = bs4.BeautifulSoup(response.text)
# CSS Selector syntax: find first table, select all rows and skip headers row
symbols_list = soup.select('table')[0].select('tr')[1:]
symbols = []
for i, symbol in enumerate(symbols_list):
# standard cell containing our data: 'td'
tds = symbol.select('td')
symbols.append(
(tds[0].select('a')[0].text,'equity',
tds[1].select('a')[0].text,
tds[3].text, 'USD', now, now)
)
return symbols
def insert_snp500_symbols_postgres(symbols, db_host, db_user, db_password, db_name):
"""
Load S&P500 symbols into our PostgreSQL database.
args:
symbols: list of tuples which holds our stock info data.
db_host: name of host to connect to db, type string.
db_user: name of user_name to connect to db, type string.
db_name: name of our database, type string.
returns:
None
"""
# Connect to our PostgreSQL database
conn = psycopg2.connect(host=db_host, database=db_name, user=db_user, password=db_password)
column_str = """
ticker, instrument, name, sector, currency, created_date, last_updated_date
"""
insert_str = ("%s, " * 7)[:-2]
final_str = "INSERT INTO symbol (%s) VALUES (%s)" % (column_str, insert_str)
with conn:
cur = conn.cursor()
cur.executemany(final_str, symbols)
def load_db_info(f_name_path):
"""
load text file holding our database credential info and the database name
args:
f_name_path: name of file preceded with "\\", type string
returns:
array of 4 values that should match text file info
"""
cur_path = os.getcwd()
# lets load our database credentials and info
f = open(cur_path + f_name_path, 'r')
lines = f.readlines()[1:]
lines = lines[0].split(',')
return lines
def main():
db_info_file = "database_info.txt"
db_info_file_p = "\\" + db_info_file
# necessary database info to connect and load our symbols further below
db_host, db_user, db_password, db_name = load_db_info(db_info_file_p)
symbols = parse_wiki_snp500()
insert_snp500_symbols_postgres(symbols, db_host, db_user, db_password, db_name)
print("%s symbols were successfully added." % len(symbols))
if __name__ == "__main__":
main()