In [1]:
import sqlite3
import pandas as pd

def create_connection(db_file):
    """Open a connection to the SQLite database stored in ``db_file``.

    :param db_file: path of the database file
    :return: Connection object, or None when the connection cannot be opened
             (the sqlite3 error is printed in that case)
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # Only ``sqlite3`` is imported, so the exception class has to be
        # qualified; a bare ``Error`` would raise NameError while handling
        # the failure instead of falling through to ``return None``.
        print(e)

    return conn

def getNews(conn):
    """Read the stock master listing into a DataFrame.

    :param conn: the Connection object
    :return: DataFrame holding the id, symbol, descript and exchange
             columns of every row in the stock_master table
    """
    sql = '''
    select 
        a.id,
	    a.symbol, 
	    a.descript, 
        a.exchange
    from 
	    stock_master as a
    '''
    frame = pd.read_sql_query(sql, conn)
    return frame

# Open the SQLite file and load the stock_master rows into `df`.
# `conn` and `df` are reused by the later cells of this notebook.
conn = create_connection("./data/test.db")
df = getNews(conn)
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,ID,symbol,descript,exchange
0,21480311,A,ARMOR MINERALS INC.,TOR-V
1,96768,AC,AIR CANADA,TOR
2,26050555,AD,ALARIS ROYALTY CORP,TOR
3,31349,AF,ALARMFORCE,TOR
4,69675825,AI,ATRIUM MORTGAGE INVESTMENT CORPORATION,TOR


In [2]:
# Distinct exchange codes present in the loaded listing.
set(df["exchange"])

{'TOR', 'TOR-V'}

In [3]:
def get_ticket(symbol, exchange):
    """Build the suffixed ticker string for a symbol.

    :param symbol: symbol text from the master listing
    :param exchange: exchange code of the symbol
    :return: '' when the symbol contains a '.'; otherwise the symbol with
             ".V" appended for the "TOR-V" exchange and ".TO" appended for
             any other exchange
    """
    # Dotted symbols are skipped: callers treat the empty string as "no ticker".
    if '.' in symbol:
        return ''
    suffix = ".V" if exchange == "TOR-V" else ".TO"
    return symbol + suffix

# Derive one ticker per row by pairing each symbol with its exchange,
# store the result as the 'ticket' column, then preview ten rows.
ticket = list(map(get_ticket, df.symbol, df.exchange))
df['ticket'] = ticket
df.head(10)

Unnamed: 0,ID,symbol,descript,exchange,ticket
0,21480311,A,ARMOR MINERALS INC.,TOR-V,A.V
1,96768,AC,AIR CANADA,TOR,AC.TO
2,26050555,AD,ALARIS ROYALTY CORP,TOR,AD.TO
3,31349,AF,ALARMFORCE,TOR,AF.TO
4,69675825,AI,ATRIUM MORTGAGE INVESTMENT CORPORATION,TOR,AI.TO
5,32660811,AR,ARGONAUT GOLD LTD,TOR,AR.TO
6,24512696,AZ,ARIZONA MINING INC,TOR,AZ.TO
7,21503032,AAB,ABERDEEN INTL INC,TOR,AAB.TO
8,29945393,AAV,ADVANTAGE OIL & GAS LTD,TOR,AAV.TO
9,13481030,ABT,ABSOLUTE SOFTWARE,TOR,ABT.TO


In [None]:
# Optional: save every flattened value into the stock_attributes table instead of storing only the industry.

from pandas import json_normalize
import pandas as pd
import requests

def flatten_json(y):
    """Flatten nested dicts and lists into a single-level dict.

    Each leaf value is stored under a key made of the dict keys and list
    positions on the path to it, joined with '_'
    (e.g. ``{"a": {"b": [7]}}`` becomes ``{"a_b_0": 7}``).
    Empty dicts and empty lists contribute no entries, and a top-level
    scalar is stored under the empty-string key.

    :param y: decoded JSON value (dict, list or scalar); dict keys must be
              strings because they are concatenated into the path
    :return: dict mapping each joined path to its leaf value
    """
    out = {}

    def flatten(x, name=''):
        # isinstance (rather than an exact type check) lets dict/list
        # subclasses such as OrderedDict be flattened instead of being
        # stored whole as a leaf value.
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, name + key + '_')
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, name + str(position) + '_')
        else:
            # Drop the trailing '_' separator added by the parent level.
            out[name[:-1]] = x

    flatten(y)
    return out

# Download the assetProfile module for every non-empty ticker, flatten each
# response into a one-row frame, and write the combined result to the
# 'stock_attributes' table.
# The one-row frames are gathered in a list and concatenated once after the
# loop; concatenating inside the loop re-copies all accumulated rows on
# every iteration.
frames = []
for ticket in df.ticket:
    if len(ticket) > 0:
        try:
            # A timeout keeps one unresponsive request from hanging the whole run.
            r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/" + ticket + "?lang=en-US&region=US&modules=assetProfile&corsDomain=finance.yahoo.com", timeout=30)
            # Decode the body once and reuse it for both the check and the flatten.
            rj = r.json()
            if rj['quoteSummary']['result'] is not None:
                frames.append(json_normalize(flatten_json(rj)))
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Best effort: a failed request, a non-JSON body or an unexpected
            # payload shape skips this ticker, but the reason is reported
            # instead of being silently swallowed.
            print(ticket + ': skipped (' + repr(exc) + ')')
    # Progress line: ticker and number of rows collected so far.
    print(ticket + ':' + str(len(frames)))

# pd.concat raises on an empty list, so fall back to an empty frame.
dfout = pd.concat(frames) if frames else pd.DataFrame()

dfout.to_sql('stock_attributes', con = conn)

In [5]:
# Display the flattened attribute frame collected by the download loop.
dfout

Unnamed: 0,quoteSummary_result_0_assetProfile_address1,quoteSummary_result_0_assetProfile_address2,quoteSummary_result_0_assetProfile_city,quoteSummary_result_0_assetProfile_state,quoteSummary_result_0_assetProfile_zip,quoteSummary_result_0_assetProfile_country,quoteSummary_result_0_assetProfile_phone,quoteSummary_result_0_assetProfile_fax,quoteSummary_result_0_assetProfile_website,quoteSummary_result_0_assetProfile_industry,...,quoteSummary_result_0_assetProfile_companyOfficers_5_totalPay_longFmt,quoteSummary_result_0_assetProfile_companyOfficers_6_totalPay_raw,quoteSummary_result_0_assetProfile_companyOfficers_6_totalPay_fmt,quoteSummary_result_0_assetProfile_companyOfficers_6_totalPay_longFmt,quoteSummary_result_0_assetProfile_companyOfficers_7_totalPay_raw,quoteSummary_result_0_assetProfile_companyOfficers_7_totalPay_fmt,quoteSummary_result_0_assetProfile_companyOfficers_7_totalPay_longFmt,quoteSummary_result_0_assetProfile_companyOfficers_8_totalPay_raw,quoteSummary_result_0_assetProfile_companyOfficers_8_totalPay_fmt,quoteSummary_result_0_assetProfile_companyOfficers_8_totalPay_longFmt
0,999 Canada Place,Suite 555,Vancouver,BC,V6C 3E1,Canada,604-687-1717,604-687-1715,http://www.armorminerals.com,Other Industrial Metals & Mining,...,,,,,,,,,,
0,Air Canada Centre,7373 CÃ´te Vertu Boulevard West,Saint-Laurent,QC,H4Y 1H4,Canada,514-422-6020,514-422-2641,http://www.aircanada.com,Airlines,...,,,,,,,,,,
0,20 Adelaide Street East,Suite 900,Toronto,ON,M5C 2T6,Canada,416 867 1053,416 867 1303,http://www.atriummic.com,Mortgage Finance,...,,,,,,,,,,
0,9600 Prototype Court,,Reno,NV,89521,United States,775-284-4422,775-284-4426,http://www.argonautgold.com,Gold,...,,,,,,,,,,
0,65 Queen Street West,Suite 815 PO Box 75,Toronto,ON,M5H 2M5,Canada,416 861 5882,416 861 8165,http://www.aberdeeninternational.ca,Asset Management,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,5 Richard Way SW,Suite 102,Calgary,AB,T3E 7M8,Canada,403 266 4808,403 233 0845,http://www.sylogist.com,Software—Application,...,,,,,,,,,,
0,125 Don Hillock Drive,Unit 18,Aurora,ON,L4G 0H8,Canada,905-727-1779,905-727-8779,http://www.triorigin.com,Gold,...,,,,,,,,,,
0,7500 Macleod Trail SE,Suite 200,Calgary,AB,T2H 0L9,Canada,403 777 0036,403 777 0039,http://quorumdms.com,Software—Application,...,,,,,,,,,,
0,744 West Hastings Street,Suite 310,Vancouver,BC,V6C 1A5,Canada,604-682-2933,,http://www.segoresources.com,Other Industrial Metals & Mining,...,,,,,,,,,,


In [6]:
import requests
# For every ticker, keep the raw response body plus the sector and industry
# found in its assetProfile. The three lists stay aligned with the rows of
# `df`: tickers that are empty or whose lookup fails contribute '' entries.
json_txts = []
sectors = []
industrys = []
for ticket in df.ticket:
    json_txt = ''
    sector = ''
    industry = ''
    if len(ticket) > 0:
        profile = {}
        try:
            # A timeout keeps one unresponsive request from hanging the whole run.
            r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/" + ticket + "?lang=en-US&region=US&modules=assetProfile&corsDomain=finance.yahoo.com", timeout=30)
            json_txt = r.content
            # Decode the body once; sector and industry are both read from it.
            profile = r.json()['quoteSummary']['result'][0]['assetProfile']
        except (ValueError, KeyError, IndexError, TypeError):
            # Body is not JSON or carries no assetProfile: keep the blanks.
            profile = {}
        except requests.RequestException as exc:
            # Network-level failure: report it and keep the blanks.
            print(ticket + ': request failed (' + repr(exc) + ')')
            profile = {}

        if isinstance(profile, dict):
            # Each field is looked up independently so that one missing
            # field does not blank the other; None is normalised to ''.
            sector = profile.get('sector') or ''
            industry = profile.get('industry') or ''

    print(ticket + ':' + industry + ":" + sector)
    json_txts.append(json_txt)
    sectors.append(sector)
    industrys.append(industry)


https://query1.finance.yahoo.com/v10/finance/quoteSummary/A.V?lang=en-US&region=US&modules=assetProfile&corsDomain=finance.yahoo.com


In [16]:
# Attach the per-ticker results collected by the download loop; each list
# holds one entry per row of `df`, in row order.
df['industry'] = industrys
df['sector'] = sectors
# Assign the full list of responses. The scalar `json_txt` only holds the
# body of the last ticker processed and would be broadcast to every row.
df['json'] = json_txts

In [18]:
# Number of distinct industry labels, then distinct sector labels
# (the empty string used for missing values counts as one label).
for labels in (industrys, sectors):
    print(len(set(labels)))
df.head()

125
13


Unnamed: 0,ID,symbol,descript,exchange,ticket,industry,sector,json
0,21480311,A,ARMOR MINERALS INC.,TOR-V,A.V,Other Industrial Metals & Mining,Basic Materials,"b'{""quoteSummary"":{""result"":[{""assetProfile"":{..."
1,96768,AC,AIR CANADA,TOR,AC.TO,Airlines,Industrials,"b'{""quoteSummary"":{""result"":[{""assetProfile"":{..."
2,26050555,AD,ALARIS ROYALTY CORP,TOR,AD.TO,Conglomerates,Industrials,"b'{""quoteSummary"":{""result"":[{""assetProfile"":{..."
3,31349,AF,ALARMFORCE,TOR,AF.TO,,,"b'{""quoteSummary"":{""result"":[{""assetProfile"":{..."
4,69675825,AI,ATRIUM MORTGAGE INVESTMENT CORPORATION,TOR,AI.TO,Mortgage Finance,Financial Services,"b'{""quoteSummary"":{""result"":[{""assetProfile"":{..."


In [19]:
# Write the enriched frame to the 'master_ext' table through the open connection.
# NOTE(review): no if_exists argument is passed, so pandas' default applies;
# re-running this cell is expected to fail once the table exists — confirm
# whether replacing the table is wanted.
df.to_sql('master_ext', con = conn)

In [None]:
# Display the enriched master frame.
df