# Queries on the knowledge graph

In [1]:
import sys
#!{sys.executable} -m pip install pandas oxrdflib Pygments

import pandas as pd
from IPython.display import display, HTML
from pygments import highlight
from pygments.lexers import SparqlLexer
from pygments.formatters import HtmlFormatter
from rdflib import Graph


def run_query(graph, query_path):
    """Run the SPARQL query stored in a file and render the query and its results.

    The query text is displayed syntax-highlighted, followed by an HTML table
    with one row per result row and one column per result variable.

    Args:
        graph: Object exposing ``query(str)``; in this notebook an rdflib ``Graph``.
        query_path: Path of the file that contains the SPARQL query text.

    Returns:
        None. All output goes through ``display``; the frame is deliberately not
        returned so a notebook cell ending in ``run_query(...)`` does not render
        the table a second time. If the query file cannot be read, a message
        with the reason is printed and the query is not executed.
    """
    try:
        # Explicit encoding: the platform default must not decide how non-ASCII
        # characters in a query file are decoded.
        with open(query_path, 'r', encoding='utf-8') as file:
            query = file.read()
    except (OSError, ValueError) as err:
        # OSError covers missing/unreadable files, ValueError covers undecodable
        # content; report the real reason instead of always claiming "no file".
        print(f"Could not read query file {query_path}: {err}")
        return

    results = graph.query(query)

    # Display the SPARQL query
    formatter = HtmlFormatter(style='solarized-dark', full=True, nobackground=True)
    display(HTML(highlight(query, SparqlLexer(), formatter)))

    # Convert results to a Pandas DataFrame. Unbound variables (None) become
    # empty cells so they cannot be confused with a literal "None" value.
    columns = [str(var) for var in (results.vars or [])]
    rows = [["" if item is None else str(item) for item in row] for row in results]
    # Passing the variable names even when there are no rows keeps the table
    # header visible for empty results; fall back to pandas' default labels
    # when the result exposes no variable names.
    df = pd.DataFrame(rows, columns=columns or None)

    # Display the DataFrame as a table in Jupyter Notebook
    display(HTML(df.to_html()))


In [2]:
# Create an rdflib graph backed by the 'Oxigraph' store
# (presumably provided by the oxrdflib package named in the install comment above).
g = Graph(store='Oxigraph')
# Load the knowledge graph from its Turtle file; the cell output is the repr of
# the value returned by parse().
g.parse('../data/finance-kg.ttl', format='turtle')

<Graph identifier=N7b173d3dbce34d22a6a478101a0b049f (<class 'rdflib.graph.Graph'>)>

In [3]:
# Print the number of triples loaded into the graph
print(f'amount of triples: {len(g)}')

amount of triples: 239085


In [4]:
# Display the graph object's repr (same identifier as returned by the parse cell)
g

<Graph identifier=N7b173d3dbce34d22a6a478101a0b049f (<class 'rdflib.graph.Graph'>)>

In [5]:
# Try run_query on the test query; the result has `sector` and `sectorName` columns
run_query(g, '../queries/test.rq')

Unnamed: 0,sector,sectorName
0,https://finance.yahoo.com/sectors/energy/oil-gas-equipment-services,Oil & Gas Equipment & Services
1,https://finance.yahoo.com/sectors/healthcare/healthcare-plans,Healthcare Plans
2,https://finance.yahoo.com/sectors/consumer-cyclical/auto-manufacturers,Auto Manufacturers
3,https://finance.yahoo.com/sectors/industrials/trucking,Trucking
4,https://finance.yahoo.com/sectors/consumer-cyclical/apparel-manufacturing,Apparel Manufacturing
5,https://finance.yahoo.com/sectors/consumer-defensive/food-distribution,Food Distribution
6,https://finance.yahoo.com/sectors/communication-services/telecom-services,Telecom Services
7,https://finance.yahoo.com/sectors/consumer-defensive/discount-stores,Discount Stores
8,https://finance.yahoo.com/sectors/financial-services/insurance-property-casualty,Insurance - Property & Casualty
9,https://finance.yahoo.com/sectors/utilities/utilities-independent-power-producers,Utilities - Independent Power Producers


In [6]:
# Result has a single `recommendationValue` column.
# NOTE(review): which company/rating the rows belong to is defined in the .rq file — confirm there.
run_query(g, '../queries/recommendation.rq')

Unnamed: 0,recommendationValue
0,21
1,20
2,20
3,14


In [7]:
# Earnings dates: each row pairs an earnings IRI with its date
# (every row in the stored output is for MSFT)
run_query(g, '../queries/earnings_date.rq')

Unnamed: 0,earningsDate,date
0,https://finance.yahoo.com/quote/MSFT/earnings/2024-10-22,2024-10-22
1,https://finance.yahoo.com/quote/MSFT/earnings/2023-04-25,2023-04-25
2,https://finance.yahoo.com/quote/MSFT/earnings/2022-04-26,2022-04-26
3,https://finance.yahoo.com/quote/MSFT/earnings/2024-07-23,2024-07-23
4,https://finance.yahoo.com/quote/MSFT/earnings/2023-01-24,2023-01-24
5,https://finance.yahoo.com/quote/MSFT/earnings/2023-07-25,2023-07-25
6,https://finance.yahoo.com/quote/MSFT/earnings/2024-04-23,2024-04-23
7,https://finance.yahoo.com/quote/MSFT/earnings/2025-01-28,2025-01-28
8,https://finance.yahoo.com/quote/MSFT/earnings/2022-07-26,2022-07-26
9,https://finance.yahoo.com/quote/MSFT/earnings/2022-10-25,2022-10-25


# Validation queries

In [8]:
# Validation: number of companies per ETF (`etfName`, `companyCount`)
run_query(g, '../queries/validation/etf_company_count.rq')

Unnamed: 0,etfName,companyCount
0,iShares Core S&P 500 ETF,502
1,iShares MSCI World ETF,1441


In [9]:
# Validation: total number of distinct companies (`totalDistinctCompanies`)
run_query(g, '../queries/validation/company_amount.rq')

Unnamed: 0,totalDistinctCompanies
0,1466


In [10]:
# Validation: total company count next to the number of companies with an address
run_query(g, '../queries/validation/address.rq')

Unnamed: 0,totalCompanies,companyWithAddress
0,1466,1466


In [11]:
# Validation: total company count next to the number of companies with a recommendation
run_query(g, '../queries/validation/recommendation.rq')

Unnamed: 0,totalCompanies,companiesWithRecommendation
0,1466,836


In [12]:
# Validation: total company count next to the number of companies with an earnings date
run_query(g, '../queries/validation/earnings.rq')

Unnamed: 0,totalCompanies,companiesWithEarningsDate
0,1466,1249


In [13]:
# Validation: lists `companyName` and `ticker`.
# NOTE(review): presumably the companies that have no industry assigned — confirm against the .rq file.
run_query(g, '../queries/validation/industry.rq')

Unnamed: 0,companyName,ticker
0,LIFCO AB,LFABF
1,MICHELIN(CIE GLE DES ETABL.),MGDDF
2,ISRAEL DISCOUNT BANK,ISDAF
3,AMPLIFON,AMFPF
4,BANQUE CANTONALE VAUDOISE,BQCNF
5,3I GROUP,TGOPF
6,SYENSQO SA,SHBBF
7,HIKARI TSUSHIN INC,HKTGF


# Analysis queries

In [14]:
# Companies per ETF, reported as both `companyCount` and `companyNameCount`
run_query(g, '../queries/companies_per_etf.rq')

Unnamed: 0,etfname,companyCount,companyNameCount
0,iShares MSCI World ETF,1441,1441
1,iShares Core S&P 500 ETF,502,502


In [15]:
# Count per `gender` value (the stored output contains the values `w` and `m`)
run_query(g, '../queries/gender.rq')

Unnamed: 0,gender,count
0,w,2830
1,m,8924


In [16]:
# Company name and ticker; judging by the query file name, companies held by the
# S&P 500 ETF but not by the MSCI World ETF — confirm against the .rq file
run_query(g, '../queries/sp_500_companies_not_in_msci.rq')

Unnamed: 0,companyName,companyTicker
0,"Tapestry, Inc.",TPR
1,V.F. Corporation,VFC
2,"Palo Alto Networks, Inc.",PANW
3,"Mondelez International, Inc.",MDLZ
4,Invesco Ltd,IVZ
5,Pinnacle West Capital Corporati,PNW
6,Walt Disney Company (The),DIS
7,"United Parcel Service, Inc.",UPS
8,Ralph Lauren Corporation,RL
9,Whirlpool Corporation,WHR


In [17]:
# Company name and ticker; judging by the query file name, companies held by the
# MSCI World ETF but not by the S&P 500 ETF — confirm against the .rq file
run_query(g, '../queries/msci_companies_not_in_sp_500.rq')

Unnamed: 0,companyName,companyTicker
0,LEGAL & GENERAL GROUP,LGGNF
1,SCOUT24 SE,SCOTF
2,KESKO OYJ,KKOYF
3,TOHO CO LTD,TKCOF
4,TFI International Inc.,TFII
5,ATLAS COPCO AB,ATLPF
6,JARDINE MATHESON HLDGS,JARLF
7,TELSTRA GROUP LIMITED,TTRAF
8,ENTAIN PLC,GMVHF
9,DAIMLER TRUCK HOLDING AG,DTGHF


In [18]:
# Company count per country for IVV; the counts in the stored output sum to 502,
# the same number reported for the iShares Core S&P 500 ETF
run_query(g, '../queries/IVV_companies_country.rq')

Unnamed: 0,country,companyCount
0,United States,481
1,Ireland,10
2,Switzerland,4
3,United Kingdom,3
4,Bermuda,2
5,Netherlands,1
6,Canada,1


In [19]:
# Company count per country for URTH
# NOTE(review): URTH presumably refers to the iShares MSCI World ETF — confirm
run_query(g, '../queries/URTH_companies_country.rq')

Unnamed: 0,country,companyCount
0,United States,572
1,Japan,214
2,Canada,86
3,United Kingdom,79
4,France,56
5,Germany,55
6,Australia,54
7,Switzerland,53
8,Sweden,41
9,Netherlands,29


In [20]:
# Sector breakdown for IVV: `sectorPercentage` is the percentage per `sectorName`
# (the stored values are consistent with a share of the company count, e.g. 74 of 502)
run_query(g, '../queries/IVV_sectors.rq')

Unnamed: 0,sectorName,sectorPercentage
0,Technology,14.741035856573705
1,Industrials,14.541832669322709
2,Financial Services,13.545816733067728
3,Healthcare,12.94820717131474
4,Consumer Cyclical,11.354581673306772
5,Consumer Defensive,7.370517928286851
6,Real Estate,6.175298804780877
7,Utilities,5.97609561752988
8,Energy,4.581673306772908
9,Basic Materials,4.382470119521912


In [21]:
# Sector breakdown for URTH: `sectorPercentage` is the percentage per `sectorName`
# (the stored values are consistent with a share of the company count, e.g. 254 of 1441)
run_query(g, '../queries/URTH_sectors.rq')

Unnamed: 0,sectorName,sectorPercentage
0,Industrials,17.626648160999306
1,Financial Services,14.989590562109644
2,Technology,11.866759195003468
3,Consumer Cyclical,10.895211658570435
4,Healthcare,9.299097848716167
5,Consumer Defensive,7.356002775850104
6,Basic Materials,6.800832755031228
7,Real Estate,6.315058986814712
8,Communication Services,5.27411519777932
9,Utilities,4.857737682165163


In [22]:
# Executive pay: `executiveName`, `jobTitle`, `totalPay`, `companyName`.
# NOTE(review): the stored output has exactly two rows — confirm how the .rq file selects them.
run_query(g, '../queries/highest_payed.rq')

Unnamed: 0,executiveName,jobTitle,totalPay,companyName
0,"Stephen Allen Schwarzman B.A., M.B.A.","Chairman, CEO & Co-Founder",119784376.0,Blackstone Inc.
1,Helena Hedblom M.Sc.,"CEO, President & Director",22052000.0,EPIROC AB


In [24]:
# TODO: fix query — the stored output shows a single row with an
# `oldestEarningsDate` of 1995-03-08 (CRH), which is not an upcoming earnings date.
run_query(g, '../queries/upcoming_earnings.rq')

Unnamed: 0,company,companyName,ticker,oldestEarningsDate
0,https://finance.yahoo.com/quote/CRH,CRH PLC,CRH,1995-03-08
