# Stock Market Ontology

### Dependencies

In [9]:
import csv
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import math
import os
import time
import numpy
import tinvest
from tinvest import CandleResolution
from rdflib import URIRef
from rdflib import Graph, Literal
from rdflib.namespace import RDF


### Metrics

In [10]:
fundamental_metrics = ['Index', 'P/E', 'P/S', 'Dividend %', 'Payout', 'Beta']

all_metrics = [*['Name', 'Sector', 'Country'], *fundamental_metrics]

### Get stocks tickers from [Nasdaq](https://www.nasdaq.com/market-activity/stocks/screener) and save to CSV
Use saved tickets to get and parse data from resource

In [11]:
# Read tickers from Nasdaq CSV file
def get_stock_ticker_list(file_name, ticker_field_name):
    tickers = []
    with open(file_name, newline='') as file:
        reader = csv.DictReader(file, [ticker_field_name])
        for row in reader:
            ticker = row[ticker_field_name]
            tickers.append(ticker)
    return tickers[1:]


### Parsing data from [FINVIZ](https://finviz.com/quote.ashx)

Received values:

* Name
* Sector
* Country
* P/E
* P/S
* Dividend %
* Payout
* Beta

In [12]:
resourceUrl = 'https://finviz.com/quote.ashx'


def get_fundamental_metric(soup, metric=None):
    if metric is None:
        metric = fundamental_metrics
    # Search in table with fundamental metrics
    name_cell = soup.find(text=metric) # First search header cell
    value_cell = name_cell.find_next(class_='snapshot-td2') # Next search closest cell
    return value_cell.text


def get_name_sector_country(soup):
    table = soup.find(attrs={'data-testid': 'quote-data-content'})
    links = table.findAll(class_='tab-link')

    name = links[0].find('b').text
    sector = links[1].text
    country = links[3].text

    return [('Name', name), ('Sector', sector), ('Country', country)]


def get_fundamental_data(df):
    notFound = []
    for symbol in df.index:
        try:
            headers = {
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'}
            response = requests.get(resourceUrl + '?t=' + symbol, headers=headers)
            soup = bs(markup=response.content, features="html.parser")

            for metric in fundamental_metrics:
                metricValue = get_fundamental_metric(soup, metric)
                df.loc[symbol, metric] = metricValue

            name_sector_country = get_name_sector_country(soup)
            for field in name_sector_country:
                df.loc[symbol, field[0]] = field[1]
        except Exception as e:
            notFound.append(symbol)
        print('Parsed ticker:', symbol)

    return df, notFound

### Get data from [Tinkoff API](https://tinkoffcreditsystems.github.io/invest-openapi/)

Get monthly candles for the last two years

Calculate growth for each month

The expected growth calculate as mean growth for each month

The risk is calculated using the standard deviation formula


In [13]:
token = os.environ['TINKOFF_INVEST_TOKEN']
client = tinvest.SyncClient(token)


def calculate_risk_and_expect_growth_in_month(ticker):
    resp = make_request(client.get_market_search_by_ticker, ticker)
    instruments = resp.payload.instruments

    if len(instruments) == 0:
        return None

    figi = instruments[0].figi

    candlesHistoryInMonth = make_request(client.get_market_candles,
                                         figi,
                                         '2019-01-01T00:00:00.00+00:00',
                                         '2021-04-30T23:59:59.00+00:00',
                                         CandleResolution.month)

    prices = []
    for candle in candlesHistoryInMonth.payload.candles:
        prices.append((candle.h + candle.l) / 2)

    growths = []
    for i in range(0, len(prices) - 1):
        growths.append(math.log(prices[i + 1] / prices[i]))

    expect_growth = numpy.mean(growths)
    risk = numpy.std(growths)

    return expect_growth * 100, risk * 100  # To percent


def make_request(request, *args):
    try:
        return request(*args)
    except tinvest.TooManyRequestsError as e:
        print('Too many request, wait 5 minute')
        time.sleep(60 * 5 + 5)  # 5 min 5 sec
        return request(tuple(args))

### Save Received data from [FINVIZ](https://finviz.com/quote.ashx) and [Tinkoff API](https://tinkoffcreditsystems.github.io/invest-openapi/) in result csv

After that, we can read data from CSV and fill ontology

In [14]:
def get_stocks(file_name='data.csv', separator=','):
    stocks = []
    table = pd.read_csv(file_name, sep=separator)
    for row in table.iterrows():
        stock = {}
        for col in table.columns:
            stock[col] = row[1][col]
        stocks.append(stock)

    return stocks

def get_companies_sectors_countries(file_name='data.csv', separator=','):
    table = pd.read_csv(file_name, sep=separator, parse_dates=[
        'Name', 'Sector', 'Country'
    ])

    allCompanies = set()
    allSectors = set()
    allCountries = set()

    for company in table['Name']:
        allCompanies.add(company)

    for sector in table['Sector']:
        allSectors.add(sector)

    for country in table['Country']:
        allCountries.add(country)

    return list(allCompanies), list(allSectors), list(allCountries)

### Declare Classes, Object properties and Data properties



In [15]:
URI = 'http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#'


def to_URIRef(param):
    result = URIRef(URI + param)
    return result


# Classes
Asset = to_URIRef('Asset')
Bond = to_URIRef('Bond')
Company = to_URIRef('Company')
Country = to_URIRef('Country')
Currency = to_URIRef('Currency')
Dividend = to_URIRef('Dividend')
Fond = to_URIRef('Fond')
Growth = to_URIRef('Growth')
Index = to_URIRef('Index')
Sector = to_URIRef('Sector')
Stock = to_URIRef('Stock')

# Object properties
hasCompany = to_URIRef('hasCompany')
hasCountry = to_URIRef('hasCountry')
hasCurrency = to_URIRef('hasCurrency')
includeInIndex = to_URIRef('includeInIndex')
includeInSector = to_URIRef('includeInSector')

# Data properties
hasAverageDividendYield = to_URIRef('hasAverageDividendYield')
hasPE = to_URIRef('hasPE')
hasPS = to_URIRef('hasPS')
hasPayoutRatio = to_URIRef('hasPayoutRatio')
hasExpectGrowthInMonth = to_URIRef('hasExpectGrowInMonth')
hasRisk = to_URIRef('hasRisk')
hasBeta = to_URIRef('hasBeta')



### Fill the ontology with data


In [16]:
def default_data_parser(value):
    return value


def percent_data_parser(value):
    return value.replace('%', '')


def default_name_parser(name):
    return name.strip()


def to_fixed(number, digits=2):
    return f"{number:.{digits}f}"


dataPropertyMap = {
    'P/E': {
        'relation': hasPE,
        'data_parser': default_data_parser
    },
    'P/S': {
        'relation': hasPS,
        'data_parser': default_data_parser
    },
    'Dividend %': {
        'relation': hasAverageDividendYield,
        'data_parser': percent_data_parser
    },
    'Payout': {
        'relation': hasPayoutRatio,
        'data_parser': percent_data_parser
    },
    'Beta': {
        'relation': hasBeta,
        'data_parser': percent_data_parser
    },
    'Expect Growth in month': {
        'relation': hasExpectGrowthInMonth,
        'data_parser': to_fixed
    },
    'Risk': {
        'relation': hasRisk,
        'data_parser': to_fixed
    }
}


def add_indexes(graph: Graph):
    indexes = ['DJIA', 'S&P500']
    return add_class_individuals_to_graph(graph, Index, indexes)


def add_companies_sectors_countries(graph: Graph):
    companies, sectors, countries = get_companies_sectors_countries()

    companiesResult = add_class_individuals_to_graph(graph, Company, companies, lambda name: name.replace(',', '')
                                                     .replace('.', '')
                                                     .strip())
    sectorsResult = add_class_individuals_to_graph(graph, Sector, sectors)
    countriesResult = add_class_individuals_to_graph(graph, Country, countries)

    return companiesResult, sectorsResult, countriesResult


def add_stocks(graph: Graph, all_indexes_nodes, all_companies_nodes, all_sectors_nodes, all_countries_nodes):
    stocks = get_stocks()

    result = {}
    for stock in stocks:
        ticker = stock['Ticker']
        stock_node = add_class_individual_to_graph(graph, Stock, ticker)
        result[ticker] = stock_node

        # Set object properties
        company_node = all_companies_nodes[stock['Name']]
        sector_node = all_sectors_nodes[stock['Sector']]
        country_node = all_countries_nodes[stock['Country']]
        indexes_nodes = get_indexes_nodes_for_stock(stock['Index'], all_indexes_nodes)

        add_object_property(graph, stock_node, company_node, hasCompany)
        add_object_property(graph, stock_node, sector_node, includeInSector)
        add_object_property(graph, stock_node, country_node, hasCountry)
        for index_node in indexes_nodes:
            add_object_property(graph, stock_node, index_node, includeInIndex)

        # Set data properties
        for key in dataPropertyMap.keys():
            dataPropertyInfo = dataPropertyMap[key]
            value = stock[key]
            if value == '-':
                continue
            parsed_value = dataPropertyInfo['data_parser'](value)
            add_data_property(graph, stock_node, parsed_value, dataPropertyInfo['relation'])

    return result


def add_class_individuals_to_graph(graph: Graph, Class: URIRef, names: list[string], parser=default_name_parser):
    result = {}
    for name in names:
        result[name] = add_class_individual_to_graph(graph, Class, name, parser)

    return result


def add_class_individual_to_graph(graph: Graph, Class: URIRef, name: string, parser=default_name_parser):
    parsed_name = parser(name)
    individual = to_URIRef(parsed_name.replace(' ', '_'))
    triple = (individual, RDF.type, Class)
    graph.add(triple)

    return individual


def add_object_property(graph, domain, client, relation):
    triple = (domain, relation, client)
    graph.add(triple)


def add_data_property(graph, domain, value, relation):
    triple = (domain, relation, Literal(value, datatype='xsd:float'))
    graph.add(triple)


def get_indexes_nodes_for_stock(stock_indexes: string, all_indexes_nodes):
    if stock_indexes == '-':
        return []

    parsed_stock_indexes = stock_indexes.replace(' ', '')

    result = []
    for key in all_indexes_nodes.keys():
        if key in parsed_stock_indexes:
            result.append(all_indexes_nodes[key])

    return result

NameError: name 'string' is not defined

### SPARQL Queries



In [18]:
URI = 'http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#'


def most_growing_stocks_query(max_risk=8, max_pe=35, min_beta=0.59, max_beta=1.59):
    return """
        prefix : <http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#>
        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        SELECT ?name ?company ?growth ?pe ?risk ?beta WHERE {
            ?stock rdf:type :Stock .
            ?stock :hasExpectGrowInMonth ?growth .
            ?stock :hasRisk ?risk .
            ?stock :hasPE ?pe .
            ?stock :hasCompany ?companyUri .
            ?stock :hasBeta ?beta .
            BIND(REPLACE(STR(?companyUri), "%s", "", "i") AS ?company) .
            BIND(REPLACE(STR(?stock), "%s", "", "i") AS ?name) .
            FILTER(
                xsd:float(STR(?risk)) <= %s &&
                xsd:float(STR(?pe)) <= %s
            ) .
            FILTER(
                xsd:float(STR(?beta)) >= %s &&
                xsd:float(STR(?beta)) <= %s
            )
        }
        ORDER BY DESC(xsd:float(STR(?growth)))
        LIMIT 10
    """ % (URI, URI, max_risk, max_pe, min_beta, max_beta)


def most_growing_stocks_in_sector_query(sector, max_risk=8, max_pe=35, min_beta=0.59, max_beta=1.59):
    return """
        prefix : <http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#>
        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        SELECT ?name ?sector ?company ?growth ?pe ?risk ?beta WHERE {
            ?stock rdf:type :Stock .
            ?stock :hasExpectGrowInMonth ?growth .
            ?stock :hasRisk ?risk .
            ?stock :hasPE ?pe .
            ?stock :hasCompany ?companyUri .
            ?stock :hasBeta ?beta .
            ?stock :includeInSector ?sectorUri .
            BIND(REPLACE(STR(?sectorUri), "%s", "", "i") AS ?sector) .
            BIND(REPLACE(STR(?companyUri), "%s", "", "i") AS ?company) .
            BIND(REPLACE(STR(?stock), "%s", "", "i") AS ?name) .
            FILTER(STR(?sector) = "%s") .
            FILTER(
                xsd:float(STR(?risk)) <= %s &&
                xsd:float(STR(?pe)) <= %s
            ) .
            FILTER(
                xsd:float(STR(?beta)) >= %s &&
                xsd:float(STR(?beta)) <= %s
            )
        }
        ORDER BY DESC(xsd:float(STR(?growth)))
        LIMIT 10
    """ % (URI, URI, URI, sector, max_risk, max_pe, min_beta, max_beta)


def most_growing_stocks_in_index_query(index, max_risk=8, max_pe=35, min_beta=0.59, max_beta=1.59):
    return """
        prefix : <http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#>
        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        SELECT ?name ?index ?company ?growth ?pe ?risk ?beta WHERE {
            ?stock rdf:type :Stock .
            ?stock :hasExpectGrowInMonth ?growth .
            ?stock :hasRisk ?risk .
            ?stock :hasPE ?pe .
            ?stock :hasCompany ?companyUri .
            ?stock :hasBeta ?beta .
            ?stock :includeInIndex ?indexUri .
            BIND(REPLACE(STR(?indexUri), "%s", "", "i") AS ?index) .
            BIND(REPLACE(STR(?companyUri), "%s", "", "i") AS ?company) .
            BIND(REPLACE(STR(?stock), "%s", "", "i") AS ?name) .
            FILTER(STR(?index) = "%s") .
            FILTER(
                xsd:float(STR(?risk)) <= %s &&
                xsd:float(STR(?pe)) <= %s
            ) .
            FILTER(
                xsd:float(STR(?beta)) >= %s &&
                xsd:float(STR(?beta)) <= %s
            )
        }
        ORDER BY DESC(xsd:float(STR(?growth)))
        LIMIT 10
    """ % (URI, URI, URI, index, max_risk, max_pe, min_beta, max_beta)


def most_dividend_paying_stocks_query(min_dividend_percent=2, min_payout=30, max_payout=75,
                                      max_pe=35, min_beta=0.59, max_beta=1.59):
    return """
        prefix : <http://www.semanticweb.org/matvey/ontologies/2021/4/stock-market-ontology#>
        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        SELECT ?name ?company ?dividend ?payout ?pe ?beta WHERE {
            ?stock rdf:type :Stock .
            ?stock :hasAverageDividendYield ?dividend .
            ?stock :hasPayoutRatio ?payout .
            ?stock :hasPE ?pe .
            ?stock :hasBeta ?beta .
            ?stock :hasCompany ?companyUri .
            BIND(REPLACE(STR(?companyUri), "%s", "", "i") AS ?company) .
            BIND(REPLACE(STR(?stock), "%s", "", "i") AS ?name) .
            FILTER(xsd:float(STR(?dividend)) >= %s) .
            FILTER(
                xsd:float(STR(?payout)) >= %s &&
                xsd:float(STR(?payout)) <= %s
            ) .
            FILTER(xsd:float(STR(?pe)) <= %s) .
            FILTER(
                xsd:float(STR(?beta)) >= %s &&
                xsd:float(STR(?beta)) <= %s
            )
        }
        ORDER BY DESC(xsd:float(STR(?dividend)))
        LIMIT 10
    """ % (URI, URI, min_dividend_percent, min_payout, max_payout, max_pe, min_beta, max_beta)


import pandas as pd
from rdflib import Graph


graph = Graph()
graph.parse('Stock-market-ontology.rdf', format='n3')


def run_query(query):
    result = graph.query(query)
    return pd.DataFrame(result, columns=result.vars)


most_growing_stocks = run_query(most_growing_stocks_query(max_risk=8, max_pe=35))

sector1 = 'Technology'
most_growing_stocks_in_sector1 = run_query(most_growing_stocks_in_sector_query(sector1))

sector2 = 'Communication_Services'
most_growing_stocks_in_sector2 = run_query(most_growing_stocks_in_sector_query(sector2))

index = 'S&P500'
most_growing_stocks_in_index = run_query(most_growing_stocks_in_index_query(index))

most_dividend_paying_stocks = run_query(most_dividend_paying_stocks_query())

print('The most growing stocks:')
print(most_growing_stocks)
print()

print("""The most growing stocks in %s sector: """ % sector1)
print(most_growing_stocks_in_sector1)
print()

print("""The most growing stocks in %s sector: """ % sector2)
print(most_growing_stocks_in_sector2)
print()

print("""The most growing stocks in %s index: """ % index)
print(most_growing_stocks_in_index)
print()

print('The dividend paying stocks:')
print(most_dividend_paying_stocks)
print()

The most growing stocks:
   name                         company growth     pe  risk  beta
0  LPLA      LPL_Financial_Holdings_Inc   7.45  27.15  6.65  1.29
1  VRTS  Virtus_Investment_Partners_Inc   7.05  17.78  3.14  1.58
2  IBKR   Interactive_Brokers_Group_Inc   6.31  23.28  5.11  0.67
3   AVT                       Avnet_Inc   5.76  27.15  5.00  1.53
4  MKSI             MKS_Instruments_Inc   5.26  24.58  7.56  1.56
5    JD                       JDcom_Inc   4.75  14.14  7.49  0.79
6  KLAC                 KLA_Corporation   4.59  25.66  7.71  1.24
7  AAPL                       Apple_Inc   4.50   28.6  7.29  1.22
8  FELE        Franklin_Electric_Co_Inc   4.47  34.75  3.15  0.98
9   CDW                 CDW_Corporation   4.07  28.66  5.35  1.12

The most growing stocks in Technology sector: 
   name      sector                         company growth     pe  risk  beta
0   AVT  Technology                       Avnet_Inc   5.76  27.15  5.00  1.53
1  MKSI  Technology             MKS_Instrumen