## This section defines functions for pulling country links and for compiling payload parameters

In [30]:
import json
import re
import os
import requests
from bs4 import BeautifulSoup
import http.client
import csv
import time
from datetime import datetime

In [2]:
# Root folder of the project on disk; the payload JSON paths and the CSV output
# paths used further down are all built by appending to this string.
basePath = 'E:\\webscr\\tradingView\\'

In [3]:
def compileAllParameters(payloadsPath):
    
    """
    Description: This function compiles a list of all parameters used across most tables on the website
    TradingView by accessing them from pre-downloaded and stored payload files in json form
    
    payloadsPath: The path to the folder holding the payload files (with or without a trailing separator)
    
    Returns: A list of the unique entries found under the 'columns' key of every '.json' file
    in that folder. The order of the list is not defined.
    """
    
    paramSet = set()
    for payload in os.listdir(payloadsPath):

        # Only the payload files themselves are of interest
        if not payload.endswith('.json'):
            continue

        # The context manager closes the file even when 'columns' is empty or missing
        with open(os.path.join(payloadsPath, payload), "r") as f:
            myPayLoad = json.load(f)

        paramSet.update(myPayLoad['columns'])

    return list(paramSet)

In [4]:
# Reads a file and swaps every single quote in its text for a double quote
def doubleQuotes(filePath):
    
    """
    Description: Reads the file at filePath and returns its text with every single quote (')
    replaced by a double quote ("). The file on disk is not modified.
    
    filePath: The path of the file to read
    
    Returns: The text of the file with the quotes replaced
    """
    
    with open(filePath) as comp:
        myComp = comp.read()
    
    # str.replace returns a new string (strings are immutable), so its result has to be returned
    return myComp.replace("\'", "\"")

In [5]:
def extractCatName(country,payloadName):

    """
    Description: Grabs the name of the table from a payload fileName of the form
    <country><TableName>Payload..., e.g. 'canadaValuationPayload.json' gives 'Valuation'.
    
    country: a string of the name of the country that prefixes the file name
    payloadName: the file name to extract the table name from
    
    Returns: the text found between the country name and the word 'Payload'
    Raises: ValueError when payloadName does not follow that naming scheme
    """
    # The country is escaped so that characters such as '.' or '-' are matched literally
    matchAgainst = '{}(.+?)Payload'.format(re.escape(country))
    n = re.search(matchAgainst, payloadName)
    if n is None:
        raise ValueError("'{}' is not a payload file name for country '{}'".format(payloadName, country))
    return n.group(1)
    

In [6]:
def countryLinksFromStr(containingString):

    """
    Description: Finds every markets url embedded in the given script text and turns each one
    into a full link to that market's large-cap page.
    
    containingString: A string of an HTML script element 
    
    Returns: a list of [link, name] pairs, one per distinct url found. The order is not defined
    because duplicates are removed through a set. The number of distinct urls is also printed.
    """
    urlPattern = '"url":"/markets/[world-]{0,10}stocks[-/][a-zA-Z-]{0,50}/?'
    uniqueMatches = list(set(re.findall(urlPattern, containingString)))

    # found[7:] drops the leading '"url":"' so only the site-relative path remains.
    # found[23:-1] drops the first 23 characters ('"url":"/markets/stocks-') and the last one.
    linkPairs = [
        ['https://www.tradingview.com' + found[7:] + 'market-movers-large-cap/', found[23:-1]]
        for found in uniqueMatches
    ]

    print(len(uniqueMatches))

    return linkPairs

In [7]:
def countryLinksFromPage(pageLink):
    
    """
    Description:This function extracts the script which contains links to markets and passes it to
    countryLinksFromStr, which extracts the links themselves
    
    pageLink: a link of the page from which we'd like to extract market links 
    
    Returns: A list of links of stock markets pages (whatever countryLinksFromStr returns)
    Raises: ValueError when no matching script element is found on the page
    """
    
    # A timeout keeps the call from blocking forever when the site does not answer
    page = requests.get(pageLink, timeout=30).text
    parsed = BeautifulSoup(page,'lxml')
    
    #Links are all stored in a script so they can be added and removed when the user hovers over the markets tab
    input_tag = parsed.findAll('script', attrs= {"type" : "application/prs.init-data+json"})
    
    linksString = None
    for tag in input_tag:
        tagText = str(tag)
        #only the script that contains the links has the word egypt in it. Could change
        if 'egypt' in tagText:
            # when several scripts match, the last one is kept
            linksString = tagText
            
    if linksString is None:
        raise ValueError("No script element containing the market links was found on {}".format(pageLink))
            
    return countryLinksFromStr(linksString)
            
    

In [8]:
 # Try the link extraction on the USA large-cap page; the printed number is the count of distinct urls found.
 countryLinksFromPage('https://www.tradingview.com/markets/stocks-usa/market-movers-large-cap/')

68


[['https://www.tradingview.com/markets/stocks-switzerland/market-movers-large-cap/',
  'switzerland'],
 ['https://www.tradingview.com/markets/stocks-argentina/market-movers-large-cap/',
  'argentina'],
 ['https://www.tradingview.com/markets/stocks-canada/market-movers-large-cap/',
  'canada'],
 ['https://www.tradingview.com/markets/stocks-latvia/market-movers-large-cap/',
  'latvia'],
 ['https://www.tradingview.com/markets/stocks-belgium/market-movers-large-cap/',
  'belgium'],
 ['https://www.tradingview.com/markets/stocks-ksa/market-movers-large-cap/',
  'ksa'],
 ['https://www.tradingview.com/markets/stocks-netherlands/market-movers-large-cap/',
  'netherlands'],
 ['https://www.tradingview.com/markets/stocks-vietnam/market-movers-large-cap/',
  'vietnam'],
 ['https://www.tradingview.com/markets/stocks-venezuela/market-movers-large-cap/',
  'venezuela'],
 ['https://www.tradingview.com/markets/stocks-romania/market-movers-large-cap/',
  'romania'],
 ['https://www.tradingview.com/markets

## This section defines dictionaries that map the column names shown on the page itself to their corresponding payload values

In [9]:
# Early draft of the dividends mapping (page label -> payload field).
# The name `dividends` is assigned again further down in this file, where the labels
# have a space before the bracketed period; that later assignment replaces this one.
dividends = {
    'title': 'dividends',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Dividends per share(FY)': 'dps_common_stock_prim_issue_fy',
    'Dividends per share(FQ)': 'dps_common_stock_prim_issue_fq',
    'Dividend yield %(TTM)': 'dividends_yield_current',
    'Dividend yield FWD %': 'dividends_yield',
    'Payout ratio %(TTM)': 'dividend_payout_ratio_ttm',
    'Dividends per share growth %(Annual YoY)': 'dps_common_stock_prim_issue_yoy_growth_fy',
    'Continuous dividend payout': 'continuous_dividend_payout',
    'Continuous dividend growth': 'continuous_dividend_growth',
}

In [10]:
# Performance table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
performance = {
    'title': 'performance',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Price': 'close',
    'Change % 1D': 'change',
    'Performance % 1W': 'Perf.W',
    'Performance % 1M': 'Perf.1M',
    'Performance % 3M': 'Perf.3M',
    'Performance % 6M': 'Perf.6M',
    'Performance % YTD': 'Perf.YTD',
    'Performance % 1Y': 'Perf.Y',
    'Performance % 5Y': 'Perf.5Y',
    'Performance % All Time': 'Perf.All',
    'Volatility 1W': 'Volatility.W',
    'Volatility 1M': 'Volatility.M',
}

In [11]:
# Valuation table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
valuation = {
    'title': 'valuation',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Market cap': 'market_cap_basic',
    'Market cap perf % 1Y': 'Perf.1Y.MarketCap',
    'P/E': 'price_earnings_ttm',
    'PEG (TTM)': 'price_earnings_growth_ttm',
    'P/S': 'price_sales_current',
    'P/B': 'price_book_fq',
    'P/CF': 'price_to_cash_f_operating_activities_ttm',
    'P/FCF': 'price_free_cash_flow_ttm',
    'P/C': 'price_to_cash_ratio',
    'EV': 'enterprise_value_current',
    'EV / revenue (TTM)': 'enterprise_value_to_revenue_ttm',
    'EV / EBIT (TTM)': 'enterprise_value_to_ebit_ttm',
    'EV / EBITDA (TTM)': 'enterprise_value_ebitda_ttm',
}

In [12]:
# Dividends table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
dividends = {
    'title': 'dividends',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Dividends per share (FY)': 'dps_common_stock_prim_issue_fy',
    'Dividends per share (FQ)': 'dps_common_stock_prim_issue_fq',
    'Dividend yield % (TTM)': 'dividends_yield_current',
    'Dividend yield FWD %': 'dividends_yield',
    'Payout ratio % (TTM)': 'dividend_payout_ratio_ttm',
    'Dividends per share growth % (Annual YoY)': 'dps_common_stock_prim_issue_yoy_growth_fy',
    'Continuous dividend payout': 'continuous_dividend_payout',
    'Continuous dividend growth': 'continuous_dividend_growth',
}

In [13]:
# Profitability table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
profitability = {
    'title': 'profitability',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Gross margin % (TTM)': 'gross_margin_ttm',
    'Operating margin % (TTM)': 'operating_margin_ttm',
    'Pretax margin % (TTM)': 'pre_tax_margin_ttm',
    'Net margin % (TTM)': 'net_margin_ttm',
    'Free cash flow margin % (TTM)': 'free_cash_flow_margin_ttm',
    'ROA % (TTM)': 'return_on_assets_fq',
    'ROE % (TTM)': 'return_on_equity_fq',
    'ROIC % (TTM)': 'return_on_invested_capital_fq',
    'R&D ratio (TTM)': 'research_and_dev_ratio_ttm',
    'SG&A ratio (TTM)': 'sell_gen_admin_exp_other_ratio_ttm',
}

In [14]:
# Income statement table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
incomeStatement = {
    'title': 'incomestatement',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Revenue (TTM)': 'total_revenue_ttm',
    'Revenue growth % (TTM YoY)': 'total_revenue_yoy_growth_ttm',
    'Gross profit (TTM)': 'gross_profit_ttm',
    'Operating income (TTM)': 'oper_income_ttm',
    'Net income (TTM)': 'net_income_ttm',
    'EBITDA (TTM)': 'ebitda_ttm',
    'EPS Diluted (TTM)': 'earnings_per_share_diluted_ttm',
    'EPS Diluted% (TTM YoY)': 'earnings_per_share_diluted_yoy_growth_ttm',
}

In [15]:
# Balance sheet table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
balanceSheet = {
    'title': 'balancesheet',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Assets (FQ)': 'total_assets_fq',
    'Current assets (FQ)': 'total_current_assets_fq',
    'Cash on hand (FQ)': 'cash_n_short_term_invest_fq',
    'Liabilities (FQ)': 'total_liabilities_fq',
    'Debt (FQ)': 'total_debt_fq',
    'Net debt (FQ)': 'net_debt_fq',
    'Equity': 'total_equity_fq',
    'Current ratio (FQ)': 'current_ratio_fq',
    'Quick ratio (FQ)': 'quick_ratio_fq',
    'Debt / equity (FQ)': 'debt_to_equity_fq',
    'Cash / debt (FQ)': 'cash_n_short_term_invest_to_total_debt_fq',
}

In [16]:
# Quick check of how many entries the mapping holds (this count includes the 'title' entry).
len(balanceSheet)

14

In [17]:
# Cash flow table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
cashFlow = {
    'title': 'cashflow',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'Operating CF (TTM)': 'cash_f_operating_activities_ttm',
    'Investing CF (TTM)': 'cash_f_investing_activities_ttm',
    'Financing CF (TTM)': 'cash_f_financing_activities_ttm',
    'Free cash flow (TTM)': 'free_cash_flow_ttm',
    'CAPEX (TTM)': 'capital_expenditures_ttm',
}

In [18]:
# Oscillators table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
oscillators = {
    'title': 'oscillators',
    'Name': 'description',
    'Oscillators Rating 1D': 'Recommend.Other',
    'RSI (14) 1D': 'RSI',
    'Momentum (10) 1D': 'Mom',
    'Awesome Oscillator 1D': 'AO',
    'Commodity Channel Index (20) 1D': 'CCI20',
    'Stochastic (14,3,3) 1D %K': 'Stoch.K',
    'Stochastic (14,3,3) 1D %D': 'Stoch.D',
    'MACD (12,26) 1D Level': 'MACD.macd',
    'MACD (12,26) 1D Signal': 'MACD.signal',
}

In [19]:
# Trend-following table: page label -> payload field.
# 'title' holds the lower-cased table name that is compared with the payload file names.
trendFollowing = {
    'title': 'trendfollowing',
    'Name': 'description',
    'Currency': 'fundamental_currency_code',
    'MA Rating 1D': 'Recommend.MA',
    'Price': 'close',
    'Moving Average (20) 1D': 'SMA20',
    'Moving Average (50) 1D': 'SMA50',
    'Moving Average (200) 1D': 'SMA200',
    'Bollinger Bands (20) 1D Upper': 'BB.upper',
    'Bollinger Bands (20) 1D Lower': 'BB.lower',
}

In [20]:
# Every label mapping that gets matched (through its 'title' entry) against the payload file names.
# NOTE(review): `profitability` is defined in this file but is not part of this list, so a
# profitability payload would never be matched - confirm whether that is intentional.
dictsList = [
    performance,
    dividends,
    oscillators,
    cashFlow,
    trendFollowing,
    incomeStatement,
    balanceSheet,
    valuation,
]

## This section deals with the API

In [21]:
def getCountryTable(payloadPath, country, reqRows = 30000):
    
    """
    Description: Sends a stored payload to scanner.tradingview.com for one country and returns
    the parsed response.
    
    payloadPath: The path of the payload file whose text is sent as the request body
    country: The name placed in the request path, i.e. POST /<country>/scan
    reqRows: The upper bound written into the payload's range. Only the exact text
    "range":[0,100] is replaced; a payload that does not contain it is sent unchanged
    
    Returns: The server's response body parsed from JSON
    """
    
    with open(payloadPath, 'r') as f:
        payloadString = f.read()
        
    payloadString = payloadString.replace("\"range\":[0,100]","\"range\":[0,{}]".format(reqRows))
    
    conn = http.client.HTTPSConnection("scanner.tradingview.com")
    try:
        conn.request("POST", "/{}/scan".format(country), payloadString)
        res = conn.getresponse()
        data = res.read()
    finally:
        # Release the socket whether or not the request succeeded
        conn.close()
    
    return json.loads(data)

In [22]:
# Request the 'uk' table with the oscillators payload file (the file is named after canada,
# but the country queried is the one given as the second argument), then display the result.
uk_table = getCountryTable((basePath + 'jsonFiles\\ActualFiles\\canadaOscillatorsPayload.json'), 'uk', 20000)
uk_table

{'totalCount': 1104,
 'data': [{'s': 'LSE:SHEL',
   'd': ['SHELL PLC ORD EUR0.07',
    -0.09090909,
    44.79062963,
    -99.8934,
    9.62648529,
    -175.72406144,
    13.77600486,
    23.09551217,
    7.83005345,
    24.50048176]},
  {'s': 'LSE:AZN',
   'd': ['ASTRAZENECA ORD USD0.25',
    -0.18181818,
    42.80230879,
    -258,
    -427.19425,
    -52.38992991,
    30.47358435,
    23.75045517,
    -155.92110881,
    -150.52388025]},
  {'s': 'LSE:HSBA',
   'd': ['HSBC HOLDINGS PLC ORD USD0.50',
    -0.27272727,
    44.35149005,
    -6.01508,
    -25.65494888,
    -37.27859256,
    41.99677393,
    34.75634122,
    -8.08714667,
    -7.50260952]},
  {'s': 'LSE:ULVR',
   'd': ['UNILEVER PLC ORD GBP0.031111',
    -0.18181818,
    43.63452305,
    -121.51,
    -86.25156471,
    -43.48820535,
    33.47083333,
    30.53194444,
    -34.78118504,
    -33.94801658]},
  {'s': 'LSE:RIO',
   'd': ['RIO TINTO ORD GBP0.10',
    0.09090909,
    56.35489583,
    100,
    193.13238088,
    50.681304

In [23]:
def craftColumns(myDict, payloadJSON):
    
    """
    myDict: A hardcoded dictionary whose keys are the names shown on the page and whose values
    are the matching payload values
    payloadJSON: a json (dict) of the values sent in the payload; its 'columns' entry is read
    Returns: A list of the column names as they appear on the page. A payload column that has
    no entry in myDict is kept as it is.
    """
    
    headerRow = []
    
    for column in payloadJSON['columns']:
        
        # every page label that maps to this payload column, in dictionary order
        labels = [label for label, field in myDict.items() if field == column]
        
        if column in myDict.values():
            headerRow.extend(labels)
        else:
            headerRow.append(column)

    return headerRow

In [24]:
# Fetch the valuation table for canada (reqRows is left at its default) for use in the cells below.
resp = getCountryTable(basePath + 'jsonFiles\\ActualFiles\\canadaValuationPayload.json','canada')

In [25]:
def createCsv(fileName, country, response, valuesDict, payloadJSON):
    """
    Description: Writes one table of a server response to <basePath>csvs\\<country>\\<fileName>.csv
    
    fileName: Name of file you'll be creating to write the result to (without the .csv extension)
    country: Name of the folder under 'csvs' that the file is written to; created when missing
    response: server's response (JSON); the 'd' list of every item under 'data' becomes one row
    valuesDict: The dictionary used to turn the payload columns into the names shown on the page
    payloadJSON: The payload that was sent; its 'columns' entry defines the first row of the csv
    
    Raises: KeyError when the response has no 'data' entry (nothing is written in that case)
    """
    
    columns = craftColumns(valuesDict, payloadJSON)
    # Read the rows before touching the disk so a bad response does not leave an empty csv behind
    serverResp = response['data']

    # makedirs also creates a missing 'csvs' folder and does not fail when the folder already exists
    countryDir = os.path.join(basePath, 'csvs', country)
    os.makedirs(countryDir, exist_ok=True)

    # utf-8 so that names with characters outside the platform's default code page can be written
    with open(os.path.join(countryDir, '{}.csv'.format(fileName)), 'w', newline='', encoding='utf-8') as file:

        writer = csv.writer(file)
        writer.writerow(columns)
        writer.writerows(row['d'] for row in serverResp)

In [26]:
# Write the canada valuation response fetched above to csvs\canada\smth.csv.
# NOTE: the payload file opened inline here is never explicitly closed.
createCsv('smth', 'canada', resp, valuation, json.loads(open(basePath + 'jsonFiles\\ActualFiles\\canadaValuationPayload.json').read()))

In [27]:
# Same oscillators payload file, this time queried for 'nigeria'.
res = getCountryTable(basePath + "jsonFiles\\ActualFiles\\canadaOscillatorsPayload.json", 'nigeria', reqRows = 30000)

In [35]:
def countryAllCSVS(payloadsPath, country):
    
    """
    Description: Downloads every table that has both a payload file in payloadsPath and a
    dictionary in dictsList, and writes each one to a csv for the given country.
    
    payloadsPath: The folder holding the payload files, named canada<TableName>Payload.json
    country: The country name used both in the API request and as the csv output folder
    
    Each csv is named <country><Table>, where <Table> is the lower-cased table name with its
    first letter capitalised. Files that do not follow the payload naming scheme and tables
    without a dictionary are skipped.
    """
    
    dictsByTitle = {dic['title']: dic for dic in dictsList}
    
    for payload in os.listdir(payloadsPath):
        
        if not payload.endswith('.json'):
            continue
        
        try:
            tableName = extractCatName('canada',payload)
        except (AttributeError, ValueError):
            # not a canada<TableName>Payload file, so there is no table name to extract
            continue
        
        table = tableName.lower()
        dic = dictsByTitle.get(table)
        if dic is None:
            continue
        
        payloadFile = os.path.join(payloadsPath, payload)
        resp = getCountryTable(payloadFile, country, reqRows = 20000)
        
        with open(payloadFile) as f:
            payloadJSON = json.load(f)
        
        # country is a plain string here, so it is passed whole as the output folder name
        createCsv(country+table.capitalize(), country, resp, dic, payloadJSON)
   
    
# Generate all the csvs for a single country as a trial run.
countryAllCSVS(basePath + 'jsonFiles\\ActualFiles\\', 'egypt')


In [29]:
def getAllTablesAllCountries(payloadsPath):
    
    
    """
    DESCRIPTION: This function loops over all the tables for all countries and saves them into csvs in a specified folder 
    
    payloadsPath: The folder holding the payload files, passed on to countryAllCSVS
    
    Each country is first tried under the name taken from its link. When that fails it is tried
    once more under an alternative name: a hardcoded alias ('usa', 'united-kingdom',
    'south-africa') or, for any other name containing '-', the name with the hyphens removed.
    
    Returns: A list of the [link, name] entries for which every attempt failed
    """
    
    # Names used in the website links whose name in the API request is different
    scannerAliases = {
        'usa': 'america',
        'united-kingdom': 'uk',
        'south-africa': 'rsa',
    }
    
    failed_list = []
    countries =  countryLinksFromPage('https://www.tradingview.com/markets/stocks-usa/market-movers-large-cap/')
    print("There are {} countries\n".format(len(countries)))
    
    for i, country in enumerate(countries, start=1):
        print("Doing country number {}".format(i))
        
        linkName = country[1]
        candidates = [linkName]
        fallback = scannerAliases.get(linkName, linkName.replace('-',''))
        if fallback != linkName:
            candidates.append(fallback)
        
        for name in candidates:
            print("Trying to grab the csvs for {}".format(name))
            try:
                countryAllCSVS(payloadsPath, name)
            except Exception as err:
                # Exception (not a bare except) so that Ctrl-C still stops the run
                print("WE FAILED")
                print("The country we failed was: ", name)
                print("Reason: {!r}".format(err))
                print("\n\n\n")
            else:
                print("SUCCESS\n\n\n")
                break
        else:
            # no candidate name worked, so the country is reported back to the caller
            failed_list.append(country)
        
        # short pause between countries
        time.sleep(1)
                
    return failed_list
            
            

    
    
    