In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from rich.console import Console
from rich.progress import track

In [2]:
# Number of listings requested from the stocks page: it is appended to the
# request URL as the `end=` value and also passed to parse() as its row count.
LEN = 1807

In [3]:
# Request headers sent by get_HTML(); the user-agent is a desktop Chrome string.
HEADERS = {
    'user-agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/95.0.4638.69 Safari/537.36"
    ),
}

In [4]:
def get_HTML(URL, params = None, timeout = 30):
    """Send a GET request with the notebook's HEADERS and return the response.

    Parameters
    ----------
    URL : str
        Address to fetch.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests`` waits indefinitely, so one stalled
        connection would freeze the whole notebook run.

    Returns
    -------
    requests.Response
        The response object, returned as-is; callers check ``status_code``.
    """
    return requests.get(URL, headers = HEADERS, params = params, timeout = timeout)

In [5]:
def curr_parse(URL):
    """Scrape the currency page at URL and return a symbol -> rouble rate dict.

    The first two price cells found on the page are stored under "$" and "€"
    respectively, and "₽" is fixed at 1.0. When the response status is not
    200 an empty dict is returned.
    """
    response = get_HTML(URL)
    if response.status_code != 200:
        return {}

    def to_rate(cell):
        # Drop the trailing "\xa0₽" unit and switch the decimal comma to a point.
        return float(cell.text.replace("\xa0₽","").replace(",","."))

    page = BeautifulSoup(response.text, "html.parser")
    cells = page.find_all("div", class_ = "SecurityColumn__cellPriceSecurities_cswKs")
    return {
        "$": to_rate(cells[0]),
        "€": to_rate(cells[1]),
        "₽": 1.0,
    }
# Runs when the cell executes: scrape the rates page once and keep the result
# in the module-level curr_dict, which struct() uses to look up currency symbols.
curr_dict = curr_parse("https://www.tinkoff.ru/invest/currencies/")

In [6]:
def parse(URL,Len):
    """Scrape the stock listing at URL into a dict keyed by stock title.

    Parameters
    ----------
    URL : str
        Listing page to fetch through get_HTML().
    Len : int
        Maximum number of rows to collect.

    Returns
    -------
    dict
        ``{title: [ticker, price_text, yield_text]}`` with non-breaking
        spaces removed from the price and yield texts. Entries that share a
        title overwrite one another, so the dict can hold fewer than ``Len``
        keys. Empty when the response status is not 200 (after printing
        "Error").

    Notes
    -----
    Each column is extracted independently and paired by position, exactly as
    before. If the page yields fewer entries than ``Len`` the result is
    truncated to what is available (with a printed warning) instead of
    raising while indexing past the end of a column.
    """
    stock_params_dict = {}
    html = get_HTML(URL)
    if html.status_code != 200:
        print("Error")
        return stock_params_dict

    soup = BeautifulSoup(html.text, "html.parser")
    items = soup.find_all("a", class_ = "Link-module__link__Wv1b Link-module__link_theme_default_gf5wq")

    def first_texts(tag, css_class):
        # Text of the first matching descendant of every link that has one;
        # each link is searched once.
        matches = (item.find(tag, class_ = css_class) for item in items)
        return [node.text for node in matches if node is not None]

    stock_title_list = first_texts('div', "Caption__caption_cyYZT")
    stock_ticker_list = first_texts('div', "Caption__subcaption_xTAKS")
    stock_price_list = [price.replace("\xa0","") for price in
                        first_texts('div', "SecurityColumn__cellPriceSecurities_cswKs")]
    # Only every second Money text is kept.
    # NOTE(review): presumably each listing row contributes two such texts and
    # the first one is the yield -- confirm against the live page markup.
    stock_yield_list = [yld.replace("\xa0","") for yld in
                        first_texts('span', "Money-module__money_UwC2N")[::2]]

    available = min(len(stock_title_list), len(stock_ticker_list),
                    len(stock_price_list), len(stock_yield_list))
    if available < Len:
        print("Warning: requested " + str(Len) + " rows but only " + str(available) + " were found")

    for i in range(min(Len, available)):
        stock_params_dict[stock_title_list[i]] = [stock_ticker_list[i], stock_price_list[i], stock_yield_list[i]]
    return stock_params_dict


In [7]:
console = Console()
# LEN is used twice: as the `end=` value of the listing URL and as the row
# count handed to parse(). The spinner is shown while the request is running.
with console.status("Parsing...", spinner = "monkey"):
    data = parse(
        f"https://www.tinkoff.ru/invest/stocks/?country=All&orderType=Asc&sortType=ByName&start=0&end={LEN}",
        LEN,
    )

Output()

In [8]:
def struct(data, rates = None):
    """Turn the scraped ``{title: [ticker, price, yield]}`` dict into a DataFrame.

    Parameters
    ----------
    data : dict
        Mapping of stock title to ``[ticker, price_text, yield_text]``. Both
        texts use a decimal comma and end with a one-character currency
        symbol; the yield text may start with "+" or "−".
    rates : dict, optional
        Currency symbol -> rouble rate. Defaults to the module-level
        ``curr_dict``; passing it explicitly removes the dependency on
        notebook state.

    Returns
    -------
    pandas.DataFrame
        Indexed by title with columns Ticker, Price, Nom_Yield, Currency,
        Yield_rate, Currency_in_RUB, Price_in_RUB and Nom_Yield_in_RUB.
        An empty ``data`` gives an empty frame with these columns.

    Raises
    ------
    KeyError
        If a currency symbol is missing from ``rates``.
    """
    if rates is None:
        rates = curr_dict

    def to_number(text):
        # Decimal comma -> point, then drop the trailing currency symbol.
        return float(text.replace(",",".")[:-1])

    all_df = pd.DataFrame.from_dict(data, orient = "index", columns = ["Ticker","Price","Nom_Yield"])
    raw_yield = all_df["Nom_Yield"]

    # The currency symbol is the last character of the yield text.
    all_df["Currency"] = raw_yield.apply(lambda text: text[-1])
    # Normalise the sign characters ("+" dropped, U+2212 minus -> "-") before parsing.
    all_df["Nom_Yield"] = raw_yield.apply(
        lambda text: to_number(text.replace("+","").replace("−","-"))
    ).astype(float)
    all_df["Price"] = all_df["Price"].apply(to_number).astype(float)

    # Relative change against (price - yield). Subtract 1 first and round last
    # so the stored value is e.g. 0.01 rather than 0.010000000000000009.
    all_df["Yield_rate"] = (all_df["Price"] / (all_df["Price"] - all_df["Nom_Yield"]) - 1).round(2)

    all_df["Currency_in_RUB"] = all_df["Currency"].apply(lambda symbol: rates[symbol]).astype(float)
    all_df["Price_in_RUB"] = all_df["Price"] * all_df["Currency_in_RUB"]
    all_df["Nom_Yield_in_RUB"] = all_df["Nom_Yield"] * all_df["Currency_in_RUB"]

    # NOTE(review): intended to replace missing values with None; on recent
    # pandas versions float columns may keep NaN here -- confirm if NaN can occur.
    all_df = all_df.where(all_df.notna(), None)
    return all_df


In [18]:
df = struct(data)

# API scopes requested for the service account.
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']

# Load the service-account key from the JSON file and authorise a gspread client.
# (The former placeholder assignment of the bare class to `credentials` was
# dead code: it was overwritten here before ever being used.)
credentials = ServiceAccountCredentials.from_json_keyfile_name('test-333300-1860a35c4df3.json', scope)

gc = gspread.authorize(credentials)



In [10]:
# Write the table to the first worksheet of the "test" spreadsheet in a single
# call: the header row comes first, followed by one row per stock.
ws = gc.open("test").get_worksheet(0)
ws.update([df.columns.tolist(), *df.values.tolist()])

{'spreadsheetId': '1XIARKYc4ANNUEXr84YZdkKAthzVKUyJQl3_1bpWe5gs',
 'updatedRange': 'Sheet_1!A1:H1805',
 'updatedRows': 1805,
 'updatedColumns': 8,
 'updatedCells': 14438}

In [11]:
def forecast(df):
    """Fetch the forecast text shown on each stock's page.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Ticker`` column; one request is made per ticker, in
        row order.

    Returns
    -------
    list
        One entry per ticker: the text of the first forecast block on the
        page, or None when the page has no such block or the response status
        is not 200 (the same status check parse() and curr_parse() apply).
    """
    forecast_list = []
    for ticker in df.Ticker.tolist():
        html = get_HTML("https://www.tinkoff.ru/invest/stocks/" + ticker + "/")
        if html.status_code != 200:
            # Do not try to parse an error page; record the gap and move on.
            forecast_list.append(None)
            continue
        soup = BeautifulSoup(html.text, "html.parser")
        block = soup.find("div", class_ = "ShortPrognosisBlock__value_k96ti")
        forecast_list.append(block.text if block is not None else None)
    return forecast_list


In [23]:
# Trial run on the first 100 rows only: forecast() issues one HTTP request per
# ticker. The returned list is shown as the cell output.
forecast(df.iloc[:100])

['52\xa0$+25,18\xa0$\xa0(93,89%)',
 '210\xa0$+61,63\xa0$\xa0(41,54%)',
 '33,25\xa0$+17,39\xa0$\xa0(109,65%)',
 '72\xa0$+44,64\xa0$\xa0(163,16%)',
 '46\xa0$+23,51\xa0$\xa0(104,54%)',
 '29,5\xa0$+7,26\xa0$\xa0(32,64%)',
 '183,4\xa0$+12,08\xa0$\xa0(7,05%)',
 None,
 '30\xa0$+11,13\xa0$\xa0(58,98%)',
 '80,67\xa0$−1,04\xa0$\xa0(−1,27%)',
 '70\xa0$−8,84\xa0$\xa0(−11,21%)',
 '30\xa0$−3,63\xa0$\xa0(−10,79%)',
 '142\xa0$+13,43\xa0$\xa0(10,45%)',
 '130\xa0$+12,99\xa0$\xa0(11,1%)',
 '35,34\xa0$+20,77\xa0$\xa0(142,55%)',
 '55\xa0$+19,64\xa0$\xa0(55,54%)',
 '410\xa0$+99,3\xa0$\xa0(31,96%)',
 '56,8\xa0$+12,45\xa0$\xa0(28,07%)',
 '26\xa0$+6,36\xa0$\xa0(32,38%)',
 None,
 '380,72\xa0$+12,49\xa0$\xa0(3,39%)',
 '41\xa0$+11,4\xa0$\xa0(38,51%)',
 '136,75\xa0$+55,31\xa0$\xa0(67,92%)',
 '103,6\xa0$+46,05\xa0$\xa0(80,02%)',
 '228,34\xa0$+28,82\xa0$\xa0(14,44%)',
 '51\xa0$+24,96\xa0$\xa0(95,85%)',
 '124\xa0$+36,13\xa0$\xa0(41,12%)',
 None,
 '760\xa0$+88,42\xa0$\xa0(13,17%)',
 None,
 '251,5\xa0$+22,88\xa0$\xa0(1