In [39]:
# Main import block
import asyncio
import datetime
import signal
import time
import warnings
from contextlib import contextmanager
from datetime import date, datetime
from io import StringIO

# Sneaky scrapper
import cfscrape
# Upload to google sheets
import gspread
# import df2gspread as d2g
import gspread_pandas as gsp
import numpy as np
import pandas as pd
import requests
# Upload to yandex
import yadisk
from bs4 import BeautifulSoup
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

warnings.filterwarnings("ignore")

In [40]:
async def lme_selenium_async():
    url = "https://www.lme.com/Metals/Non-ferrous#tabIndex=1"

    service = Service()
    driver = webdriver.Chrome(service=service)

    try:
        driver.maximize_window()
        driver.get(url=url)

        time.sleep(2)

        driver.execute_script(
            "window.scrollTo(0, window.scrollY + window.innerHeight);"
        )
        driver.execute_script(
            "window.scrollTo(0, window.scrollY + window.innerHeight);"
        )

        time.sleep(2)

        html_code = driver.page_source

        soup = BeautifulSoup(html_code, "html.parser")
        # Вытаскиваем данные из табличного блока
        data_raw = soup.find_all("div", class_="metal-block-row__blocks")

        # Приводим к тексту
        metalls_raw = data_raw[0].text

        # Убираем все лишнее
        metalls_raw = metalls_raw.replace(" ", "").replace("LME", "LME_").split("\n")
        metalls_raw = " ".join(metalls_raw).strip().split(" ")

        # Конфигурируем новый лист по металлам
        metalls_raw = [metalls_raw[i : i + 4] for i in range(0, len(metalls_raw), 4)]

        # Обрезаем до нужной инфы
        metalls_raw = [i[:2] for i in metalls_raw]

        # Приведем цены к числовому формату
        for metall in metalls_raw:
            metall[1] = pd.to_numeric(metall[1])

        # Формируем строку датафрейма по металлам
        dict_list = [{item[0]: item[1]} for item in metalls_raw]

        metall_df = pd.DataFrame()

        for i in dict_list:
            keys = list(i.keys())
            values = list(i.values())
            metall_df[keys[0]] = values

        # Дропаем ненужные колонки
        metall_df.drop(columns=["LME_AluminiumAlloy", "LME_NASAAC"], inplace=True)
        metall_df = metall_df.rename(
            columns={
                "LME_Aluminium": "aluminium",
                "LME_Copper": "copper",
                "LME_Lead": "lead",
                "LME_Nickel": "nickel",
                "LME_Zinc": "zink",
                "LME_Tin": "tin",
            }
        )

        # Достанем в вставим дату
        date_raw = soup.find_all("span", class_="metal-block-container__refreshed-on")
        date_raw = date_raw[0]
        date_raw = str(date_raw).split(">")[1]
        date_raw = date_raw.replace("\xa0", "").replace("</span", "")
        date_raw = pd.to_datetime(date_raw) - pd.Timedelta(days=1)

        metall_df["date"] = date_raw

        # Поменяем порядок колонок
        new_column_order = [
            "date",
            "aluminium",
            "copper",
            "lead",
            "nickel",
            "zink",
            "tin",
        ]
        metall_df = metall_df[new_column_order]

        # Сохраняем полученные результаты
        lme_db = pd.read_excel("./data/LME_db_new.xlsx", index_col=0)
        lme_db = pd.concat([lme_db, metall_df]).reset_index(drop=True)

        # Дропаем дубликаты
        lme_db = lme_db.drop_duplicates(subset=lme_db.columns.to_list()[1:])

        # Сохраняем
        with pd.ExcelWriter(
            "./data/LME_db_new.xlsx",
            date_format="YYYY-MM-DD",
            datetime_format="YYYY-MM-DD",
        ) as writer:
            lme_db.to_excel(writer, sheet_name="LME_main")
        
        print("LME_main is done!!!")
        display(lme_db.tail(10))

    except Exception as error:
        print(error)

    finally:
        driver.close()
        driver.quit()


In [41]:
################################################################
#########################   KITCO (main) ########################
################################################################


# Ассинхронный парсер Kitco
async def kitco_parser_async():
    async def page_getter():
        try:
            url = "https://www.kitco.com/price/fixes/london-fix"

            service = Service()
            driver = webdriver.Chrome(service=service)
            # driver.maximize_window()
            driver.get(url=url)
            #time.sleep(2)

            driver.execute_script(
                "window.scrollTo(0, window.scrollY + window.innerHeight);"
            )
            driver.execute_script(
                "window.scrollTo(0, window.scrollY + window.innerHeight);"
            )

            time.sleep(2)

            html_code = driver.page_source
            
            return html_code

        except Exception as error:
            print(error)

        finally:
            driver.close()
            driver.quit()
            
    
    html_code = await page_getter()
    
    try:
        # Работа над полученными данными
        soup = BeautifulSoup(html_code)

        # Выделим данные и предобработаем для получения дневных данных
        day = soup.find_all("div", class_="border")[1]
        for i in day:
            day = list(i.find_all("div"))

        day = day[4:]

        day = [str(i) for i in day]

        day = [i.replace("<div>", "").replace("</div>", "") for i in day]

        day[0] = pd.to_datetime(day[0])

        day[1] = day[1].split("/")
        day[1] = day[1][1].strip().replace(",", "")
        day[1] = pd.to_numeric(day[1])

        day[2] = pd.to_numeric(day[2])

        day[3] = day[3].split("/")
        day[3] = day[3][1].strip().replace(",", "")
        day[3] = pd.to_numeric(day[3])

        day[4] = day[4].split("/")
        day[4] = day[4][1].strip().replace(",", "")
        day[4] = pd.to_numeric(day[4])

        df_row = (
            pd.DataFrame([day], columns=["Date", "Gold", "Silver", "Platinum", "Palladium"])
            .sort_values("Date")
            .reset_index(drop=True)
        )

        # Выделим данные и предобработаем для получения исторических данных
        historical = soup.find_all("div", class_="border")[2]
        for i in historical:
            historical = list(i.find_all("div"))

        historical = historical[5:]

        historical = [str(i) for i in historical]

        historical = [i.replace("<div>", "").replace("</div>", "") for i in historical]
        historical = [item for item in historical if "<div" not in item]

        # Разобьем по дням
        historical = [historical[i : i + 5] for i in range(0, len(historical), 5)]

        # Обработаем данные внутри каждого дня
        for i in historical:
            i[0] = pd.to_datetime(i[0])

            i[1] = i[1].split("/")
            i[1] = i[1][1].strip().replace(",", "")
            i[1] = pd.to_numeric(i[1])

            i[2] = pd.to_numeric(i[2])

            i[3] = i[3].split("/")
            i[3] = i[3][1].strip().replace(",", "")
            i[3] = pd.to_numeric(i[3])

            i[4] = i[4].split("/")
            i[4] = i[4][1].strip().replace(",", "")
            i[4] = pd.to_numeric(i[4])

        df_historical = (
            pd.DataFrame(
                historical, columns=["Date", "Gold", "Silver", "Platinum", "Palladium"]
            )
            .sort_values("Date")
            .reset_index(drop=True)
        )

        df_historical = pd.concat([df_historical, df_row]).reset_index(drop=True)
        kitko_db = pd.read_excel("./data/kitko_db.xlsx", index_col=0)

        kitko_db = (
            pd.concat([kitko_db, df_historical])
            .reset_index(drop=True)
            .drop_duplicates(subset=["Date"])
        )

        with pd.ExcelWriter(
            "./data/kitko_db.xlsx",
            date_format="YYYY-MM-DD",
            datetime_format="YYYY-MM-DD",
        ) as writer:
            kitko_db.to_excel(writer, sheet_name="kitco_metall")

        print("KITKO_main is done!!!")
        display(kitko_db.tail(10))
        
    except Exception as e:
        print(e)

In [42]:
url = "https://www.kitco.com/price/fixes/london-fix"
service = Service()
driver = webdriver.Chrome(service=service)
# driver.maximize_window()
driver.get(url=url)
#time.sleep(2)

driver.execute_script(
    "window.scrollTo(0, window.scrollY + window.innerHeight);"
)
driver.execute_script(
    "window.scrollTo(0, window.scrollY + window.innerHeight);"
)

time.sleep(2)

html_code = driver.page_source

In [43]:
soup = BeautifulSoup(html_code)

# Выделим данные и предобработаем для получения дневных данных
day = soup.find_all("div", class_="border")[1]
for i in day:
    day = list(i.find_all("div"))

day = day[4:]

day = [str(i) for i in day]

day = [i.replace("<div>", "").replace("</div>", "") for i in day]

for i in day:
    if type(i) == str:
        i = i.replace("<!-- -->", "")
    else:
        pass

day[0] = pd.to_datetime(day[0])

day[1] = day[1].split("/")
day[1] = day[1][1].strip().replace(",", "")
day[1] = pd.to_numeric(day[1])

day[2] = pd.to_numeric(day[2])

day[3] = day[3].split("/")
day[3] = day[3][1].strip().replace(",", "")
day[3] = pd.to_numeric(day[3])

day[4] = day[4].split("/")
day[4] = day[4][1].strip().replace(",", "")
day[4] = pd.to_numeric(day[4])

df_row = (
    pd.DataFrame([day], columns=["Date", "Gold", "Silver", "Platinum", "Palladium"])
    .sort_values("Date")
    .reset_index(drop=True)
)

# Выделим данные и предобработаем для получения исторических данных
historical = soup.find_all("div", class_="border")[2]
for i in historical:
    historical = list(i.find_all("div"))

historical = historical[5:]

historical = [str(i) for i in historical]

historical = [i.replace("<div>", "").replace("</div>", "") for i in historical]
historical = [item for item in historical if "<div" not in item]

# Разобьем по дням
historical = [historical[i : i + 5] for i in range(0, len(historical), 5)]

ValueError: Unable to parse string "<!-- -->2640.95" at position 0

In [10]:
day

[Timestamp('2024-10-07 00:00:00'),
 '<!-- -->2640.95',
 '31.88',
 '986.00 / <!-- -->980.00',
 '1,021.00 / <!-- -->1,011.00']

In [17]:
day[1].replace("<!-- -->", "")

'2640.95'

In [21]:
day

[Timestamp('2024-10-07 00:00:00'),
 '<!-- -->2640.95',
 '31.88',
 '986.00 / <!-- -->980.00',
 '1,021.00 / <!-- -->1,011.00']

In [34]:
for i in day:
    if type(i) == str:
        i = i.replace("<!-- -->", "")
    else:
        pass


In [35]:
day

[Timestamp('2024-10-07 00:00:00'),
 '<!-- -->2640.95',
 '31.88',
 '986.00 / <!-- -->980.00',
 '1,021.00 / <!-- -->1,011.00']

In [37]:
day[1] = day[1].replace("<!-- -->", "")

In [38]:
day

[Timestamp('2024-10-07 00:00:00'),
 '2640.95',
 '31.88',
 '986.00 / <!-- -->980.00',
 '1,021.00 / <!-- -->1,011.00']