In [1]:
import cfscrape 
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')

In [None]:
def investing_com():
    """Refresh the Excel price histories stored under ./data/investing_com.

    For every configured instrument the function downloads the
    historical-data page from ru.investing.com, extracts the ``date`` and
    ``price`` columns, prepends the rows to the history already stored in
    the instrument's workbook (one row per date is kept; freshly scraped
    rows win) and writes the workbook back.

    Returns:
        None. The function is called for its side effects (HTTP requests
        and rewritten ``.xlsx`` files).

    Raises:
        Whatever the HTTP client, ``pd.read_html``, ``pd.read_excel`` or the
        Excel writer raise (e.g. ``FileNotFoundError`` for a missing history
        workbook, ``IndexError`` when the page has fewer tables than
        ``table_idx`` expects).
    """
    # Function-scope imports keep this block self-contained.
    import os
    from io import StringIO

    data_dir = './data/investing_com'

    def get_data(url, table_idx=1, currency=False):
        """Scrape one historical-data page into a ``date``/``price`` frame.

        Args:
            url: Address of the ru.investing.com historical-data page.
            table_idx: Position of the wanted table in the list returned by
                ``pd.read_html`` for that page.
            currency: When True the price column is divided by 10000 instead
                of being parsed as text.
                NOTE(review): presumably the quote arrives as an integer
                because the decimal comma is dropped during HTML parsing --
                confirm against a live page.

        Returns:
            DataFrame with columns ``date`` (datetime64) and ``price``,
            without rows dated today, without duplicate rows, and with a
            fresh 0..n-1 index.
        """
        today = pd.to_datetime(datetime.now().date())

        scraper = cfscrape.create_scraper()
        response = scraper.get(url)

        # Wrap the markup in StringIO: passing literal HTML text to
        # read_html is deprecated in recent pandas releases.
        table = pd.read_html(StringIO(response.text))[table_idx]
        table = table.rename(columns={'Дата': 'date', 'Цена': 'price'})

        # Work on an explicit copy so the assignments below never write
        # into a view of ``table``.
        result = table[['date', 'price']].copy()

        if currency:
            result['price'] = result['price'] / 10000
        else:
            # Text prices use '.' as a thousands separator and ',' as the
            # decimal mark. regex=False makes the '.' a literal dot on
            # every pandas version.
            try:
                cleaned = (result['price']
                           .str.replace('.', '', regex=False)
                           .str.replace(',', '.', regex=False))
                result['price'] = pd.to_numeric(cleaned)
            except (AttributeError, ValueError, TypeError):
                # AttributeError: the column is not text (already numeric),
                # so there is nothing to convert. ValueError/TypeError: a
                # value could not be parsed. In both cases this stays
                # best-effort and the column is left exactly as scraped.
                pass

        result['date'] = pd.to_datetime(result['date'], dayfirst=True)

        # Rows dated today are dropped before the merge.
        return (result[result['date'] != today]
                .drop_duplicates()
                .reset_index(drop=True))

    def update_history(fresh, file_name, sheet_name, seed_name=None):
        """Merge ``fresh`` rows into the workbook ``file_name`` and rewrite it.

        ``seed_name`` names an alternative workbook to read the history
        from when ``file_name`` does not exist yet.
        """
        target = '{}/{}'.format(data_dir, file_name)

        source = target
        if seed_name is not None and not os.path.exists(target):
            source = '{}/{}'.format(data_dir, seed_name)

        history = pd.read_excel(source, index_col=0)

        # Fresh rows come first so that drop_duplicates (keep='first')
        # prefers them over the stored ones for the same date.
        merged = (pd.concat([fresh, history], axis=0)
                  .reset_index(drop=True)
                  .drop_duplicates('date'))

        with pd.ExcelWriter(target,
                            date_format="YYYY-MM-DD",
                            datetime_format="YYYY-MM-DD") as writer:
            merged.to_excel(writer, sheet_name=sheet_name)

    # (page url, get_data keyword arguments, workbook, sheet, seed workbook)
    instruments = [
        # GDAXI
        ('https://ru.investing.com/indices/germany-30-historical-data',
         {}, 'GDAXI.xlsx', 'GDAXI', None),
        # IMOEX -- the history is read from IMOEX.xlsx itself. It used to be
        # read from GDAXI.xlsx, which mixed German index rows into this
        # workbook; files written before this fix may still contain them.
        ('https://ru.investing.com/indices/mcx-historical-data',
         {'table_idx': 0}, 'IMOEX.xlsx', 'IMOEX', None),
        # SPX
        ('https://ru.investing.com/indices/us-spx-500-historical-data',
         {}, 'SPX.xlsx', 'SPX', None),
        # Shanghai Composite -- the output stays SSEK.xlsx, but it is now
        # read back as the history too, so rows accumulate between runs.
        # SSEC.xlsx (the only file read before) is used as the seed when
        # SSEK.xlsx does not exist yet.
        ('https://ru.investing.com/indices/shanghai-composite-historical-data',
         {'table_idx': 0}, 'SSEK.xlsx', 'SSEC', 'SSEC.xlsx'),
        # URALS
        ('https://ru.investing.com/commodities/crude-oil-urals-spot-futures-historical-data',
         {'table_idx': 0}, 'urals.xlsx', 'urals', None),
        # EUR/USD
        # NOTE(review): the sheet name 'urals' looks like a copy-paste slip;
        # it is kept unchanged because readers may select the sheet by name.
        ('https://ru.investing.com/currencies/eur-usd-historical-data',
         {'currency': True}, 'curr.xlsx', 'urals', None),
    ]

    for url, fetch_kwargs, file_name, sheet_name, seed_name in instruments:
        fresh = get_data(url, **fetch_kwargs)
        update_history(fresh, file_name, sheet_name, seed_name)

    return None