# **Scraping en temps réel de l'ETH sur le site Investing.com**

## **II- Scraping des données en temps réel**

In [1]:
import pandas as pd
import datetime
import requests
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup
import time

def scrap_content_div(web_content,class_path):
    """Return the text of every <div> nested in the first matching container.

    Args:
        web_content: Parsed page exposing ``find_all`` (a BeautifulSoup tree).
        class_path: Value of the ``class`` attribute identifying the container.

    Returns:
        A list with the text of each nested <div>, or an empty list when no
        container with that class exists on the page.
    """
    containers = web_content.find_all('div', {'class': class_path})
    try:
        first_container = containers[0]
    except IndexError:
        # No container found: report "nothing scraped" instead of failing.
        return []
    return [node.get_text() for node in first_container.find_all('div')]


def scrap_content_dl(web_content,class_path):
    """Return the text of the value cells (<dd>) of the first matching <dl>.

    Args:
        web_content: Parsed page exposing ``find_all`` (a BeautifulSoup tree).
        class_path: Value of the ``class`` attribute identifying the <dl>.

    Returns:
        A list with the text of each <dd> carrying the expected value class,
        or an empty list when no <dl> with that class exists on the page.
    """
    definition_lists = web_content.find_all('dl', {'class': class_path})
    try:
        first_list = definition_lists[0]
    except IndexError:
        # No matching <dl>: report "nothing scraped" instead of failing.
        return []
    value_cells = first_list.find_all('dd', {'class': "text-[#232526] whitespace-nowrap"})
    return [cell.get_text() for cell in value_cells]

import re

# Matches the "change(change%)" text shown next to the price, e.g.
# "+66.48(+2.95%)". Commas are accepted inside the numbers so values with a
# thousands separator (e.g. "-1,024.10(-3.20%)") still match.
_CHANGE_PATTERN = re.compile(r'([-+]?[\d,]*\.?\d+)\(([-+]?[\d,]*\.?\d+%)\)')

# Upper bound, in seconds, on how long a single HTTP request may block.
_REQUEST_TIMEOUT_SECONDS = 10


def _parse_change(text):
    """Split a "change(change%)" string into ``(change, change_rate)``.

    Returns ``([], [])`` when the text does not have the expected shape.
    """
    match = _CHANGE_PATTERN.match(text)
    if match is None:
        return [], []
    # A leading '+' carries no information; a leading '-' is kept.
    return match.group(1).strip('+'), match.group(2).strip('+')


def real_time_price():
    """Scrape the current ETH/USD quote from Investing.com.

    Returns:
        A 9-tuple ``(price, change, change_rate, bid, ask, volume,
        market_cap, day_range, week_range_52)``. Each item is the raw text
        scraped from the page; an item that could not be obtained (network
        failure, missing or unrecognised page section) is an empty list.
    """
    url = 'https://www.investing.com/crypto/ethereum/eth-usd'

    # Every field starts as "missing" so that all return paths yield a
    # complete 9-tuple, whatever part of the page could be parsed.
    price, change, change_rate = [], [], []
    bid, ask, volume, market_cap, day_range, week_range_52 = [], [], [], [], [], []

    try:
        # Without a timeout a stalled connection would block the caller forever.
        r = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        return price, change, change_rate, bid, ask, volume, market_cap, day_range, week_range_52

    web_content = BeautifulSoup(r.text,'html.parser')
    texts_div = scrap_content_div(web_content,"flex flex-wrap gap-x-4 gap-y-2 items-center md:gap-6 mb-3 md:mb-0.5")
    texts_spans = scrap_content_dl(web_content,"flex-1 sm:mr-8")

    if texts_div:
        # First nested div is the price, the next one the "change(change%)" text.
        price = texts_div[0]
        if len(texts_div) > 1:
            change, change_rate = _parse_change(texts_div[1])

    # The summary cells are read by position (bid/ask, volume, market cap,
    # day range, 52-week range); with fewer cells the positions cannot be
    # trusted, so all of them are reported as missing.
    if len(texts_spans) >= 5:
        bid_text, separator, ask_text = texts_spans[0].partition('/')
        if separator:
            bid, ask = bid_text, ask_text
        volume, market_cap, day_range, week_range_52 = texts_spans[1:5]

    return price , change, change_rate, bid , ask , volume, market_cap , day_range, week_range_52


# Smoke test: fetch one snapshot and print the raw scraped tuple.
print(real_time_price())

# Mount Google Drive at /content/drive (this import is only available on Colab).
from google.colab import drive
drive.mount('/content/drive')


import pandas as pd
import datetime
import os

# Column order of every row written to the CSV files.
_CSV_COLUMNS = ['time_stamp', 'price', 'change','change_rate','bid', 'ask', 'volume', 'market_cap', 'day_range', 'week_range_52']

# Multipliers for the magnitude suffixes found in scraped amounts ("11.92B").
_SUFFIX_MULTIPLIERS = {'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}

# Pause between two polls, so the site is not hit in a tight loop and a
# network outage does not flood the CSV files with empty rows.
_POLL_INTERVAL_SECONDS = 1


def _to_float(series):
    """Convert strings such as '2,319.77' to floats; unparseable values become NaN."""
    return pd.to_numeric(series.str.replace(',', ''), errors='coerce')


def _expand_suffix(value):
    """Convert a suffixed amount such as '11.92B' to a plain float (11.92e9).

    Values that are not strings, or whose numeric part cannot be parsed,
    yield NaN, in line with the ``errors='coerce'`` handling of the other
    numeric columns.
    """
    if not isinstance(value, str):
        return float('nan')
    text = value.replace(',', '').strip()
    multiplier = _SUFFIX_MULTIPLIERS.get(text[-1:].upper())
    if multiplier is None:
        multiplier = 1.0
    else:
        text = text[:-1]
    try:
        return float(text) * multiplier
    except ValueError:
        return float('nan')


while True:
    # NOTE(review): the clock is shifted back by one hour before formatting,
    # presumably to compensate for a time-zone difference — confirm.
    time_stamp = datetime.datetime.now() - datetime.timedelta(hours=1)
    time_stamp = time_stamp.strftime('%Y-%m-%d %H:%M:%S')

    # One row: the timestamp followed by the nine scraped fields.
    col_values = [time_stamp, *real_time_price()]
    df = pd.DataFrame([col_values], columns=_CSV_COLUMNS)

    # Plain numeric columns: drop thousands separators and parse as float.
    for column in ('price', 'change', 'bid', 'ask'):
        df[column] = _to_float(df[column])

    # Percentage column: drop the trailing '%' and store as a fraction.
    df['change_rate'] = pd.to_numeric(df['change_rate'].str.rstrip('%'), errors='coerce') / 100.0

    # Suffixed amounts: scale by the suffix actually present (K/M/B/T).
    # Earlier versions multiplied every value by 1e12 regardless of suffix.
    for column in ('volume', 'market_cap'):
        df[column] = df[column].map(_expand_suffix)

    # Daily file, appended without a header. The slice keeps the space that
    # follows the date, so the name is "YYYY-MM-DD ETH_stock_data.csv".
    df.to_csv(time_stamp[0:11] + 'ETH_stock_data.csv', mode='a', header=False, index=False)

    # Cumulative file on Drive; the header is written only when the file is created.
    chemin_fichier_csv = '/content/drive/MyDrive/Colab files/ETH_stock_data.csv'

    df.to_csv(chemin_fichier_csv, mode='a', header=not os.path.exists(chemin_fichier_csv), index=False)

    print(col_values)

    time.sleep(_POLL_INTERVAL_SECONDS)

('2,319.77', '66.48', '2.95%', '2,319.77 ', ' 2,319.78', '11.92B', '278.42B', '2,220-2,323.8', '1,152.53-2,323.8')
Mounted at /content/drive
['2023-12-07 14:44:42', '2,319.77', '66.48', '2.95%', '2,319.77 ', ' 2,319.78', '11.92B', '278.42B', '2,220-2,323.8', '1,152.53-2,323.8']
['2023-12-07 14:44:44', '2,323.96', '70.67', '3.14%', '2,323.96 ', ' 2,323.97', '11.92B', '278.42B', '2,220-2,324.62', '1,152.53-2,324.62']
['2023-12-07 14:44:45', '2,323.96', '70.67', '3.14%', '2,323.96 ', ' 2,323.97', '11.92B', '278.42B', '2,220-2,324.62', '1,152.53-2,324.62']
['2023-12-07 14:44:46', '2,323.96', '70.67', '3.14%', '2,323.96 ', ' 2,323.97', '11.92B', '278.42B', '2,220-2,324.62', '1,152.53-2,324.62']
['2023-12-07 14:44:47', '2,323.96', '70.67', '3.14%', '2,323.96 ', ' 2,323.97', '11.92B', '278.42B', '2,220-2,324.62', '1,152.53-2,324.62']
['2023-12-07 14:44:49', '2,323.96', '70.67', '3.14%', '2,323.96 ', ' 2,323.97', '11.92B', '278.42B', '2,220-2,324.62', '1,152.53-2,324.62']
['2023-12-07 14:44:49

KeyboardInterrupt: ignored

In [None]:
# Mount Google Drive at /content/drive (this import is only available on Colab).
# Same mount as the one performed earlier in the notebook.
from google.colab import drive
drive.mount('/content/drive')