## Extract News

##### Extract the data of a single news article from www.dailyfx.com

In [6]:
from bs4 import BeautifulSoup
import requests

## www.dailyfx.com 

def extract_dailyfx(url_add, timeout=30):
    """Download one DailyFX article page and pull out its text, headline and date.

    Parameters
    ----------
    url_add : str
        URL of the article page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    tuple of (str, str, str)
        ``(final_text, header_news, date_news)``.  ``final_text`` is the
        non-empty, stripped text of every <p>, <li>, <h1> and <h2> found
        inside the page's first <article>, in document order, joined with
        newlines.  ``header_news`` is the text of the headline <h1> and
        ``date_news`` the whitespace-stripped text of the first <time> tag.
        Any piece that is not present on the page comes back as ``''``.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the server answers with an
        HTTP error status.
    """
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url_add, timeout=timeout)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # soup.find returns None when nothing matches, so look the tags up first
    # and only read their text once we know they exist.
    target_div = soup.find('article')
    header_tag = soup.find('h1', class_='dfx-articleHead__header m-0')
    time_tag = soup.find('time')

    header_news = header_tag.get_text() if header_tag is not None else ''
    # strip() removes the surrounding whitespace regardless of how much there
    # is, rather than assuming exactly one character on each side.
    date_news = time_tag.get_text().strip() if time_tag is not None else ''

    # Collect the text of the wanted tags in document order.
    target_tags = ['p', 'li', 'h1', 'h2']
    all_text = []
    if target_div is not None:
        for element in target_div.find_all(target_tags):
            text = element.get_text().strip()
            if text:  # keep only non-empty text
                all_text.append(text)

    final_text = '\n'.join(all_text)

    return final_text, header_news, date_news

##### Extract all news articles from www.dailyfx.com

In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sys
import re
import warnings
warnings.filterwarnings("ignore")

# Accumulates one dictionary of extracted fields per news article
news_data = []

# Accumulates one {'url', 'type_news'} dictionary per article link found on the listing page
hrefs_news = []

# Plain string literal: there is nothing to interpolate, so no f-string is needed.
Main_url = 'https://www.dailyfx.com/eur-usd/news-and-analysis'

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status surfaces an HTTP error here rather than as a confusing
# parse failure further down.
response = requests.get(Main_url, timeout=30)
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')


# Collect the article links listed in the news-and-analysis container

news_analysis = soup.find('div', {'class': 'dfx-newsAnalysis jsdfx-newsAnalysis pt-2'})

# soup.find returns None when the container is not on the page; treat that as
# "no links" instead of failing with AttributeError.  href=True keeps only
# anchors that actually carry an href, so no None URL is queued.
analysis = news_analysis.find_all('a', href=True) if news_analysis is not None else []

for link in analysis:
    href = link.get('href')
    hrefs_news.append({
        'url': href,
        'type_news': 'news_analysis'
    })


# Collect the article links listed in the forecasts widget

forecast = soup.find('div', {'class': 'dfx-widget dfx-forecastsWidget mw-100 pb-3 pb-lg-4'})

# soup.find returns None when the widget is not on the page; treat that as
# "no forecasts" instead of failing with AttributeError.
forecast_eur = (
    forecast.find_all('div', {'class': 'dfx-forecastsWidget__singleForecast py-2'})
    if forecast is not None
    else []
)

for div in forecast_eur:
    a_element = div.find('a')
    if a_element is None:
        continue
    href = a_element.get('href')
    # Skip anchors without an href (they cannot be fetched later) and the
    # sentiment page, which is excluded from the article list.
    if not href or href == 'https://www.dailyfx.com/sentiment':
        continue
    hrefs_news.append({
        'url': href,
        'type_news': 'forecast'
    })


# Collect the article links that appear as t.co short links in the real-time news feed

# Compiled once up front: the pattern is the same for every text block.
url_pattern = re.compile(r'https://t\.co/[a-zA-Z0-9]+')

tweet_contents = soup.find_all('div', class_='dfx-realTimeNews__tweetContent')

for tweet_content in tweet_contents:

    text_divs = tweet_content.find_all('div', class_='dfx-realTimeNews__text text-dark')

    for text_div in text_divs:
        urls = url_pattern.findall(text_div.text)
        if not urls:
            continue

        # Only the first short link in each text block is followed.
        href = urls[0]
        try:
            # HEAD with redirects resolves the short link to its final URL;
            # the timeout keeps one dead link from hanging the whole cell.
            href_1 = requests.head(href, allow_redirects=True, timeout=30)
        except requests.RequestException as exc:
            # One unreachable short link should not abort the collection.
            print(f'Skipping {href}: {exc}')
            continue

        if href_1.url == 'https://www.dailyfx.com/sentiment':
            continue

        hrefs_news.append({
            'url': href_1.url,
            'type_news': 'tweet_contents'
        })


# Extract the article fields for every collected URL
for href in hrefs_news:
    print(href['url'])
    try:
        fi_text, he_news, date = extract_dailyfx(href['url'])
    except (requests.RequestException, AttributeError) as exc:
        # A network failure, or a page that lacks the expected tags
        # (BeautifulSoup lookups on a missing tag raise AttributeError),
        # should cost us that one article, not everything scraped so far.
        print(f"Skipping {href['url']}: {exc}")
        continue

    news_data.append({
        'link': href['url'],
        'date': date,
        'title': he_news,
        'news_content': fi_text,
        'type_news': href['type_news']
    })

# Build the DataFrame once from the accumulated list of dictionaries
News = pd.DataFrame(news_data)

https://www.dailyfx.com/news/forex-usd-dollar-on-edge-before-fed-decision-technical-setups-on-eur-usd-gbp-usd-20231212.html
https://www.dailyfx.com/news/euro-eur-update-eur-usd-testing-1-0800-zew-sentiment-better-than-expected-20231212.html
https://www.dailyfx.com/news/forex-usd-dollar-forecast-all-eyes-on-us-inflation-setups-on-eur-usd-usd-jpy-gbp-usd-20231211.html
https://www.dailyfx.com/news/euro-price-forecast-colossal-data-filled-week-for-eur-usd-20231211.html
https://www.dailyfx.com/news/forex-markets-week-ahead-gold-eur-usd-gbp-usd-usd-jpy-us-inflation-fed-ecb-boe-20231210.html
https://www.dailyfx.com/news/forex-us-cpi-fed-decision-to-guide-usd-dollar-setups-on-eur-usd-usd-jpy-gbp-usd-20231210.html
https://www.dailyfx.com/news/euro-price-action-setups-eur-usd-eur-gbp-post-gdp-revision-20231207.html
https://www.dailyfx.com/news/eu-breaking-news-eu-gdp-revised-lower-confirming-stagnant-growth-20231207.html
https://www.dailyfx.com/news/forex-euro-price-forecast-eur-threatened-by-do

In [5]:
News

Unnamed: 0,link,date,title,news_content,type_news
0,https://www.dailyfx.com/news/forex-usd-dollar-...,\n2023-12-12 17:30:00\n,"US Dollar on Edge Before Fed Decision, Technic...","US Dollar on Edge Before Fed Decision, Technic...",news_analysis
1,https://www.dailyfx.com/news/euro-eur-update-e...,\n2023-12-12 11:01:42\n,"Euro (EUR) Update: EUR/USD Testing 1.0800, ZEW...","Euro (EUR) Update: EUR/USD Testing 1.0800, ZEW...",news_analysis
2,https://www.dailyfx.com/news/forex-usd-dollar-...,\n2023-12-11 19:30:00\n,"US Dollar Forecast: All Eyes on US Inflation, ...","US Dollar Forecast: All Eyes on US Inflation, ...",news_analysis
3,https://www.dailyfx.com/news/euro-price-foreca...,\n2023-12-11 08:30:00\n,Euro Price Forecast: Colossal Data Filled Week...,Euro Price Forecast: Colossal Data Filled Week...,news_analysis
4,https://www.dailyfx.com/news/forex-markets-wee...,\n2023-12-10 17:00:00\n,"Markets Week Ahead: Gold, EUR/USD, GBP/USD, US...","Markets Week Ahead: Gold, EUR/USD, GBP/USD, US...",news_analysis
5,https://www.dailyfx.com/news/forex-us-cpi-fed-...,\n2023-12-10 07:00:00\n,"US CPI, Fed Decision to Guide US Dollar, Setup...","US CPI, Fed Decision to Guide US Dollar, Setup...",news_analysis
6,https://www.dailyfx.com/news/euro-price-action...,\n2023-12-07 21:00:12\n,"Euro Price Action Setups: EUR/USD, EUR/GBP Pos...","Euro Price Action Setups: EUR/USD, EUR/GBP Pos...",news_analysis
7,https://www.dailyfx.com/news/eu-breaking-news-...,\n2023-12-07 10:45:35\n,"EU Breaking News: EU GDP Revised Lower, Confir...","EU Breaking News: EU GDP Revised Lower, Confir...",news_analysis
8,https://www.dailyfx.com/news/forex-euro-price-...,\n2023-12-06 07:43:12\n,Euro Price Forecast: EUR Threatened by Dovish ...,Euro Price Forecast: EUR Threatened by Dovish ...,news_analysis
9,https://www.dailyfx.com/news/euro-eur-latest-d...,\n2023-12-05 11:57:30\n,Euro (EUR) Latest: Dovish ECB Commentary Weigh...,Euro (EUR) Latest: Dovish ECB Commentary Weigh...,news_analysis
