## **Mail Send**

# In [None]:
import os
import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


def _fetch_soup(url, timeout=15):
    """Download ``url`` and return it parsed by BeautifulSoup, or ``None``.

    A network error or a non-200 response is reported on stdout and yields
    ``None`` so that one unreachable site does not abort the whole run.

    Args:
        url: Page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Skipping {url}: request failed ({exc})')
        return None
    if response.status_code != 200:
        print(f'Skipping {url}: unexpected HTTP status {response.status_code}')
        return None
    return BeautifulSoup(response.text, 'html.parser')


def _scrape_cbr_anime():
    """Yield ``(title, link, date)`` for each card on the CBR anime page."""
    base_url = 'https://www.cbr.com/'
    soup = _fetch_soup('https://www.cbr.com/category/anime/')
    if soup is None:
        return
    for block in soup.find_all('div', class_='w-display-card-content'):
        heading = block.find('h5', class_='display-card-title')
        anchor = heading.find('a') if heading is not None else None
        time_tag = block.find('time', class_='display-card-date')
        # Skip cards missing any required piece instead of crashing the run.
        if anchor is None or time_tag is None:
            continue
        href = anchor.get('href')
        stamp = time_tag.get('datetime')
        if not href or not stamp:
            continue
        # Keep only the part of the ``datetime`` attribute before 'T'.
        yield anchor.text.strip(), urljoin(base_url, href), stamp.split('T')[0]


def _scrape_hashnode_data_science():
    """Yield ``(title, link, date)`` for each post on Hashnode's data-science page."""
    url = 'https://hashnode.com/n/data-science'
    soup = _fetch_soup(url)
    if soup is None:
        return
    for section in soup.find_all('section', class_='flex flex-col gap-2 sm:gap-4'):
        heading = section.find('h1', class_='font-heading text-base sm:text-xl font-semibold sm:font-bold text-slate-700 dark:text-slate-200 hn-break-words cursor-pointer')
        # The heading is expected to be wrapped by the anchor carrying the link.
        anchor = heading.find_parent('a', href=True) if heading is not None else None
        date_tag = section.find('p', class_='text-sm text-slate-500 dark:text-slate-400 font-normal')
        if anchor is None or date_tag is None:
            continue
        # Resolve against the page URL so relative hrefs become absolute;
        # already-absolute hrefs pass through urljoin unchanged.
        yield heading.text.strip(), urljoin(url, anchor['href']), date_tag.text.strip()


def _scrape_interesting_engineering():
    """Yield ``(title, link, date)`` for each item on Interesting Engineering's news page."""
    base_url = 'https://interestingengineering.com/'
    soup = _fetch_soup('https://interestingengineering.com/news/page/1')
    if soup is None:
        return
    for block in soup.find_all('div', class_='Category_result__description__iz_rw'):
        anchor = block.find('a', href=True)
        heading = (
            anchor.find('h2', class_='Category_result__header__HQgVv')
            if anchor is not None else None
        )
        date_tag = block.find('span', class_='Category_result__author__publishTime__nwLBU')
        if heading is None or date_tag is None:
            continue
        yield heading.text.strip(), urljoin(base_url, anchor['href']), date_tag.text.strip()


def _scrape_wired_science():
    """Yield ``(title, link, date)`` for each summary item on Wired's science page.

    When an item has no ``<time>`` element, the date slot carries the item's
    rubric text instead, or "N/A" when the rubric is missing as well.
    """
    base_url = 'https://www.wired.com/'
    soup = _fetch_soup('https://www.wired.com/category/science/')
    if soup is None:
        return
    for block in soup.find_all('div', class_='SummaryItemContent-eiDYMl'):
        heading = block.find('h3', class_='SummaryItemHedBase-hiFYpQ')
        anchor = block.find('a', class_='SummaryItemHedLink-civMjp')
        if heading is None or anchor is None or not anchor.get('href'):
            continue
        rubric = block.find('span', class_='RubricName-fVtemz')
        category = rubric.text.strip() if rubric is not None else "N/A"
        time_tag = block.find('time')
        date = time_tag.text.strip() if time_tag is not None else category
        yield heading.text.strip(), urljoin(base_url, anchor['href']), date


def _scrape_techcrunch_startups():
    """Yield ``(title, link, date)`` for each post on TechCrunch's startups page."""
    url = 'https://techcrunch.com/category/startups/'
    soup = _fetch_soup(url)
    if soup is None:
        return
    for block in soup.find_all('div', class_='post-block'):
        heading = block.find('h2', class_='post-block__title')
        anchor = block.find('a', class_='post-block__title__link')
        time_tag = block.find('time')
        if heading is None or anchor is None or time_tag is None:
            continue
        href = anchor.get('href')
        if not href:
            continue
        # The date is the visible text of <time>, stripped like every other field.
        yield heading.text.strip(), urljoin(url, href), time_tag.text.strip()


def _email_results(attachment_path):
    """Email the file at ``attachment_path`` to each configured recipient.

    One SMTP session is opened and reused for all recipients; the ``with``
    block closes it even when login or sending raises.

    Raises:
        OSError: If the attachment cannot be read or the connection fails.
        smtplib.SMTPException: If STARTTLS, login or sending fails.
    """
    smtp_server = 'smtp.gmail.com'
    smtp_port = 587
    # SECURITY(review): a real app password is committed in this file. The
    # literals remain only as fallbacks so existing runs keep working; rotate
    # the password, provide SMTP_USERNAME / SMTP_PASSWORD through the
    # environment, and then delete the fallbacks.
    smtp_username = os.environ.get('SMTP_USERNAME', 'ortakallan@gmail.com')
    smtp_password = os.environ.get('SMTP_PASSWORD', 'iubq cogz nxyi nnqs')

    recipient_emails = ['fatih.821@outlook.com', 'yusuf.cinarci@gmail.com']
    body = 'Please find the attached web scraping results.'

    with open(attachment_path, 'rb') as file:
        payload = file.read()
    # Registered MIME subtype for .xlsx workbooks ("xlsx" alone is not one).
    attachment = MIMEApplication(
        payload,
        _subtype='vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    )
    attachment.add_header(
        'Content-Disposition',
        'attachment',
        filename=os.path.basename(attachment_path),
    )

    with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
        server.starttls()
        server.login(smtp_username, smtp_password)
        # Each recipient gets an individual message so addresses are not
        # disclosed to one another.
        for recipient_email in recipient_emails:
            msg = MIMEMultipart()
            msg['From'] = smtp_username
            msg['To'] = recipient_email
            msg['Subject'] = 'Web Scraping Results'
            msg.attach(MIMEText(body, 'plain'))
            msg.attach(attachment)
            server.sendmail(smtp_username, recipient_email, msg.as_string())

    print('Email sent successfully to the recipients:', ', '.join(recipient_emails))


def scrape_and_save_data():
    """Scrape the configured sites, save the rows to Excel and email the file.

    Rows are written to ``web_scraping_results.xlsx`` in the current working
    directory with the columns Site, Title, Link and Date, and that file is
    then sent as an attachment. A site that cannot be fetched or yields no
    rows is reported on stdout and skipped; the workbook is still written
    and emailed.
    """
    output_path = 'web_scraping_results.xlsx'

    # Order determines the row order in the workbook.
    scrapers = (
        ("CBR/Anime", _scrape_cbr_anime),
        ("Hashnode/Data Science", _scrape_hashnode_data_science),
        ("InterestingEngineering", _scrape_interesting_engineering),
        ("Wired/Science", _scrape_wired_science),
        ('TechCrunch', _scrape_techcrunch_startups),
    )

    rows = []
    for site_name, scraper in scrapers:
        site_rows = [(site_name, *row) for row in scraper()]
        if not site_rows:
            # Usually means the site was unreachable or its markup changed.
            print(f'Warning: no articles collected from {site_name}')
        rows.extend(site_rows)

    df = pd.DataFrame(rows, columns=['Site', 'Title', 'Link', 'Date'])
    df.to_excel(output_path, index=False)

    _email_results(output_path)

if __name__ == "__main__":
    # Run the scrape-and-email pipeline only when executed directly (script or
    # notebook cell), so importing this module has no network or email side
    # effects.
    scrape_and_save_data()