In [None]:
import base64
import html
import os

import requests
import win32com.client as win32
from bs4 import BeautifulSoup
from googletrans import Translator

# Location of the flag images shown next to each news site
def get_flag_path(site_name):
    """Return the path of the flag image associated with ``site_name``.

    The file name is looked up in a fixed site-to-flag table and joined to
    the hard-coded flags directory. Site names that are not in the table
    resolve to ``default.png`` in that same directory.
    """
    flags_directory = "C:\\Users\\Flavio de Brito\\OneDrive - 3hzrmc\\Documentos\\GitHub\\resumo-de-noticias\\bandeiras"

    # Sites grouped by the flag they share, flattened into one lookup table.
    brazilian_sites = (
        "ICL Notícias",
        "Brasil 247",
        "Jornal GGN",
        "Sputnik Brasil",
        "Opera Mundi",
        "Globo",
        "CNN Brasil",
    )
    flag_by_site = {
        **dict.fromkeys(brazilian_sites, "brasil.png"),
        **dict.fromkeys(("Shine - Shanghai Daily", "Xinhua"), "china.png"),
        **dict.fromkeys(("RT - Rede de televisão russa", "Sputnik Internacional"), "russia.png"),
        **dict.fromkeys(("The New York Times", "The Washington Post"), "eua.png"),
        "SCMP - South China Morning Post": "hong-kong.png",
        "NK News": "coreia-do-norte.png",
        "The Guardian": "reino-unido.png",
    }

    try:
        flag_file = flag_by_site[site_name]
    except KeyError:
        flag_file = 'default.png'
    return os.path.join(flags_directory, flag_file)

# Generic function that captures the news headlines of one site
def get_news(url, base_url=None, class_h=None, class_h2=None, translate=False, timeout=15):
    """Scrape headline links from ``url`` and return them as a numbered dict.

    Every ``<a>`` element that has an ``href`` and whose stripped text is
    longer than 20 characters is a candidate headline. When ``class_h`` or
    ``class_h2`` is given, only anchors that contain an ``<h2>`` carrying one
    of those CSS classes are kept; otherwise every candidate is kept.

    Args:
        url: Page to download.
        base_url: Prefix prepended to hrefs that start with a single ``/``.
        class_h: CSS class accepted on the anchor's ``<h2>``.
        class_h2: Alternative CSS class accepted on the anchor's ``<h2>``.
        translate: When true, titles are translated to Portuguese with
            googletrans; if a translation fails the original title is kept.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        Dict mapping ``"<index>. <title>"`` to the link, in page order. The
        dict is empty when the request fails or the status code is not 200.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    try:
        # Without a timeout a stalled server would block the script forever.
        page = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Falha ao acessar a página {url}. Erro: {e}")
        return {}

    if page.status_code != 200:
        print(f"Falha ao acessar a página. Status code: {page.status_code}")
        return {}

    soup = BeautifulSoup(page.text, 'html.parser')
    wanted_classes = [css for css in (class_h, class_h2) if css]
    translator = Translator() if translate else None
    news_dict = {}
    index = 1

    for noticia in soup.find_all('a'):
        titulo_original = noticia.get_text(strip=True)
        href = noticia.get('href')
        if not (titulo_original and href and len(titulo_original) > 20):
            continue

        if wanted_classes:
            h2 = noticia.h2
            if h2 is None:
                continue
            # An <h2> without a class attribute yields None from .get().
            h2_classes = h2.get('class') or []
            if not any(css in h2_classes for css in wanted_classes):
                continue

        if href.startswith('//'):
            # Protocol-relative link: reuse the scheme of the fetched page
            # instead of gluing the site root in front of another host.
            href = url.split('//', 1)[0] + href
        elif base_url and href.startswith('/'):
            href = base_url + href

        # Translate only the headlines that passed the filter.
        titulo_final = titulo_original
        if translator:
            try:
                titulo_final = translator.translate(titulo_original, src='auto', dest='pt').text
            except Exception as e:
                print(f"Erro ao traduzir título: '{titulo_original}'. Erro: {e}")

        news_dict[f"{index}. {titulo_final}"] = href
        index += 1

    return news_dict

# Site-specific scraper functions
def get_news_globo():
    """Fetch headlines from globo.com, filtered by the site's <h2> title classes."""
    return get_news(
        'https://www.globo.com/',
        class_h='post__title',
        class_h2='post-multicontent__link__text',
    )

def get_news_scmp():
    """Fetch headlines from the SCMP home page; titles are not translated."""
    front_page = 'https://www.scmp.com/home'
    return get_news(front_page, base_url='https://www.scmp.com', translate=False)

def get_news_rt():
    """Fetch headlines from the RT news page; titles are not translated."""
    front_page = 'https://www.rt.com/news/'
    return get_news(front_page, base_url='https://www.rt.com', translate=False)

def get_news_nknews():
    """Fetch headlines from the NK News front page; titles are not translated."""
    front_page = 'https://www.nknews.org/'
    return get_news(front_page, base_url='https://www.nknews.org', translate=False)

def get_news_shine():
    """Fetch headlines from the Shine news page; titles are not translated."""
    front_page = 'https://www.shine.cn/news/'
    return get_news(front_page, base_url='https://www.shine.cn', translate=False)

def get_news_sputnik():
    """Fetch headlines from sputnikglobe.com; titles are not translated."""
    front_page = 'https://sputnikglobe.com/'
    return get_news(front_page, base_url='https://sputnikglobe.com', translate=False)

def get_news_iclnoticias():
    """Fetch headlines from iclnoticias.com.br with get_news' default options."""
    front_page = 'https://iclnoticias.com.br/'
    return get_news(front_page)

def get_news_brasil247():
    """Fetch headlines from brasil247.com, filtered by the site's <h2> block classes."""
    return get_news(
        'https://www.brasil247.com/',
        class_h='block__title',
        class_h2='block__subtitle',
    )

def get_news_jornalggn():
    """Fetch headlines from jornalggn.com.br with get_news' default options."""
    front_page = 'https://jornalggn.com.br/'
    return get_news(front_page)

def get_news_xinhua():
    """Fetch headlines from the portuguese.news.cn index page; titles are not translated."""
    front_page = 'https://portuguese.news.cn/index.htm'
    return get_news(front_page, base_url='https://portuguese.news.cn', translate=False)

def get_news_noticiabrasil():
    """Fetch headlines from noticiabrasil.net.br with get_news' default options."""
    front_page = 'https://noticiabrasil.net.br/'
    return get_news(front_page)

def get_news_operamundi():
    """Fetch headlines from operamundi.uol.com.br with get_news' default options."""
    front_page = 'https://operamundi.uol.com.br/'
    return get_news(front_page)

def get_news_cnnbrasil():
    """Fetch headlines from cnnbrasil.com.br, filtered by the site's <h2> home classes."""
    # Attempt to capture more news from CNN Brasil
    return get_news(
        'https://www.cnnbrasil.com.br/',
        class_h='home__title',
        class_h2='home__subtitle',
    )

def get_news_thenewyorktimes():
    """Fetch headlines from the nytimes.com international page; titles are not translated."""
    front_page = 'https://www.nytimes.com/international'
    return get_news(front_page, base_url='https://www.nytimes.com', translate=False)

def get_news_thewashingtonpost():
    """Fetch headlines from the washingtonpost.com front page; titles are not translated."""
    front_page = 'https://www.washingtonpost.com'
    return get_news(front_page, base_url='https://www.washingtonpost.com', translate=False)

def get_news_theguardian():
    """Fetch headlines from the theguardian.com international page; titles are not translated."""
    front_page = 'https://www.theguardian.com/international'
    return get_news(front_page, base_url='https://www.theguardian.com', translate=False)

# Encode an image file as base64 text
def image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    The file is opened in binary mode; the base64 output is decoded as UTF-8
    so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Build the HTML news digest and send it through Outlook
def send_email(news_sites, recipient='brito.flavio@hotmail.com', subject='Resumo de Notícias'):
    """Send an HTML e-mail listing the headlines of every site in ``news_sites``.

    For each site a heading is written (with the site's flag embedded as a
    base64 PNG when the image file exists), followed by one linked paragraph
    per headline returned by the site's scraper function.

    Args:
        news_sites: Dict mapping a site display name to a zero-argument
            callable that returns a ``{title: link}`` dict.
        recipient: Address the message is sent to.
        subject: Subject line, also used as the top heading of the body.
    """
    outlook = win32.Dispatch('outlook.application')
    mail = outlook.CreateItem(0)  # 0 is the mail-item type
    mail.Subject = subject
    mail.To = recipient

    # Collect fragments and join once instead of growing a string with +=.
    parts = [f'<h1>{html.escape(subject)}</h1>']
    for site_name, news_func in news_sites.items():
        safe_name = html.escape(site_name)
        try:
            flag_base64 = image_to_base64(get_flag_path(site_name))
        except FileNotFoundError:
            parts.append(f'<h2>{safe_name} (Imagem da bandeira não encontrada)</h2>')
        else:
            parts.append(
                f'<h2><img src="data:image/png;base64,{flag_base64}" alt="Flag" '
                f'style="width:10px;height:10px;"> {safe_name}</h2>'
            )

        try:
            news_dict = news_func()
        except Exception as e:
            # One broken scraper must not prevent the rest of the digest
            # from being sent; report it and leave this section empty.
            print(f"Erro ao obter notícias de '{site_name}': {e}")
            news_dict = {}

        for title, link in news_dict.items():
            # Titles and links come from scraped pages, so escape them before
            # placing them in the markup (quotes included, for the attribute).
            parts.append(f'<p><a href="{html.escape(link)}">{html.escape(title)}</a></p>')

    mail.HTMLBody = ''.join(parts)
    mail.Send()

# Registry of news sites: display name -> function that scrapes its headlines.
# The e-mail lists the sites in this insertion order.
news_sites = {
    "SCMP - South China Morning Post": get_news_scmp,
    "RT - Rede de televisão russa": get_news_rt,
    "NK News": get_news_nknews,
    "Shine - Shanghai Daily": get_news_shine,
    "Sputnik Internacional": get_news_sputnik,
    "ICL Notícias": get_news_iclnoticias,
    "Brasil 247": get_news_brasil247,
    "Jornal GGN": get_news_jornalggn,
    "Xinhua": get_news_xinhua,
    # Sputnik Brasil is published at noticiabrasil.net.br; get_news_sputnik
    # scrapes the international site already listed above.
    "Sputnik Brasil": get_news_noticiabrasil,
    "Opera Mundi": get_news_operamundi,
    "Globo": get_news_globo,
    "CNN Brasil": get_news_cnnbrasil,
    "The New York Times": get_news_thenewyorktimes,
    "The Washington Post": get_news_thewashingtonpost,
    "The Guardian": get_news_theguardian,
}

# Send the e-mail with the collected news
send_email(news_sites)