> # news-letter-bot

In [None]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
import configparser
from pretty_html_table import build_table


## Class & Function

In [None]:
class PyMail:
    """Minimal SMTP mail sender bound to a single account.

    The SMTP connection is opened in ``__init__`` and closed at the end of
    ``send_mail``, so one instance delivers one message.
    """

    def __init__(self, my_email_id, my_email_pw, smtp_host, smtp_port):
        """Store the account credentials and connect to the SMTP server.

        Args:
            my_email_id: Login id of the sending account; also used as the
                envelope sender in ``send_mail``.
            my_email_pw: Password used for the SMTP login.
            smtp_host: Host name of the SMTP server.
            smtp_port: Port of the SMTP server.
        """
        # Account credentials
        self.my_email_id = my_email_id
        self.my_email_pw = my_email_pw

        # Open the connection; TLS and login are performed in send_mail.
        self.session = smtplib.SMTP(smtp_host, smtp_port)

        print(f">>> PyMail 초기화 성공")

    def send_mail(self, target_email_id, title, contents, subtype=False, attachment_path=False):
        """Log in, send one message and close the SMTP session.

        Args:
            target_email_id: Recipient address passed to ``SMTP.sendmail``.
            title: Value of the ``Subject`` header.
            contents: Message body text.
            subtype: MIME text subtype such as ``'html'``; any falsy value
                selects ``'plain'``.
            attachment_path: Path of a file to attach; a falsy value sends
                the message without an attachment.

        A failure while sending is reported on stdout rather than raised;
        errors from ``starttls``/``login`` or from reading the attachment
        propagate to the caller.
        """
        # Upgrade the connection to TLS, then authenticate.
        self.session.starttls()
        self.session.login(self.my_email_id, self.my_email_pw)

        # Subject and body
        msg = MIMEMultipart()
        msg['Subject'] = title
        msg.attach(MIMEText(contents, subtype if subtype else 'plain'))

        # Optional attachment
        if attachment_path:
            file_name = attachment_path.split("/")[-1]
            part = MIMEBase('application', 'octet-stream')
            # Context manager so the file handle is closed after reading.
            with open(attachment_path, 'rb') as attachment:
                part.set_payload(attachment.read())
            encoders.encode_base64(part)
            # Passing filename as a header parameter lets the email package
            # quote/encode it (names with spaces or non-ASCII characters).
            part.add_header('Content-Disposition', 'attachment', filename=file_name)
            msg.attach(part)

        # Send the message
        try:
            # The sender must come from the instance, not from a module-level
            # variable that only exists in the calling script.
            self.session.sendmail(self.my_email_id, target_email_id, msg.as_string())
        except Exception as exc:
            # Best-effort delivery: report the reason instead of hiding it.
            print(f">>> 메일 발송 실패: {exc!r}")
        else:
            print(f">>> 메일 발송 성공")
        finally:
            # Always close the session, whether or not sending succeeded.
            self.session.quit()

In [None]:
def df_to_html_table(df):
    """Return *df* rendered by ``build_table`` with the 'blue_light' theme."""
    return build_table(df, 'blue_light')

## Make Contents

In [None]:
# Search keywords
search_word_list = ['스노우플레이크','팔란티어','테슬라','아메리칸워터웍스','로즈타운모터스','워크호스']
# Sort order
sort = 1            # 0: by relevance, 1: newest first, 2: oldest first


# Requests: query the Naver mobile news search once per keyword and keep the
# first three hits of each.
# NOTE(review): the CSS selectors depend on the page markup — confirm they
# still match the live site.
result_columns = ['Keyword', 'Title', 'Times', 'Source', 'Link', 'SourceLink']
keyword_frames = []
for search_word in search_word_list:

    # Download the search page and parse it into a soup object.
    url = f'https://m.search.naver.com/search.naver?where=m_news&sm=mtb_jum&query={search_word}&sort={sort}'

    # Bounded wait so an unresponsive server cannot hang the run forever.
    req = requests.get(url, timeout=10)
    html = req.text
    soup = BeautifulSoup(html, 'html.parser')

    search_result = soup.select_one('#news_result_list')
    if search_result is None:
        # select_one returns None when the result list is missing from the
        # page; skip this keyword instead of failing on None.select(...).
        continue

    news_links = search_result.select('.bx > .news_wrap > a')
    times = search_result.select("#news_result_list > li > div > div.news_info > div.info_group > span:nth-child(2)")
    source = search_result.select("#news_result_list > li > div.news_wrap > div.news_info > div.info_group > a")

    title_list = [tag.text for tag in news_links]
    link_list = [tag.attrs['href'] for tag in news_links]
    times_list = [tag.text for tag in times]
    source_list = [tag.text for tag in source]
    source_link_list = [tag.attrs['href'] for tag in source]

    tmp = pd.DataFrame({"Title": title_list, "Times": times_list, "Source": source_list, "Link": link_list, "SourceLink": source_link_list})
    tmp['Keyword'] = search_word

    keyword_frames.append(tmp.head(3))

# DataFrame.append was removed in pandas 2.0; concatenate the per-keyword
# frames once instead. ignore_index=True yields the same 0..n-1 index the
# original assigned with range(len(df)).
if keyword_frames:
    df = pd.concat(keyword_frames, ignore_index=True)[result_columns]
else:
    # No keyword produced a result list: keep the expected columns so the
    # following cells still find them.
    df = pd.DataFrame(columns=result_columns)

## Preprocessing

In [None]:
def _format_article_date(text, now):
    """Convert a scraped time label into a ``YYYY-MM-DD`` string.

    Recognised labels are an absolute date in ``%Y.%m.%d.`` form and the
    relative forms "N분 전" (minutes ago), "N시간 전" (hours ago) and
    "N일 전" (days ago), which are subtracted from *now*.

    Args:
        text: Raw label taken from the ``Times`` column.
        now: Reference ``datetime`` for the relative labels.

    Returns:
        The formatted date, or *text* unchanged when it matches none of the
        recognised forms or cannot be parsed.
    """
    # (marker to detect, token to split on, relativedelta keyword)
    relative_units = (
        ("분 전", "분", "minutes"),
        ("시간 전", "시간", "hours"),
        ("일 전", "일", "days"),
    )
    try:
        if "." in text:
            resolved = datetime.datetime.strptime(text, "%Y.%m.%d.")
        else:
            for marker, unit, delta_arg in relative_units:
                if marker in text:
                    amount = int(text.split(unit)[0])
                    resolved = now - relativedelta(**{delta_arg: amount})
                    break
            else:
                # Unknown label: show it as-is rather than failing later on
                # a missing timestamp.
                return text
    except ValueError:
        # int() or strptime() rejected the label; fall back to the raw text.
        return text
    return resolved.strftime("%Y-%m-%d")


# Wrap the title and the source name in anchors pointing at their links.
# NOTE(review): the scraped text is interpolated without HTML escaping.
new_title_list = [
    f"""<a href="{link}">{title}</a>"""
    for link, title in zip(df['Link'], df['Title'])
]
new_source_list = [
    f"""<a href="{source_link}">{source}</a>"""
    for source_link, source in zip(df['SourceLink'], df['Source'])
]

df['Title_Link'] = new_title_list
df['Source_Link'] = new_source_list

# Resolve the time labels against a single reference time.
now = datetime.datetime.now()
df['Times_'] = [_format_article_date(label, now) for label in df['Times']]

# Result: keep the display columns and give them their Korean headers.
colDict = {"Keyword": "주제",
           "Title_Link": "제목",
           "Times_": "날짜",
           "Source_Link": "채널"}
df_cls = df[['Keyword','Title_Link','Times_','Source_Link']].rename(columns=colDict)

## Convert from DF to HTML

In [None]:
# Convert the result table to HTML, then turn the escaped angle brackets back
# into literal ones so the anchor tags stored in the cells become real markup.
html_contents = df_to_html_table(df_cls)
for escaped, literal in (("&lt;", "<"), ("&gt;", ">")):
    html_contents = html_contents.replace(escaped, literal)

## Setting

In [None]:
# Path of the settings file
config_path = "./config.ini"

# Load the settings file
config = configparser.ConfigParser()
config.read(config_path)

# E-mail account credentials
gmail_account = config['gmail']
my_email_id = gmail_account['id']
my_email_pw = gmail_account['pw']

# SMTP host name and port
smtp_host, smtp_port = 'smtp.gmail.com', 587

# Compose the subject line and the body
date_str = datetime.datetime.now().strftime('%Y년 %m월 %d일')
title = f"[{date_str} 데일리 뉴스레터]"
contents = f"{html_contents}"

# # Attachment path
# attachment_path = f"D:/Task.txt"

# Recipient
target_email_id = config['test']['id']

# Body document type - plain, html, etc.
subtype = 'html'

## Sending

In [None]:
# Open the mail session
PM = PyMail(
    my_email_id=my_email_id,
    my_email_pw=my_email_pw,
    smtp_host=smtp_host,
    smtp_port=smtp_port,
)
# Send the newsletter
# PM.send_mail(target_email_id, title, contents, subtype, attachment_path)
PM.send_mail(
    target_email_id=target_email_id,
    title=title,
    contents=contents,
    subtype=subtype,
)