> # news-letter-bot

In [1]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
from dateutil.relativedelta import relativedelta
import configparser
from pretty_html_table import build_table

## Classes & Functions

In [2]:
class PyMail:
    """Small SMTP helper that sends one e-mail, optionally with an attachment.

    The SMTP connection is opened in the constructor and is always closed at
    the end of ``send_mail``, so create a new ``PyMail`` for every message.
    """

    def __init__(self, my_email_id, my_email_pw, smtp_host, smtp_port):
        """Store the sender credentials and open the SMTP connection.

        Args:
            my_email_id: Sender address, also used as the login name.
            my_email_pw: Password used for ``login``.
            smtp_host: SMTP server host name.
            smtp_port: SMTP server port.
        """
        # Account credentials
        self.my_email_id = my_email_id
        self.my_email_pw = my_email_pw

        # Open the SMTP connection
        self.session = smtplib.SMTP(smtp_host, smtp_port)

        print(f">>> PyMail 초기화 성공")

    def send_mail(self, target_email_id, title, contents, subtype=False, attachment_path=False):
        """Log in, build the message and send it, then close the session.

        Args:
            target_email_id: Recipient address, or a list of addresses.
            title: Subject line.
            contents: Message body.
            subtype: MIME text subtype (e.g. ``'html'``); a falsy value means ``'plain'``.
            attachment_path: Path of a file to attach; a falsy value means no attachment.

        Raises:
            smtplib.SMTPException: If STARTTLS or login fails.
            OSError: If the attachment cannot be read.

        A failure while handing the message to the server is reported with a
        printed message instead of being raised.
        """
        try:
            # Upgrade the connection to TLS, then authenticate
            self.session.starttls()
            self.session.login(self.my_email_id, self.my_email_pw)

            # Subject, addressing headers and body
            msg = MIMEMultipart()
            msg['Subject'] = title
            msg['From'] = self.my_email_id
            if isinstance(target_email_id, str):
                msg['To'] = target_email_id
            else:
                msg['To'] = ", ".join(target_email_id)
            msg.attach(MIMEText(contents, subtype or 'plain'))

            # Optional attachment
            if attachment_path:
                file_name = attachment_path.split("/")[-1]
                part = MIMEBase('application', 'octet-stream')
                # The context manager closes the file even if reading fails
                with open(attachment_path, 'rb') as attachment:
                    part.set_payload(attachment.read())
                encoders.encode_base64(part)
                # Let the email package quote/encode the file name parameter
                part.add_header('Content-Disposition', 'attachment', filename=file_name)
                msg.attach(part)

            # Send; use the instance's own sender address, not a notebook global
            try:
                self.session.sendmail(self.my_email_id, target_email_id, msg.as_string())
                print(f">>> 메일 발송 성공")
            except (smtplib.SMTPException, OSError) as exc:
                print(f">>> 메일 발송 실패: {exc}")
        finally:
            # Always release the connection, whatever happened above
            try:
                self.session.quit()
            except smtplib.SMTPException:
                self.session.close()

In [3]:
def df_to_html_table(df, index=False):
    """Render ``df`` as an HTML table with ``build_table``'s 'blue_light' theme.

    ``index`` is accepted for callers' convenience but is not used here.
    """
    table_html = build_table(df, 'blue_light')
    return table_html

def make_contents(search_word_list, sort):
    """Scrape Naver mobile news search and keep the top 3 hits per keyword.

    Args:
        search_word_list: Iterable of search keywords.
        sort: Sort code forwarded to the ``sort`` query parameter.

    Returns:
        DataFrame with columns Keyword, Title, Times, Source, Link and
        SourceLink and a fresh 0..n-1 index. It is empty (but still has
        these columns) when there are no keywords or no results.
    """
    columns = ['Keyword', 'Title', 'Times', 'Source', 'Link', 'SourceLink']
    request_timeout = 10  # seconds; avoids hanging forever on a stalled connection

    frames = []
    for search_word in search_word_list:

        # Passing the query via `params` percent-encodes it, so keywords
        # containing '&', '#' or spaces no longer corrupt the URL.
        req = requests.get(
            'https://m.search.naver.com/search.naver',
            params={'where': 'm_news', 'sm': 'mtb_jum', 'query': search_word, 'sort': sort},
            timeout=request_timeout,
        )
        soup = BeautifulSoup(req.text, 'html.parser')

        search_result = soup.select_one('#news_result_list')
        if search_result is None:
            # No result list in the page for this keyword: skip it instead of crashing
            continue

        news_links = search_result.select('.bx > .news_wrap > a')
        times = search_result.select("#news_result_list > li > div > div.news_info > div.info_group > span:nth-child(2)")
        source = search_result.select("#news_result_list > li > div.news_wrap > div.news_info > div.info_group > a")

        # NOTE(review): the selectors are evaluated independently; if a result
        # lacks one of the elements the lists differ in length and the
        # DataFrame construction below raises ValueError.
        tmp = pd.DataFrame({
            "Title": [tag.text for tag in news_links],
            "Times": [tag.text for tag in times],
            "Source": [tag.text for tag in source],
            "Link": [tag.attrs['href'] for tag in news_links],
            "SourceLink": [tag.attrs['href'] for tag in source],
        })
        tmp['Keyword'] = search_word

        frames.append(tmp.head(3))

    if not frames:
        return pd.DataFrame(columns=columns)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(frames, ignore_index=True)[columns]

def _parse_news_time(text, now):
    """Turn a scraped time label into a datetime, or None if it is not recognised.

    Recognised forms: "<n>일 전" (days ago), "<n>시간 전" (hours ago),
    "<n>분 전" (minutes ago) and absolute dates written as "YYYY.MM.DD.".
    """
    text = str(text).strip()
    relative_units = (
        ("일 전", "일", "days"),
        ("시간 전", "시간", "hours"),
        ("분 전", "분", "minutes"),
    )
    for marker, unit, field in relative_units:
        if marker in text:
            try:
                amount = int(text.split(unit)[0])
            except ValueError:
                return None
            return now - datetime.timedelta(**{field: amount})
    if "." in text:
        try:
            return datetime.datetime.strptime(text, "%Y.%m.%d.")
        except ValueError:
            return None
    return None


def preprocessing(df):
    """Build the display table: linked titles/sources and normalised dates.

    Args:
        df: DataFrame with columns Keyword, Title, Times, Source, Link, SourceLink.

    Returns:
        New DataFrame with the columns 주제, 제목, 날짜, 채널. Title and source
        are wrapped in ``<a href=...>`` tags and the date is formatted as
        YYYY-MM-DD. A time label that cannot be parsed is kept as-is.
        The input frame is not modified.
    """
    # Work on a copy so the caller's frame does not silently gain columns
    result = df.copy()

    result['Title_Link'] = [
        f"""<a href="{link}">{title}</a>"""
        for title, link in zip(result['Title'], result['Link'])
    ]
    result['Source_Link'] = [
        f"""<a href="{source_link}">{source}</a>"""
        for source, source_link in zip(result['Source'], result['SourceLink'])
    ]

    # Resolve relative labels against a single reference time
    now = datetime.datetime.now()
    dates = []
    for raw_time in result['Times']:
        parsed = _parse_news_time(raw_time, now)
        dates.append(parsed.strftime("%Y-%m-%d") if parsed is not None else raw_time)
    result['Times_'] = dates

    # Final column selection and display names
    colDict = {"Keyword": "주제",
               "Title_Link": "제목",
               "Times_": "날짜",
               "Source_Link": "채널"}
    df_cls = result[['Keyword', 'Title_Link', 'Times_', 'Source_Link']].rename(columns=colDict)
    return df_cls

In [11]:
def merge_with_html_template(contents, sort):
    """
    Wrap the newsletter body in the HTML e-mail template.

    Args:
        contents: HTML fragment inserted into the template's main content area.
        sort: Sort code (0, 1 or 2) used to look up the label shown in the
            "조회기준" bullet; any other value raises KeyError.

    Returns:
        The complete HTML document as a string.
    """
    
    # Display label for each supported sort code
    sort_dict = {0: "관련도순", 1: "최신순", 2: "오래된순"}
    
    
    # f-string template: only {sort_dict[sort]} and {contents} are interpolated
    html = f"""
    
<div style="width:100%">

    <div style="max-width:600px;margin:0 auto;padding:60px 0 30px 0;font-family:'Roboto',Arial,Helvetica,sans-serif;font-size:16px;line-height:1.5;border:1px solid #e2e2e2">
  
      <div align="center" style="padding-right:0px;padding-left:0px" class="logo-area">
        <div style="font-size:1px;line-height:20px">&nbsp;</div><a href="https://wooiljeong.github.io" style="outline:none" target="_blank"> <img align="center" border="0" src="https://avatars.githubusercontent.com/u/38076110?v=4" alt="Logo" title="Logo" style="text-decoration-line: none; height: auto; border: none; width: 100%; max-width: 143px; display: block;" width="143"></a>
        <div style="font-size:1px;line-height:20px">&nbsp;</div>
      </div>
  
      <hr style="border:0;border-top:solid 1px #e2e2e2;width:90%;margin:30px auto" class="horizontal-line">
  
      <div style="max-width:90%;margin-left:auto;margin-right:auto;margin-top:40px" class="nomal-paragraph">
  
        <div style="margin-top:20px">
          안녕하세요. 키워드 뉴스레터 템플릿 입니다.
        </div>
  
      </div>
  
      <div style="max-width:90%;margin-left:auto;margin-right:auto;margin-top:20px" class="bullet-point">
        <ul>
          <li>조회기준: {sort_dict[sort]}</li>
        </ul>
      </div>
  
      <div style="max-width:90%;margin-left:auto;margin-right:auto;margin-top:20px" class="bullet-point">
        <ol>
          <!-- <li>순서 리스트</li> -->
        </ol>
      </div>
      
      <!-- 메인 이미지 넣기 -->
      <div align="center" style="padding-right:0px;padding-left:0px;margin-top:40px" class="full-image">
        <!-- <img align="center" border="0" src="https://steemitimages.com/640x0/https://cdn.steemitimages.com/DQmdZZLcyLv6Z1BXQ7mfarYq5a2MnAhimaJfCq2KAxYoSKT/main.png" alt="Image" title="Image" style="border: 0px; height: auto; width: 100%; display: block;"> -->
      </div>
  
      <div style="max-width:90%;margin-left:auto;margin-right:auto;margin-top:40px" class="nomal-paragraph">
  
        <div style="margin-top:20px">
          {contents}
        </div>
  
      </div>
  
      <div align="center" style="padding-top:40px;padding-right:10px;padding-bottom:10px;padding-left:10px">
        <a href="#" style="text-decoration-line: none; display: inline-block; color: rgb(255, 255, 255); background-color: rgb(0, 0, 0); border-radius: 60px; width: auto; border-width: 1px; border-style: solid; border-color: rgb(0, 0, 0); padding: 10px 25px;" target="_blank">더 보기</a>
      </div>
  
      <div style="text-align:center;">
        <a style="font-size:12px;color:silver" href="mailto:mcwooil2@gmail.com?subject=Unsubscribe!&amp;body=I&nbsp;don't&nbsp;want&nbsp;to&nbsp;receive&nbsp;an&nbsp;email&nbsp;from&nbsp;your&nbsp;service!" target="_blank">Unsubscribe from emails</a>
      </div>
  
    </div>
  </div>
    
    """
    
    return html

In [12]:
def make_final_contents(search_word_list, sort=1):
    """
    Produce the finished newsletter HTML for the given keywords.

    sort: ordering of the search results - 0: relevance, 1: newest first, 2: oldest first
    """
    # Scrape the news, then shape it into the display table
    news_table = preprocessing(make_contents(search_word_list, sort))

    # Render the table as HTML
    table_html = df_to_html_table(news_table)

    # Turn the escaped angle brackets back into real tags
    # (presumably build_table HTML-escapes the anchor tags in the cells - TODO confirm)
    for escaped, literal in (("&lt;", "<"), ("&gt;", ">")):
        table_html = table_html.replace(escaped, literal)

    # Embed the table in the newsletter template
    return merge_with_html_template(table_html, sort)

## Make Contents

In [19]:
# Keywords to search for
search_word_list = [
    '네이버',
    '카카오',
    '라인',
    '쿠팡',
    '배달의민족',
]

# Build the newsletter body, ordered by relevance (sort=0)
contents = make_final_contents(search_word_list, sort=0)

## Setting

- `./config.ini`
```ini
[gmail]
id=G메일 주소
pw=G메일 앱 비밀번호
[target]
id=받는 사람 메일 주소
```

In [20]:
# Path to the settings file
config_path = "./config.ini"

# Read the settings file.
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail here with a clear message instead of a later
# KeyError on config['gmail'].
config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f"Settings file not found or unreadable: {config_path}")

# Sender account
my_email_id = config['gmail']['id']
my_email_pw = config['gmail']['pw']

# SMTP server
smtp_host = 'smtp.gmail.com'
smtp_port = 587

# Subject line; `contents` built in the "Make Contents" cell is used unchanged as the body
date_str = datetime.datetime.now().strftime('%Y년 %m월 %d일')
title = f"""📢 정우일 키워드 뉴스레터 ({date_str})"""

# # Optional attachment path
# attachment_path = f"D:/Task.txt"

# Recipient
target_email_id = config['target']['id']

# MIME text subtype of the body - plain, html, ...
subtype = 'html'

## Sending

In [21]:
# Open the SMTP session
PM = PyMail(
    my_email_id=my_email_id,
    my_email_pw=my_email_pw,
    smtp_host=smtp_host,
    smtp_port=smtp_port,
)
# Send the newsletter (add attachment_path=... to attach a file)
PM.send_mail(
    target_email_id=target_email_id,
    title=title,
    contents=contents,
    subtype=subtype,
)

>>> PyMail 초기화 성공
>>> 메일 발송 성공
