In [1]:
# for scraping article content with BeautifulSoup
import requests
from bs4 import BeautifulSoup
import re

# for rendering html template with Jinja
from jinja2 import Environment, FileSystemLoader
import os

# for building and sending emails with MailChimp
import mailchimp_marketing as MailchimpMarketing
from mailchimp_marketing.api_client import ApiClientError
import json

# for viewing the email draft in a web browser
import webbrowser
from datetime import date

import config as cf
import importlib
cf = importlib.reload(cf)

# Define the Article Object with BeautifulSoup

In [4]:
class Article():
    """One article page, downloaded on construction and scraped lazily.

    The page at ``url`` is fetched in ``__init__``. ``title``, ``img_url``,
    ``preview_text`` and ``tag`` are extracted from the parsed page the first
    time they are read; a value passed to the constructor or assigned through
    the matching setter takes precedence over scraping.

    The defaulted extra parameters on the property getters (``class_name``,
    ``id_name``) can never be supplied by a caller, because attribute access
    only passes ``self``; they act as fixed lookup keys.
    """

    # Seconds to wait for a page before giving up, so that a stalled server
    # cannot hang the notebook indefinitely.
    REQUEST_TIMEOUT = 30

    # Patterns stripped, in this order, from a paragraph to build the preview.
    # The quantifiers are lazy so that each match stops at the FIRST closing
    # marker; a greedy match would run to the LAST one and delete the text in
    # between when a paragraph contains more than one bracketed span.
    _PREVIEW_NOISE_PATTERNS = (
        r'【[^)]*?】',
        r'\n',
        r'（英文大纪元[^)]*?编译）',
        r'（英文大紀元[^)]*?編譯）',
        r'（大紀元[^)]*?報導）',
        r'（大纪元[^)]*?报导）',
    )

    def __init__(self, url, title=None, img_url=None, preview_text=None, tag=None, label=None, class_name='article', id_name='main', video_page_url=None):
        """Download ``url`` and locate the element holding the article.

        Args:
            url: Address of the article; ``/gb/`` is rewritten to ``/b5/``.
            title, img_url, preview_text, tag, label: Optional overrides that
                are returned as-is instead of being scraped.
            class_name: CSS class tried first to find the article container.
            id_name: Element id tried second.
            video_page_url: Optional listing page used as the last resort for
                finding a thumbnail; ``/gb/`` is rewritten to ``/b5/``.
        """
        self._url = self._to_b5_url(url)
        self._title = title
        self._img_url = img_url
        self._preview_text = preview_text
        self._tag = tag
        self._label = label
        # video_page_url defaults to None, so it must not be dereferenced blindly.
        self._video_page_url = self._to_b5_url(video_page_url)

        page = requests.get(self._url, timeout=self.REQUEST_TIMEOUT)
        self._soup = BeautifulSoup(page.text, 'html.parser')

        # Container lookup order: CSS class, then element id, then <article>.
        self._article = (self._soup.find(class_=class_name)
                         or self._soup.find(id=id_name)
                         or self._soup.find('article'))

    @staticmethod
    def _to_b5_url(url):
        """Return ``url`` with ``/gb/`` replaced by ``/b5/``; ``None`` passes through."""
        if url is None:
            return None
        return url.replace('/gb/', '/b5/')

    @classmethod
    def _clean_preview_text(cls, text):
        """Strip datelines, bylines and newlines from one paragraph of text."""
        for pattern in cls._PREVIEW_NOISE_PATTERNS:
            text = re.sub(pattern, '', text)
        return text

    @staticmethod
    def _clean_tag(raw_tag):
        """Remove ASCII and full-width parenthesised suffixes from a tag."""
        raw_tag = re.sub(r'\([^)]*\)', '', raw_tag)
        # Lazy for the same reason as the preview patterns.
        return re.sub(r'（[^)]*?）', '', raw_tag)

    @property
    def url(self):
        """Article address after the ``/gb/`` -> ``/b5/`` rewrite."""
        return self._url

    @url.setter
    def url(self, value):
        self._url = value

    @property
    def title(self, class_name='title'):
        """Title text, scraped from the element with class ``title`` if unset."""
        if not self._title:
            article_title = self._article.find(class_=class_name)
            self._title = article_title.text

        return self._title

    @title.setter
    def title(self, value):
        self._title = value

    @property
    def img_url(self, class_name='featured_image'):
        """Image address for the article.

        Three in-page lookups are tried in order; the first one that does not
        raise wins (its result may still be ``None``). If all three raise, the
        video listing page is searched for a link to this article. When no
        ``video_page_url`` was given, that last step is skipped and ``None``
        is returned.
        """
        if not self._img_url:
            in_page_lookups = (
                lambda: self._article.find(class_=class_name).find('a').get('href'),
                lambda: self._article.find(class_=class_name).find('figure').find('img').get('src'),
                lambda: self._article.find(class_='wp-post-image').get('src'),
            )
            for lookup in in_page_lookups:
                try:
                    self._img_url = lookup()
                    break
                except Exception:
                    # Missing markup for this strategy; fall through to the next.
                    # (Exception, not a bare except, so Ctrl-C still interrupts.)
                    continue
            else:
                self._img_url = self._img_url_from_video_page()

        return self._img_url

    def _img_url_from_video_page(self):
        """Find this article's thumbnail on the video listing page, if one is set."""
        if not self._video_page_url:
            return None
        page = requests.get(self._video_page_url, timeout=self.REQUEST_TIMEOUT)
        video_page_soup = BeautifulSoup(page.text, 'html.parser')
        return video_page_soup.select_one(f"a[href='{self._url}']").find('img').get('src')

    @img_url.setter
    def img_url(self, value):
        self._img_url = value

    @property
    def preview_text(self, id_name='artbody'):
        """First paragraph of the body that is non-empty after cleaning.

        Raises:
            IndexError: if no ``<p>`` under the ``artbody`` element yields text.
        """
        if not self._preview_text:
            for paragraph in self._article.find(id=id_name).find_all('p'):
                self._preview_text = self._clean_preview_text(paragraph.text)
                if self._preview_text:
                    break
            else:
                raise IndexError('no paragraph with preview text found in ' + str(self._url))

        return self._preview_text

    @preview_text.setter
    def preview_text(self, value):
        self._preview_text = value

    @property
    def tag(self, id_name='breadcrumb'):
        """Text of the last breadcrumb link, with parenthesised parts removed."""
        if not self._tag:
            breadcrumb = self._soup.find(id=id_name)
            self._tag = self._clean_tag(breadcrumb.find_all('a')[-1].text)

        return self._tag

    @tag.setter
    def tag(self, value):
        self._tag = value

    @property
    def label(self):
        """Caller-supplied label; never scraped."""
        return self._label

    @label.setter
    def label(self, value):
        self._label = value

In [5]:
class Content(Article):
    """Scraped articles and videos that make up one newsletter issue.

    Attributes set by ``__init__``:
        article_list: one ``Article`` per entry of ``article_url_list``.
        video_list: one ``Article`` per entry of ``video_url_list``.
        video_pair_list: ``video_list`` split into consecutive rows of two.
    """
    # NOTE(review): this class subclasses Article but never calls
    # Article.__init__, so the inherited properties have no backing state on
    # a Content instance — confirm the inheritance is intentional.

    def __init__(self, article_url_list, video_url_list, video_page_url=None):
        """Build an ``Article`` for every URL (each one downloads its page).

        Args:
            article_url_list: URLs of the text articles, in display order.
            video_url_list: URLs of the video articles, in display order.
            video_page_url: passed to every ``Article`` as its ``video_page_url``.
        """
        # parse data
        self.article_list = [Article(article_url, video_page_url=video_page_url) for article_url in article_url_list]

        self.video_list = [Article(video_url, video_page_url=video_page_url) for video_url in video_url_list]
        # Slice in steps of two instead of indexing [counter + 1]: with an odd
        # number of videos the last row holds a single item rather than
        # raising IndexError.
        self.video_pair_list = [self.video_list[start:start + 2]
                                for start in range(0, len(self.video_list), 2)]

In [6]:
class Jinja_Template(Content):
    """Loads the newsletter Jinja template and renders it to an HTML file."""
    # NOTE(review): subclasses Content without calling its __init__ — confirm
    # the inheritance is intentional.

    def __init__(self, 
                 template_folder_name = 'Jinja Templates', 
                 template_file_name = 'newsletter_as_jinja_template_base.html'):
        """Create the Jinja environment and load the base template.

        Args:
            template_folder_name: folder, relative to the parent of the
                working directory, that holds the templates.
            template_file_name: template file to load from that folder.
        """
        file_loader = FileSystemLoader('../'+template_folder_name+'/')
        self.env = Environment(loader=file_loader, extensions=['jinja2.ext.do'])

        # render the base template
        self.template = self.env.get_template(template_file_name)
        
    def render(self, source_contect, 
               folder_name='Exported HTML', 
               rendered_newsletter_filename = 'rendered_newsletter.html'):
        """Render the newsletter and write it to ``../<folder_name>/<filename>``.

        Sets ``subject_line`` (title of the first article), ``preview_text``
        (remaining titles joined with `` · ``), ``rendered_content``,
        ``folder_path`` and ``newsletter_path`` on the instance.

        Args:
            source_contect: object exposing ``article_list`` and
                ``video_pair_list`` (a ``Content`` instance in this notebook).
            folder_name: output folder, created if missing.
            rendered_newsletter_filename: name of the HTML file to write.
        """
        # description: subject_line and preview_text
        article_title_list = [article.title for article in source_contect.article_list]
        self.subject_line = source_contect.article_list[0].title
        self.preview_text = ' · '.join(article_title_list[1:])
        
        self.rendered_content = self.template.render(env=self.env, 
                                                   header=source_contect.article_list[0], 
                                                   article_list=source_contect.article_list, 
                                                   video_pair_list=source_contect.video_pair_list)
        
        self.folder_path = '../' + folder_name + '/'
        
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.folder_path, exist_ok=True)
        
        self.newsletter_path = self.folder_path + rendered_newsletter_filename
        
        # Explicit UTF-8: the rendered text is Chinese, and the platform
        # default encoding (e.g. cp1252 on Windows) cannot represent it.
        with open(self.newsletter_path, "w", encoding="utf-8") as fh:
            fh.write(self.rendered_content)
            
        print('newsletter rendered at: '+ self.newsletter_path)
        
    def preview(self):
        """Open the file written by ``render`` with ``webbrowser.open``."""
        webbrowser.open('file://' + os.path.realpath(self.newsletter_path))

In [7]:
class MailChimp_Campaign(Jinja_Template):
    """Creates and sends a MailChimp campaign from a rendered newsletter."""
    # NOTE(review): subclasses Jinja_Template without calling its __init__ —
    # confirm the inheritance is intentional.

    def __init__(self, newsletter, mailchimp_config,
                 # sender information
                 from_name='Anonymous Sender',
                 reply_to='no_reply@email.com'):
        """Copy the rendered newsletter data and remember the account settings.

        Args:
            newsletter: object exposing ``subject_line``, ``preview_text`` and
                ``rendered_content`` (set by ``Jinja_Template.render``).
            mailchimp_config: mapping with ``API_KEY``, ``template_id``,
                ``list_id`` and, optionally, ``segment_id``.
            from_name: sender name shown on the campaign.
            reply_to: reply-to address of the campaign.
        """
        self.subject_line = newsletter.subject_line
        self.preview_text = newsletter.preview_text
        self.rendered_content = newsletter.rendered_content
        self.mailchimp_config = mailchimp_config

        # sender information
        self.from_name = from_name
        self.reply_to = reply_to
        
        # campaign name
        today = date.today().strftime("%d %B %Y")
        self.campaign_title = 'Campaign_Created_through_API' + today

    def connect_to_MailChimp(self):
        """Create ``self.client`` and ping the API to check the connection."""
        api_key = self.mailchimp_config["API_KEY"]

        self.client = MailchimpMarketing.Client()
        self.client.set_config({
            "api_key": api_key,
            # The server prefix is taken from the part of the key after the last '-'.
            "server": api_key.split('-')[-1]
        })

        response_ping = self.client.ping.get()

        if response_ping.get('health_status') == "Everything's Chimpy!":
            print("MailChimp account connected!")
        else:
            # Report an unexpected answer instead of continuing silently.
            print("Unexpected MailChimp ping response:", response_ping)

    def update_MailChimp_template(self):
        """Upload ``rendered_content`` as the HTML of the configured template."""
        response = self.client.templates.update_template(
            self.mailchimp_config['template_id'], {"name": "Template_Created_through_API", "html": self.rendered_content})
        print("Template updated at:", response['date_edited'])

    def specify_recipients(self):
        """Build ``self.recipients``: the whole list, or one saved segment of it."""
        # segment_id is optional, so a config without the key must not raise KeyError.
        segment_id = self.mailchimp_config.get('segment_id')

        if segment_id:
            self.recipients = {'segment_opts':
                               {'saved_segment_id': segment_id},
                               'list_id': self.mailchimp_config['list_id']}
        else:
            self.recipients = {'list_id': self.mailchimp_config['list_id']}

        print('recipients specified.')

    def create(self):
        """Connect, update the template, pick recipients, then create the campaign.

        The API response is stored in ``self.created_campaign``.
        """
        self.connect_to_MailChimp()
        self.update_MailChimp_template()
        self.specify_recipients()

        campaign_settings = {"title": self.campaign_title,
                             "subject_line": self.subject_line,
                             "preview_text": self.preview_text,
                             "from_name": self.from_name,
                             "reply_to": self.reply_to,
                             "template_id": self.mailchimp_config['template_id'],
                             "auto_footer": False}

        self.created_campaign = self.client.campaigns.create({"type": "regular",
                                                              'recipients': self.recipients,
                                                              "settings": campaign_settings})
        print('Campaign created successfully!')

    def send(self):
        """Send the campaign made by ``create`` and return the API response.

        ``create`` must have been called first; otherwise
        ``self.created_campaign`` does not exist.
        """
        campaign_id = self.created_campaign['id']
        response_of_campaigns_send = self.client.campaigns.send(campaign_id)

        return response_of_campaigns_send

# Processing Part

In [14]:
# Text articles for this issue, in display order. The first one supplies the
# subject line and the header when the newsletter is rendered.
# '/gb/' and '/b5/' URLs may be mixed: Article rewrites '/gb/' to '/b5/'.
article_url_list = ['https://www.epochtimes.com/gb/21/5/2/n12918699.htm',
                    'https://www.epochtimes.com/b5/21/4/27/n12908772.htm',
                    'https://www.epochtimes.com/b5/21/4/28/n12912197.htm',
                    'https://www.epochtimes.com/b5/21/4/28/n12912241.htm']

# Video articles for this issue; Content groups them two per row.
video_url_list = ['https://www.epochtimes.com/gb/21/3/30/n12847118.htm',
                  'https://www.epochtimes.com/gb/21/5/1/n12917729.htm',
                  'https://www.epochtimes.com/gb/21/4/28/n12911917.htm',
                  'https://www.epochtimes.com/gb/21/4/30/n12915264.htm']

# Listing page searched for a thumbnail when an article page yields none.
video_page_url = 'https://www.epochtimes.com/gb/ncid1247351.htm'

In [15]:
# Downloads and parses every article and video page (one HTTP request each).
source_content = Content(article_url_list, video_url_list, video_page_url=video_page_url)

In [16]:
# Sanity check: show the scraped fields of each article, one per line,
# followed by a blank separator line.
for article in source_content.article_list:
    for field_name in ('title', 'tag', 'label', 'preview_text'):
        print(getattr(article, field_name))
    print()

中企悄悄買巨額英國資產 含能源國防領域
國際要聞
None
最新研究發現，中共悄悄購置了1340億英鎊的英國資產，從核電廠到私立學校和比薩連鎖店；有近200家英國公司要麼被中國投資者控制，要麼作為小股東入股。

英航母將停靠日本港口 日防長表示歡迎
國際要聞
None
日本防衛大臣岸信夫週二（4月27日）表示，英國航空母艦打擊群計劃停靠日本港口，「象徵擁有悠久歷史與傳統的日英關係將進入新階段」。他還透露，荷蘭海軍護衛艦將加入英國航空母艦打擊群。

移居英國的香港人租房難
移民新聞
None
英國幾家民間機構于4月26日發佈的一份調查報告顯示：英國的房東不太願意將他們的房產出租給來自香港的BNO簽證申請人。因為他們不能提供在英國租房時通常所需的文件：如工作證明、租房證明或銀行帳戶。目前政府的指導文件在這方面也不明確。

歐冠半決賽首回合：曼城和切爾西搶佔先機
國際足球
None
本週中，歐冠半決賽進行了首回合較量。英超「領頭羊」曼城在客場2：1逆轉了上屆亞軍巴黎聖日耳曼，晉級在望；西甲豪門皇家馬德里在主場1：1與英超強敵切爾西握手言和，晉級形勢已落於下風。



In [110]:
# source_content.article_list[0].title = '疫情导致去年英国经济缩水创300年记录'

In [17]:
# Sanity check: show the scraped fields of each video article.
for article in source_content.video_list:
    try:
        print(article.title)
        print(article.tag)
        print(article.preview_text)
        print(article.img_url)
        print()
    except Exception as error:
        # Keep checking the remaining videos, but say which one failed and
        # why instead of hiding the problem.
        print(f'could not scrape {article.url}: {error!r}')
        print()

《抉擇》5月7日網絡首映 主演談幕後感言
每日新聞
即將網絡首映的新世紀影視年度力作《抉擇》，日前發布了一則演員採訪先導預告片。片中主持人慧月採訪了《抉擇》的主演鄭雪菲、李炎等人，女主角鄭雪菲分享了創作這部影片的契機，以及演員們幕後的心聲。
https://i.epochtimes.com/assets/uploads/2021/05/id12918027-0772aad7bbc2ad554e232f740bed24f3-600x400.jpg

香港台創辦人：讚大紀元記者無畏無懼精神
珍言真語
香港大紀元記者梁珍近日被人跟蹤及敲門滋擾事件曝光後，引發各界關注。香港台（Hongkonger Station）創辦人鄭敬基在採訪中稱讚梁珍為「神奇女俠」，他相信是一種神奇的力量支持梁珍反過來追趕跟蹤她的彪形大漢。梁珍表示，邪惡是最怕曝光的，「證明了我們不用怕邪惡，因爲邪的東西是怕正的，所以你不怕它會怕的」。
https://i.epochtimes.com/assets/uploads/2021/05/id12917736-329fe6b1cbb6e9084273dac6d4aca08b-320x200.jpg

【探索時分】史上4大長途空襲 航程繞地球1周
軍事天地
大家好，我們今天和大家介紹軍事界的四大長途空襲，不管你有多遠，就算飛行44個小時，3.5萬公里，也能找到你。
https://i.epochtimes.com/assets/uploads/2021/04/id12911928-7f7f9d415daa9212c93d95b5e57b7e35-320x200.jpg

【拍案驚奇】白宮遭聲波攻擊？拜登演講斯科特走紅
新聞拍案驚奇
大家好，歡迎收看《新聞拍案驚奇》，我是大宇。
https://i.epochtimes.com/assets/uploads/2021/04/id12915296-d64c932b0b07ac9359b9d2febc354a82-600x400.jpg



In [10]:
# Inspect the image URL resolved for the second video.
source_content.video_list[1].img_url

'https://i.epochtimes.com/assets/uploads/2021/04/id12902492-4eb44cc4941a25c091e950adfc506d55-320x200.jpg'

In [12]:
# List the image URL resolved for every video, one per line.
for video in source_content.video_list:
    print(video.img_url)

https://i.epochtimes.com/assets/uploads/2020/10/0495326fe7e58dcf8ff9471e7a2a529e-320x200.jpg
https://i.epochtimes.com/assets/uploads/2021/04/id12902492-4eb44cc4941a25c091e950adfc506d55-320x200.jpg
https://i.ntdtv.com/assets/uploads/2021/04/maxresdefault-81-800x450.jpg
https://i.ntdtv.com/assets/uploads/2021/04/thumbnail_d-1-7-800x450.jpg


In [29]:
# Manual thumbnail overrides, keyed by position in source_content.video_list.
# One table and one loop replace four copy-pasted assignment cells.
video_img_url_overrides = {
    0: 'https://i.ntdtv.com/assets/uploads/2021/04/maxresdefault-2-16-800x450.jpg',
    1: 'https://i.epochtimes.com/assets/uploads/2021/04/id12871696-731c1292faf8ecb1db04b660aa2871c3-600x400.jpg',
    2: 'https://i.epochtimes.com/assets/uploads/2021/04/id12871426-a948bfecbdb0af2e8292697829ecb2a5-320x200.jpg',
    3: 'https://i.epochtimes.com/assets/uploads/2021/04/id12871886-51960545c5e2f1bfd1522d9ccab9f3d5-320x200.jpg',
}

for video_index, override_url in video_img_url_overrides.items():
    source_content.video_list[video_index].img_url = override_url

In [18]:
# Load the base template using the default folder and file name.
newsletter = Jinja_Template()

In [19]:
# Render the newsletter from the scraped content and write the HTML file.
newsletter.render(source_content)

newsletter rendered at: ../Exported HTML/rendered_newsletter.html


In [20]:
# Open the rendered HTML file in a web browser for a visual check.
newsletter.preview()

# Connect to MailChimp

## Test Campaign

In [21]:
# Settings for the test audience, copied from the local config module.
# segment_id selects the recipients; list_id is the members list.
mailchimp_config_test = {
    setting: getattr(cf.mailchimp_config_test, setting)
    for setting in ("API_KEY", "template_id", "segment_id", "list_id")
}

### Create MailChimp campaign

In [22]:
# Campaign for the test audience; sender details come from the config module.
test_campaign = MailChimp_Campaign(newsletter, mailchimp_config_test, 
                                   from_name=cf.mailchimp_compaign.from_name, 
                                   reply_to=cf.mailchimp_compaign.reply_to)

In [23]:
# Connects, updates the template, picks recipients, then creates the campaign.
test_campaign.create()

MailChimp account connected!
Template updated at: 2021-05-02T13:24:49+00:00
recipients specified.
Campaign created successfully!


In [24]:
# Send the test campaign; the API response is shown as the cell output.
test_campaign.send()

<Response [204]>

## Real Campaign

In [25]:
# Settings for the real audience, copied from the local config module.
# segment_id selects the recipients; list_id is the members list.
mailchimp_config_real = {
    setting: getattr(cf.mailchimp_config_real, setting)
    for setting in ("API_KEY", "template_id", "segment_id", "list_id")
}

In [26]:
# Campaign for the real audience; sender details come from the config module.
real_campaign = MailChimp_Campaign(newsletter, mailchimp_config_real, 
                                   from_name=cf.mailchimp_compaign.from_name, 
                                   reply_to=cf.mailchimp_compaign.reply_to)

In [27]:
# Create the real campaign exactly once: every create() call registers a new
# campaign in MailChimp, so it must not be repeated in this cell.
real_campaign.create()

MailChimp account connected!
Template updated at: 2021-05-02T13:25:00+00:00
recipients specified.
Campaign created successfully!


In [28]:
# Send the real campaign to the configured recipients.
real_campaign.send()

<Response [204]>

## Drafts

In [200]:
# define article tag
tag_list = ['英国新闻', '英国新闻', '英国新闻', '英国新闻']

for index in range(len(tag_list)):
    article_list[index].tag = tag_list[index]
    
# article_list[0].img_url = 'https://i.epochtimes.com/assets/uploads/2020/12/2012112056442124.jpg'