In [None]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from selenium.webdriver.chrome.options import Options

# Chrome launch options shared by every webdriver session created in this notebook.
chrome_options = Options()

# Works around the "DevToolsActivePort file doesn't exist" startup error.
chrome_options.add_argument("--no-sandbox")
# Browser window size. Chrome expects "--window-size=WIDTH,HEIGHT" (comma
# separated); the previous "1920x3000" form is not parsed and was ignored.
chrome_options.add_argument("--window-size=1920,3000")
# Google's documentation mentions adding this flag to work around a bug.
chrome_options.add_argument("--disable-gpu")
# Hide scrollbars, to cope with some special pages.
chrome_options.add_argument("--hide-scrollbars")
# Do not load images, to speed up page loads.
chrome_options.add_argument("--blink-settings=imagesEnabled=false")
# Run without a visible UI. On Linux systems without display support, Chrome
# fails to start unless this flag is set.
chrome_options.add_argument("--headless")

In [None]:
def get_paper_status(system_dict):
    """Log in to a manuscript system and collect the status of the listed papers.

    Args:
        system_dict: Mapping that provides the keys "URL", "userid" and
            "password" used to open the site and log in.

    Returns:
        A list of ``(status, id, title)`` tuples, one per table row that
        exposes all three cells, gathered from the author dashboard and from
        each additional queue page that could be opened.
    """
    status_selector = 'td[data-label="status"] .pagecontents'
    id_selector = 'td[data-label="ID"]'
    title_selector = 'td[data-label="title"]'
    # The same table is scraped through two selectors after each queue link.
    # NOTE(review): if both selectors match the same rows, papers are reported
    # twice here; confirm against the page markup.
    queue_selectors = (
        "#authorDashboardQueue_wrapper tbody tr",
        "#authorDashboardQueue tbody tr",
    )

    def first_match(row, selector):
        """Return the first element under *row* matching *selector*, or None."""
        matches = row.find_elements(By.CSS_SELECTOR, selector)
        return matches[0] if matches else None

    def extract_paper_info(rows):
        """Extract (status, id, title) from every row that has all three cells."""
        papers = []
        for row in rows:
            status_cell = first_match(row, status_selector)
            id_cell = first_match(row, id_selector)
            title_cell = first_match(row, title_selector)
            # Rows missing any of the three cells are not paper entries.
            if status_cell is None or id_cell is None or title_cell is None:
                continue

            status = status_cell.text
            id_ = id_cell.text
            # Only the markup before the first <br> is kept as the title.
            title = title_cell.get_attribute("innerHTML").split("<br>")[0].strip()
            papers.append((status, id_, title))

            print(f"Status: {status} \n    ID: {id_} \n Title: {title}")
            print()

        return papers

    # Start the browser.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(system_dict["URL"])
        driver.maximize_window()

        # Log in.
        driver.find_element(By.ID, "USERID").send_keys(system_dict["userid"])
        driver.find_element(By.ID, "PASSWORD").send_keys(system_dict["password"])
        time.sleep(1)
        driver.find_element(By.ID, "logInButton").send_keys(Keys.ENTER)
        time.sleep(2)

        # Open the paper status page (second entry of the top-level navigation).
        driver.find_element(
            By.CSS_SELECTOR, "#header .nav-collapse.toplvlnav ul li:nth-child(2) a"
        ).send_keys(Keys.ENTER)

        # Collect the papers shown on the default dashboard view.
        rows = driver.find_elements(By.CSS_SELECTOR, queue_selectors[0])
        paper_status = extract_paper_info(rows)

        # Visit each additional queue page and collect its papers as well.
        queue_names = [
            "Manuscripts I Have Co-Authored",
            "Manuscripts with Decisions",
            "Awaiting Final Files",
        ]
        for queue_name in queue_names:
            time.sleep(5)
            try:
                queue_link = driver.find_element(By.LINK_TEXT, queue_name)
                queue_link.send_keys(Keys.ENTER)
                time.sleep(5)

                for selector in queue_selectors:
                    rows = driver.find_elements(By.CSS_SELECTOR, selector)
                    paper_status.extend(extract_paper_info(rows))
            except Exception:
                # Best effort: a queue that is missing (or fails to load) for
                # this journal is reported and skipped.
                print(f"{system_dict['URL']} 没有找到 {queue_name} 页面")

        return paper_status
    finally:
        # Always close the browser, even when login or scraping fails, so no
        # headless Chrome process is left behind.
        driver.quit()

In [None]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.utils import formataddr

# Send a status-update email.
def send_email(data, rows=None, cc=None):
    """Email a paper status update, optionally with the paper's status history.

    Args:
        data: Sequence ``(time, status, id, title)`` describing the new status.
        rows: Optional iterable of previously recorded rows laid out like
            ``data``. Rows whose ID equals ``data[2]`` are rendered as a
            history table; rows too short to carry an ID are ignored.
        cc: Optional iterable of addresses to copy on the message.

    Returns:
        None. Sending is best effort: failures are printed, not raised.
    """
    sender_email = "xx@qq.com"  # sender address
    receiver_email = "xx"  # recipient address
    password = "xx"  # sender mailbox password

    smtp_server = "smtp.qq.com"
    smtp_port = 587  # Use 465 if you prefer SSL

    cc_addresses = list(cc) if cc else []

    message = MIMEMultipart("alternative")
    message["Subject"] = f"{data[1]} for {data[2]}"
    message["From"] = formataddr(("Paper Status Update", sender_email))
    message["To"] = receiver_email
    if cc_addresses:
        message["Cc"] = ", ".join(cc_addresses)

    # Earlier records of this paper, ordered from oldest to newest.
    logs = sorted(
        (row for row in (rows or []) if len(row) > 2 and row[2] == data[2]),
        key=lambda row: row[0],
    )

    logs_text = ""
    if logs:
        history = [(log[0], log[1]) for log in logs]
        history.append((data[0], data[1]))  # finish with the new status
        table_rows = "".join(
            f"<tr><td>{when[:11]}</td><td>{status}</td></tr>"
            for when, status in history
        )
        logs_text = (
            "<table border='1' style='border-collapse: collapse;'>"
            "<tr><th>Time</th><th>Status</th></tr>"
            f"{table_rows}"
            "</table>"
        )

    # Create the email content
    html = f"""
    <html>
    <body>
        <p>Hi,</p>
        <p>The status of your paper has been updated.</p>
        <table border="0" style="border-collapse: collapse;">
        <tr><th>Time</th><td>{data[0]}</td></tr>
        <tr><th>Status</th><td>{data[1]}</td></tr>
        <tr><th>ID</th><td>{data[2]}</td></tr>
        <tr><th>Title</th><td>{data[3]}</td></tr>
        </table>
    """

    if logs_text:
        html += f"""
        <p>Previous status:</p>
        <pre>{logs_text}</pre>
    """

    # Close the document whether or not a history table was added.
    html += """
    </body>
    </html>
    """

    message.attach(MIMEText(html, "html"))

    # The Cc header alone does not deliver anything: every recipient has to
    # be listed in the SMTP envelope as well.
    recipients = [receiver_email, *cc_addresses]

    try:
        # The context manager closes the connection even if a step fails.
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.starttls()  # Upgrade the connection to a secure encrypted SSL/TLS connection
            server.login(sender_email, password)
            server.sendmail(sender_email, recipients, message.as_string())

        print("Email sent successfully")
    except Exception as e:
        # Deliberately best effort: a mail failure must not abort the caller.
        print(f"Error sending email: {e}")

In [None]:
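# Store each status in paper_status_history.csv. If the file exists, look up the most recent row for the same paper ID and write a new row only when the status has changed.
# If the file does not exist, write the status directly; each row carries the time it was written.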
import os
import csv
from datetime import datetime


def update_csv(data, file_path, cc=None):
    """Record a paper status in the CSV history and email when it changed.

    Nothing is written or sent when the most recent row for the same ID
    already has the same status and title. Otherwise the row is appended and
    ``send_email`` is called; when the paper already had history, the file is
    then rewritten sorted by ID and time.

    Args:
        data: Sequence ``(time, status, id, title)`` for the current query.
        file_path: Path of the CSV history file; created if missing or empty.
        cc: Optional list of addresses forwarded to ``send_email``.

    Returns:
        None.
    """
    header = ["Time", "Status", "ID", "Title"]

    rows = []
    if os.path.exists(file_path):
        with open(file_path, "r", newline="", encoding="utf-8") as csvfile:
            rows = list(csv.reader(csvfile))

    if not any(rows):
        # Missing file, or one with no content at all: start a fresh history
        # so that the header row is always present.
        with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(header)
            writer.writerow(data)

        send_email(data, cc=cc)
        print("write new submission data")
        return

    # csv.reader yields [] for blank lines; ignore rows too short to index.
    records = [row for row in rows if len(row) >= len(header)]

    # Most recent recorded row for this paper ID, if any.
    last_row = next(
        (row for row in reversed(records) if row[2] == data[2]),
        None,
    )

    # Unchanged status and title: nothing to record.
    if last_row is not None and last_row[1] == data[1] and last_row[3] == data[3]:
        return

    # Persist the new row before emailing or re-sorting.
    with open(file_path, "a", newline="", encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerow(data)

    send_email(data, records, cc=cc)

    if last_row is None:
        print("write new submission data")
        return

    print("update submission data")

    # Re-sort the file by ID (third column) and time (first column).
    with open(file_path, "r", newline="", encoding="utf-8") as csvfile:
        records = [row for row in csv.reader(csvfile) if len(row) >= len(header)]

    # Drop the header only if it is really there, so a headerless file does
    # not lose its first data row.
    if records and records[0] == header:
        records = records[1:]
    records.sort(key=lambda row: (row[2], row[0]))

    with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(records)

In [None]:
# One entry per journal system to poll. "cc" is optional.
system_dicts = [
    {
        "URL": "https://mc.manuscriptcentral.com/xx",  # journal url
        "userid": "xx",  # your user id
        "password": "xx",  # your password
        "cc": [
            "xx",
            "xx",
        ],  # cc emails, optional
    },
    {
        "URL": "https://mc.manuscriptcentral.com/xx",  # journal url
        "userid": "xx",  # your user id
        "password": "xx",  # your password
    },
]

# Poll every configured system and record each paper's current status.
for system_dict in system_dicts:
    paper_status = get_paper_status(system_dict)

    # "paper_id" rather than "id": a loop variable named "id" at notebook
    # top level would rebind the builtin id() for every later cell.
    for status, paper_id, title in paper_status:
        query_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        update_csv(
            (query_time, status, paper_id, title),
            "paper_status_history.csv",
            cc=system_dict.get("cc", None),
        )

In [None]:
# No-op: this cell performs no work.
pass