# In [None]:
import os
import json
import requests
import xml.etree.ElementTree as ET
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timedelta
from openpyxl import Workbook, load_workbook

# File name settings
keyword_file = "search_keywords.json"  # JSON file that stores the search keywords
result_file = "PubMed_results.xlsx"  # Excel workbook that receives the results

# Sensitive values come from environment variables; each is None when unset
api_key = os.environ.get("PUBMED_API_KEY")
sender_email = os.environ.get("SENDER_EMAIL")
sender_password = os.environ.get("SENDER_PASSWORD")
recipient = os.environ.get("RECIPIENT")

# Save the keywords
def save_keywords(keywords):
    """Serialize ``keywords`` as JSON into ``keyword_file``.

    The file is opened in write mode, so any previous content is replaced.
    """
    with open(keyword_file, mode="w") as out:
        json.dump(keywords, fp=out)

# Load the keywords
def load_keywords():
    """Return the keywords stored in ``keyword_file``.

    Returns:
        The value decoded from the JSON file, or ``["RNA splicing"]`` when the
        file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # Read explicitly as UTF-8: JSON files are UTF-8 by convention, and the
        # platform default encoding (e.g. cp932/cp1252 on Windows) would fail
        # on a hand-edited file containing non-ASCII keywords.
        with open(keyword_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return ["RNA splicing"]

# Search PubMed (E-utilities ESearch)
def search_pubmed(query, start_date, end_date, api_key, retmax=10, timeout=30):
    """Search PubMed for ``query`` restricted to a publication-date range.

    Args:
        query: Search term; it is combined with the date range using ``AND``.
        start_date: Start of the range, inserted before ``:`` in the ``[dp]`` filter.
        end_date: End of the range, inserted after ``:`` in the ``[dp]`` filter.
        api_key: Value sent as the ``api_key`` request parameter.
        retmax: Maximum number of IDs to request (default 10, the previous
            hard-coded value).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The ``idlist`` entry of the ``esearchresult`` object in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response lacks ``esearchresult``/``idlist``.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    date_range = f"{start_date}:{end_date}[dp]"
    full_query = f"{query} AND {date_range}"
    params = {
        "db": "pubmed",
        "term": full_query,
        "retmode": "json",
        "sort": "date",
        "retmax": retmax,
        "api_key": api_key,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()["esearchresult"]["idlist"]

# Fetch article details from PubMed (E-utilities EFetch)
def fetch_abstracts(ids, api_key, timeout=30):
    """Download the abstract records for ``ids`` as XML text.

    Args:
        ids: Iterable of ID strings; they are joined with commas into the
            ``id`` request parameter.
        api_key: Value sent as the ``api_key`` request parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The response body as text (requested with ``retmode=xml``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": ",".join(ids),
        "retmode": "xml",
        "rettype": "abstract",
        "api_key": api_key,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.text

# XML parsing
def _element_text(element, default):
    """Return the complete text of ``element``, or ``default`` if missing/empty.

    ``Element.text`` only holds the text that precedes the first child tag, so
    a title such as ``Role of <i>TP53</i> in ...`` would be cut to "Role of ".
    ``itertext()`` walks the whole subtree and keeps the text inside and after
    inline markup.
    """
    if element is None:
        return default
    text = "".join(element.itertext()).strip()
    return text or default


def _author_name(author):
    """Return a display name for an ``Author`` element, or None if it has none."""
    fore_name = _element_text(author.find("ForeName"), "")
    last_name = _element_text(author.find("LastName"), "")
    if fore_name and last_name:
        return f"{fore_name} {last_name}"
    # Fall back to whichever single name exists, then to a group author name.
    collective = _element_text(author.find("CollectiveName"), "")
    return last_name or fore_name or collective or None


def parse_pubmed_data(xml_data):
    """Parse PubMed XML into a list of article dictionaries.

    Args:
        xml_data: XML document text containing ``PubmedArticle`` elements.

    Returns:
        A list with one dict per ``PubmedArticle``. Keys: ``pmid``, ``title``,
        ``first_author``, ``first_affiliation``, ``last_author``,
        ``last_affiliation``, ``authors``, ``journal``, ``pub_date``,
        ``abstract`` and ``link``. Missing data is replaced by a "No ..."
        placeholder string.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_data`` is not well-formed XML.
    """
    root = ET.fromstring(xml_data)
    articles = []
    for article in root.findall(".//PubmedArticle"):
        pmid = _element_text(article.find(".//PMID"), "No PMID")
        title = _element_text(article.find(".//ArticleTitle"), "No Title")

        # First and last author, with the first affiliation listed for each
        first_author = "No First Author"
        first_affiliation = "No First Author Affiliation"
        last_author = "No Last Author"
        last_affiliation = "No Last Author Affiliation"

        authors = article.findall(".//Author")
        if authors:
            first, last = authors[0], authors[-1]
            first_author = _author_name(first) or first_author
            first_affiliation = _element_text(
                first.find(".//AffiliationInfo/Affiliation"), first_affiliation
            )
            last_author = _author_name(last) or last_author
            last_affiliation = _element_text(
                last.find(".//AffiliationInfo/Affiliation"), last_affiliation
            )

        # Remaining fields
        author_names = [name for name in map(_author_name, authors) if name]
        author_list = ", ".join(author_names) if author_names else "No Authors"
        journal = _element_text(article.find(".//Title"), "No Journal")

        # Some records carry a free-form MedlineDate instead of a Year element.
        pub_date_elem = article.find(".//PubDate/Year")
        if pub_date_elem is None:
            pub_date_elem = article.find(".//PubDate/MedlineDate")
        pub_date = _element_text(pub_date_elem, "No Date")

        abstract_parts = [
            text
            for text in (
                _element_text(part, "") for part in article.findall(".//AbstractText")
            )
            if text
        ]
        abstract = " ".join(abstract_parts) if abstract_parts else "No Abstract"
        link = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"

        articles.append({
            "pmid": pmid,
            "title": title,
            "first_author": first_author,
            "first_affiliation": first_affiliation,
            "last_author": last_author,
            "last_affiliation": last_affiliation,
            "authors": author_list,
            "journal": journal,
            "pub_date": pub_date,
            "abstract": abstract,
            "link": link,
        })
    return articles

# Build the HTML body of the e-mail
def format_html_email(articles, keyword):
    """Render ``articles`` as an HTML document headed by ``keyword``.

    Every interpolated value is HTML-escaped: the article fields come from an
    external service and the keyword from a user-editable file, so characters
    such as ``<`` or ``&`` must not be interpreted as markup.

    Args:
        articles: Iterable of dicts with the keys ``title``, ``link``, ``pmid``,
            ``first_author``, ``first_affiliation``, ``last_author``,
            ``last_affiliation``, ``authors``, ``journal``, ``pub_date`` and
            ``abstract``.
        keyword: Search keyword shown in the heading.

    Returns:
        The HTML document as a string.
    """
    # Local import keeps this block self-contained (no new top-level import).
    from html import escape

    def esc(value):
        # str() first so non-string values (e.g. None) render instead of raising.
        return escape(str(value))

    parts = [f"<html><body><h2>Results for {esc(keyword)}</h2><hr>"]
    for article in articles:
        parts.append(f"""
        <p><strong>Title:</strong> {esc(article['title'])}</p>
        <p><strong>PMID:</strong> <a href="{esc(article['link'])}">{esc(article['pmid'])}</a></p>
        <p><strong>First Author:</strong> {esc(article['first_author'])} ({esc(article['first_affiliation'])})</p>
        <p><strong>Last Author:</strong> {esc(article['last_author'])} ({esc(article['last_affiliation'])})</p>
        <p><strong>Authors:</strong> {esc(article['authors'])}</p>
        <p><strong>Journal:</strong> {esc(article['journal'])} ({esc(article['pub_date'])})</p>
        <p><strong>Abstract:</strong> {esc(article['abstract'])}</p>
        <hr>
        """)
    parts.append("</body></html>")
    # Join once instead of repeatedly concatenating onto a growing string.
    return "".join(parts)

# Send the e-mail
def send_email(subject, html_content):
    """Send ``html_content`` as an HTML e-mail through Gmail's SMTP server.

    The sender address, password and recipient are read from the module-level
    ``sender_email``, ``sender_password`` and ``recipient`` values.

    Args:
        subject: Subject line of the message.
        html_content: HTML body of the message.

    Raises:
        ValueError: If any of the sender/recipient settings is missing.
        smtplib.SMTPException: If the SMTP conversation fails (e.g. login).
        OSError: If the connection cannot be established or times out.
    """
    # Local import keeps this block self-contained (no new top-level import).
    import ssl

    # Fail early with a clear message instead of an obscure error from smtplib
    # when an environment variable was not set.
    settings = (
        ("SENDER_EMAIL", sender_email),
        ("SENDER_PASSWORD", sender_password),
        ("RECIPIENT", recipient),
    )
    missing = [name for name, value in settings if not value]
    if missing:
        raise ValueError(f"Missing e-mail settings: {', '.join(missing)}")

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = sender_email
    msg["To"] = recipient
    msg.attach(MIMEText(html_content, "html"))
    # The timeout prevents an unresponsive server from blocking forever.
    with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
        # Without an explicit context, starttls() does not verify the server
        # certificate or host name; the default context enables both.
        server.starttls(context=ssl.create_default_context())
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, recipient, msg.as_string())

# Save to Excel
def _safe_sheet_title(name):
    """Return ``name`` converted into a worksheet title that Excel accepts."""
    # Sheet titles may not contain \ / * ? : [ ] and are limited to 31
    # characters; search keywords can easily contain field tags like "[tiab]".
    replacements = str.maketrans({char: "_" for char in "\\/*?:[]"})
    cleaned = str(name).translate(replacements).strip()
    return cleaned[:31].strip() or "Sheet"


def save_to_excel(articles, sheet_name):
    """Write ``articles`` to a worksheet of ``result_file``.

    The workbook is loaded if the file already exists and created otherwise.
    A worksheet with the same (sanitized) title is replaced, so each call
    leaves exactly one sheet for the given name.

    Args:
        articles: Iterable of dicts with the keys ``pmid``, ``title``,
            ``first_author``, ``first_affiliation``, ``last_author``,
            ``last_affiliation``, ``authors``, ``journal``, ``pub_date``,
            ``abstract`` and ``link``.
        sheet_name: Desired worksheet title. Characters Excel forbids are
            replaced by ``_`` and the title is cut to 31 characters, so two
            names that only differ after that point share one sheet.
    """
    columns = [
        ("PMID", "pmid"),
        ("Title", "title"),
        ("First Author", "first_author"),
        ("First Affiliation", "first_affiliation"),
        ("Last Author", "last_author"),
        ("Last Affiliation", "last_affiliation"),
        ("Authors", "authors"),
        ("Journal", "journal"),
        ("Publication Date", "pub_date"),
        ("Abstract", "abstract"),
        ("Link", "link"),
    ]
    if os.path.exists(result_file):
        workbook = load_workbook(result_file)
    else:
        workbook = Workbook()
        # A new workbook starts with one empty default sheet; drop it so the
        # file only contains result sheets.
        workbook.remove(workbook.active)
    title = _safe_sheet_title(sheet_name)
    if title in workbook.sheetnames:
        del workbook[title]
    sheet = workbook.create_sheet(title)
    sheet.append([header for header, _ in columns])
    for article in articles:
        sheet.append([article[key] for _, key in columns])
    workbook.save(result_file)

# Main routine
def main(keywords):
    """Run the search for every keyword and report the hits.

    The date window runs from yesterday to today (local time). For each
    keyword with results, the articles are saved to Excel and e-mailed; for a
    keyword without results a message is printed instead.
    """
    date_format = "%Y/%m/%d"
    start_date = (datetime.now() - timedelta(days=1)).strftime(date_format)
    end_date = datetime.now().strftime(date_format)
    for term in keywords:
        ids = search_pubmed(term, start_date, end_date, api_key)
        if not ids:
            print(f"No new articles for keyword: {term}")
            continue
        articles = parse_pubmed_data(fetch_abstracts(ids, api_key))
        save_to_excel(articles, sheet_name=term)
        send_email(
            subject=f"PubMed Results for {term}",
            html_content=format_html_email(articles, term),
        )

# Run: load the stored keywords (or the default) and process them
keywords = load_keywords()
main(keywords=keywords)