In [7]:
#!/usr/bin/env python3
import math
import argparse
from pathlib import Path
from PIL import Image

# Lower-case file extensions (with leading dot) treated as supported image formats.
# NOTE(review): not referenced in the visible part of this cell; presumably used by the truncated main() - confirm.
SUPPORTED = {".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff", ".bmp"}

def infer_ratio(image_paths):
    """
    Estimate a single aspect ratio (W/H) that suits a whole set of images.

    The result is the median of log(W/H) over the images, mapped back with
    exp(). Images reporting a non-positive width or height are skipped.

    Raises:
        ValueError: if no image contributes a ratio.
    """
    log_ratios = []
    for path in image_paths:
        with Image.open(path) as picture:
            width, height = picture.size
        if width <= 0 or height <= 0:
            continue
        log_ratios.append(math.log(width / height))

    if len(log_ratios) == 0:
        raise ValueError("No valid images found to infer ratio.")

    log_ratios.sort()
    half, is_odd = divmod(len(log_ratios), 2)
    if is_odd:
        center = log_ratios[half]
    else:
        # Even count: average the two middle values
        center = (log_ratios[half - 1] + log_ratios[half]) / 2
    return math.exp(center)

def center_crop_to_ratio(im, ratio):
    """
    Center-crop an image to a target aspect ratio (W/H) without resizing.

    Keeps the largest centered crop whose shape matches ``ratio``; when the
    image already matches (within 1e-9) the same object is returned untouched.

    Args:
        im: Object exposing ``size`` as (width, height) and ``crop(box)``,
            e.g. a PIL image.
        ratio: Target width/height; must be a positive, finite number.

    Returns:
        ``im`` itself when no crop is needed, otherwise the result of
        ``im.crop((left, top, right, bottom))``.

    Raises:
        ValueError: if ``ratio`` is not positive and finite, or the image has
            a zero-sized dimension.
    """
    # NaN fails the "> 0" comparison, so it is rejected here as well.
    if not (ratio > 0 and math.isfinite(ratio)):
        raise ValueError(f"ratio must be a positive finite number, got {ratio!r}")

    w, h = im.size
    if w <= 0 or h <= 0:
        raise ValueError(f"cannot crop an image of size {w}x{h}")

    current = w / h
    if abs(current - ratio) < 1e-9:
        return im  # already matches

    if current > ratio:
        # Image is too wide -> crop width.
        # Clamp so an extreme ratio cannot round the crop down to 0 px.
        new_w = min(w, max(1, int(round(ratio * h))))
        new_h = h
    else:
        # Image is too tall -> crop height
        new_w = w
        new_h = min(h, max(1, int(round(w / ratio))))

    left = (w - new_w) // 2
    top = (h - new_h) // 2
    return im.crop((left, top, left + new_w, top + new_h))

def main():
    ap = argparse.ArgumentParser(description="Infer a common aspect ratio and center-crop all images in a directory.")
    ap.add_argument("input_dir", help="Directory containing images")
    ap.add_argument("-o", "--output_dir", default="cropped", help="Output directory (default: cropped)")
    ap.add_argume_


In [8]:
import csv, re, textwrap
from pathlib import Path

# CSV with one row per team member; read below with csv.DictReader.
INPUT_CSV = "./people/team_members.csv"          # <-- your CSV filename/path
# Folder that receives one generated Markdown file per CSV row; created if missing.
OUTPUT_DIR = "./_people"        # <-- folder to create

# Category written into the front matter of every generated file.
FIXED_CATEGORY = "people"

def slugify(s: str) -> str:
    """
    Turn a display name into a lowercase, hyphen-separated file-name slug.

    Apostrophe-like characters are removed outright (so "O'Brien" becomes
    "obrien"); every other run of characters outside a-z / 0-9 becomes a
    single hyphen, and leading/trailing hyphens are trimmed.

    Returns:
        The slug, or "person" when nothing usable is left.
    """
    s = s.strip().lower()
    # Straight, curly (U+2018/U+2019) and backtick apostrophes are dropped.
    # The curly quote is written as an escape: the previous literal was the
    # mis-encoded byte sequence of U+2019 and never matched the real character.
    s = re.sub(r"[\u2018\u2019'`]", "", s)
    s = re.sub(r"[^a-z0-9]+", "-", s)
    s = re.sub(r"-{2,}", "-", s).strip("-")
    return s or "person"

# Generate one Markdown file with front matter per row of INPUT_CSV.
out_dir = Path(OUTPUT_DIR)
out_dir.mkdir(parents=True, exist_ok=True)

used = {}  # base slug -> times seen so far; repeats get a "-2", "-3", ... suffix
created = []  # file names written, in CSV order

# "utf-8-sig" strips a leading BOM if present, so the first header name stays clean.
with open(INPUT_CSV, "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f, delimiter=",")
    print("Detected headers:", reader.fieldnames)

    for row in reader:
        # normalize keys (avoid issues like " title " vs "title") and trim every value;
        # missing values (None) become ""
        row = {(k or "").strip().lower(): (v or "").strip() for k, v in row.items()}

        # These five columns are required: a missing one raises KeyError here.
        title = row["title"]
        position = row["position"]
        # NOTE(review): no "/" is inserted between "/people" and the CSV value -
        # presumably the image column already starts with "/"; confirm.
        image = "/people"+row["image"]
        alumni = row["alumni"]
        role_key = row["role_key"]

        base = slugify(title)
        used[base] = used.get(base, 0) + 1
        slug = base if used[base] == 1 else f"{base}-{used[base]}"

        filename = f"{slug}.md"  # or f"2026-01-09-{slug}.md" if you want date prefix
        # dedent runs AFTER the values are interpolated, and the values are written
        # unquoted.
        # NOTE(review): a value containing ": ", "#" or a line break would likely
        # produce invalid front matter - confirm the CSV never contains such values.
        content = textwrap.dedent(f"""\
        ---
        title: {title}
        categories:
          - {FIXED_CATEGORY}
        position: {position}
        image: {image}
        alumni: {alumni}
        role_key: {role_key}
        ---
        """)

        # An existing file with the same name is overwritten.
        (out_dir / filename).write_text(content, encoding="utf-8")
        created.append(filename)

print(f"Done. Wrote {len(created)} files to: {out_dir.resolve()}")


Detected headers: ['image', 'title', 'position', 'alumni', 'role_key']
Done. Wrote 31 files to: /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_people


In [None]:
## Fetch paper metadata (title, authors, journal, date, URL) from the Crossref API
import requests
from datetime import datetime
import re
import csv

def _format_crossref_date(paper_info):
    """Return the first usable Crossref date as 'Month DD, YYYY', else 'No date found'."""
    # 'published-online' is preferred (as before); the other fields are fallbacks
    # for records that only carry a print or issue date.
    for key in ('published-online', 'published', 'published-print', 'issued'):
        date_parts = (paper_info.get(key) or {}).get('date-parts') or []
        if not date_parts or not date_parts[0] or len(date_parts[0]) < 2:
            continue  # need at least year and month
        year, month, *day = date_parts[0]
        try:
            return datetime(year, month, day[0] if day else 1).strftime('%B %d, %Y')
        except (TypeError, ValueError):
            # e.g. None placeholders or an out-of-range month/day
            continue
    return 'No date found'


def get_paper_info_by_title(title, timeout=30):
    """
    Query the Crossref ``/works`` endpoint and return details of the top hit.

    Args:
        title: Text sent as the Crossref ``query`` parameter (normally the
            paper title).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with keys 'title', 'authors' (list of names), 'journal',
        'year' (publication date formatted like 'April 01, 2023', or
        'No date found') and 'url'. Returns None when the request fails, the
        response is not usable JSON, or Crossref returns no items.
    """
    api_url = "https://api.crossref.org/works"
    params = {
        'query': title,
        'rows': 1  # Limiting to 1 result
    }

    try:
        # Without a timeout a single stalled connection blocks the whole batch.
        response = requests.get(api_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error during API request: {e}")
        return None

    message = data.get('message') if isinstance(data, dict) else None
    items = (message or {}).get('items') or []
    if not items:
        return None
    paper_info = items[0]

    # Crossref may send these fields as empty lists, so "or" supplies the
    # placeholder for both a missing key and an empty value.
    found_title = (paper_info.get('title') or ['No title found'])[0]
    journal = (paper_info.get('container-title') or ['No journal found'])[0]

    authors = []
    for author in paper_info.get('author') or []:
        name = f"{author.get('given', '')} {author.get('family', '')}".strip()
        # Group/consortium authors carry 'name' instead of given/family.
        name = name or author.get('name', '')
        if name:
            authors.append(name)

    return {
        'title': found_title,
        'authors': authors,
        'journal': journal,
        'year': _format_crossref_date(paper_info),
        'url': paper_info.get('URL', 'No URL found')
    }

file_path = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperlist_from2021.txt'

# Read the numbered paper list. The encoding is explicit so the file is decoded
# the same way on every platform.
with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

# Each entry starts a line with "<number>."; the title is the text that follows,
# up to the first ". " or the end of that line.
paper_titles = re.findall(r'^[\d]+\.\s*(.*?)(?=\.\s|\n|$)', content, re.MULTILINE)

# Path to save the CSV file
output_csv = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperinfo_from2021.csv'

# Open CSV file for writing
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write header row
    writer.writerow(['Title', 'Authors', 'Journal', 'Year', 'URL'])

    # Look up every title (one HTTP request each) and write one CSV row per title
    for title in paper_titles:
        paper_info = get_paper_info_by_title(title)
        if paper_info:
            # Write paper info to CSV
            writer.writerow([paper_info['title'], ', '.join(paper_info['authors']), paper_info['journal'], paper_info['year'], paper_info['url']])
        else:
            # Keep the original title so rows without a match can be completed by hand
            writer.writerow([title, 'Not found', 'Not found', 'Not found', 'Not found'])

print(f"Results have been written to {output_csv}")

KeyboardInterrupt: 

In [36]:
## List the journals that have no cover image
import os
import pandas as pd

# Define the directory where the cover images are stored
cover_dir = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/'

# Example journal names.
# NOTE(review): this list is not used below - the names checked come from the spreadsheet.
journal_names = [
    "Experimental Gerontology",
    "Advanced Science",
    "Nature Communications",
    "Cell Reports",
    # Add more journal names as needed
]

file_path = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperinfo_from2021.xlsx'

# Read the Excel sheet into a DataFrame (first row holds the column names)
df = pd.read_excel(file_path, header=0)

# Select the column you want to deduplicate
column_name = 'Journal'

# Unique journal names. Rows without a journal are dropped first so that an
# empty cell is not reported as a missing "nan" cover.
unique_values = df[column_name].dropna().drop_duplicates().to_list()

# Display the result
print(unique_values)

# A cover counts as present when a file named "<journal><ext>" exists for any of these
cover_extensions = ('.webp', '.jpg', '.png')

# Journals for which no cover file exists in any accepted format
missing_covers = [
    journal_name
    for journal_name in unique_values
    if not any(
        os.path.exists(os.path.join(cover_dir, f"{journal_name}{ext}"))
        for ext in cover_extensions
    )
]

# Output the journal names for which the cover images are missing
if missing_covers:
    print("The following journals are missing cover images:")
    for journal in missing_covers:
        print(journal)
else:
    print("All journals have cover images.")


['Experimental Gerontology', 'Advanced Science', 'Nature Communications', 'Cell Report', 'Genome Medicine', 'Geroscience', 'BMC Bioinformatics', 'Cancer Cell', 'Scientific Reports', 'Nature Reviews Genetics', 'Aging', 'European Journal of Medical Research', 'Genome Biology', 'Biochemical Society Transactions', 'Communication Biology', 'Clinical Epigenetics', 'Nature Aging', 'Nature Genetics', 'Philosophical Transactions of the Royal Society B: Biological Sciences', 'Computational Molecular Biology', 'iScience', 'Frontiers in Molecular Biosciences', 'Journal of Neurological Surgery Part B Skull Base', 'Genomics Proteomics Bioinformatics', 'Cancer Research', 'Nature Methods', 'European Journal of Cancer ', 'Nucleic Acids Research', 'Epigenetics', 'Bioinformatics', 'EBioMedicine']
The following journals are missing cover images:
Philosophical Transactions of the Royal Society B: Biological Sciences
Genomics Proteomics Bioinformatics
European Journal of Cancer 


In [5]:
## generate md files for papers
import textwrap
import pandas as pd
import os
from datetime import datetime

# Load the Excel file
file_path = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperinfo_from2021.xlsx'
df = pd.read_excel(file_path)

# Define the function to generate the .md content
def generate_md(row):
    """
    Build the front matter (.md content) for one paper.

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys 'Title', 'Authors',
            'Journal', 'Date', 'URL' and 'Is_corresponding'. 'Date' is expected
            to start with a "YYYY-MM-DD" date.

    Returns:
        The file content as a string: a block delimited by '---' lines and
        ending with a newline. When the date cannot be parsed, a message is
        printed and 'year' / 'month' are left empty.
    """
    import json  # local import: the cell's import list does not provide it

    def yaml_str(value):
        # A JSON string literal is also a valid double-quoted YAML scalar, so
        # embedded quotes, backslashes and line breaks come out escaped.
        return json.dumps(str(value), ensure_ascii=False)

    title = row['Title']
    authors = row['Authors']
    journal = row['Journal']
    year_month = row['Date']
    url = row['URL']
    Is_corresponding = row['Is_corresponding']

    FIXED_CATEGORY = 'papers'

    # Extract year and month from the Date column. The split is inside the try
    # so that an empty date is reported like any other unparsable value.
    try:
        date_str = str(year_month).split()[0]
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
        year = date_obj.year
        month = date_obj.strftime('%B')  # Full month name
    except (IndexError, ValueError) as e:
        print(f"Error parsing date: {e}")
        year, month = '', ''

    # Define the image path based on the journal
    image_path = f"/papers/covers/{journal}.jpg"

    # Assemble line by line: running dedent over interpolated text broke the
    # layout whenever a value contained a line break.
    lines = [
        "---",
        f"title: {yaml_str(title)}",
        "categories:",
        f"  - {FIXED_CATEGORY}",
        f"authors: {yaml_str(authors)}",
        f"year: {year}",
        f"month: {month}",
        f"journal: {yaml_str(journal)}",
        f"paper_url: {yaml_str(url)}",
        f"image: {yaml_str(image_path)}",
        f"Is_corresponding: {yaml_str(Is_corresponding)}",
        "---",
    ]
    return "\n".join(lines) + "\n"

output_dir = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/'

# Create the output directory if needed (no error when it already exists)
os.makedirs(output_dir, exist_ok=True)

# Iterate over each row in the dataframe and generate the markdown content
for index, row in df.iterrows():
    md = generate_md(row)

    # Derive the file name from the title. Path separators are replaced as well,
    # so a title containing "/" cannot point into a (non-existent) sub-directory.
    sanitized_filename = (
        row['Title'].lower()
        .replace(' ', '-').replace('"', '').replace("'", '').replace(':', '-')
        .replace(',', '_').replace('\012', '').replace('?', '_')
        .replace('/', '-').replace('\\', '-')
    )
    # Define the output file path for each .md file
    output_path = os.path.join(output_dir, f"{sanitized_filename}.md")

    # Write the .md file; explicit UTF-8 so non-ASCII titles and author names are
    # stored the same way on every platform
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(md)
    print(f"Generated {output_path}")



Generated /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/immune-cell-associated-dna-methylation-responses-to-exercise-in-women--a-bioinformatics-analysis-comparing-pre--and-postmenopausal-stages.md
Generated /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/variations-in-innate-immune-cell-subtypes-correlate-with-epigenetic-clocks_-inflammaging-and-health-outcomes.md
Generated /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/an-improved-reference-library-and-method-for-accurate-cell-type-deconvolution-of-bulk-tissue-mirna-data.md
Generated /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/molecular-landscape-of-modality-specific-exercise-adaptation-in-human-skeletal-muscle-through-large-scale-multi-omics-integration.md
Generated /Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/_papers/unified-high-resolution-immune-cell-fraction-estimat

In [None]:
from PIL import Image
import os

# Directory containing the images
image_dir = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/covers/'

# Dictionary to hold image filenames and their full paths
images_dict = {}

# Loop through all files in the directory
for filename in os.listdir(image_dir):
    # Get the full file path
    file_path = os.path.join(image_dir, filename)

    # Keep regular files with an image extension. Every suffix carries its
    # leading dot, so a name that merely ends in the letters "webp" is not picked up.
    if os.path.isfile(file_path) and filename.lower().endswith(('.png', '.bmp', '.jpg', '.jpeg', '.webp')):
        # Use the filename (without extension) as the key and the file path as the value.
        # Two files that differ only in extension share a key; the one listed last wins.
        image_name = os.path.splitext(filename)[0]  # Remove the file extension
        images_dict[image_name] = file_path

# Print the dictionary to check
print(images_dict)


# Output directory for converted images
output_dir = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover1'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Function to convert images
def convert_to_jpg(image_path, output_dir):
    """
    Save an RGB JPEG copy of ``image_path`` inside ``output_dir``.

    The copy keeps the source file's base name and gets a ``.jpg`` extension.
    Any exception is caught and reported with print(); nothing is returned.
    """
    try:
        with Image.open(image_path) as source:
            # "<output_dir>/<name without extension>.jpg"
            base_name = os.path.basename(image_path)
            stem, _ext = os.path.splitext(base_name)
            target = os.path.join(output_dir, stem + ".jpg")
            # JPEG is written from an RGB version of the image
            rgb_copy = source.convert('RGB')
            rgb_copy.save(target, 'JPEG')
            print(f"Converted {image_path} to {target}")
    except Exception as err:
        print(f"Error converting {image_path}: {err}")

# Convert every collected image; only the stored paths are needed here
for image_path in images_dict.values():
    convert_to_jpg(image_path, output_dir)

{'Advanced Science': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/Advanced Science.webp', 'Biochemical Society Transactions': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/Biochemical Society Transactions.webp', 'Philosophical Transactions of the Royal Societ': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/Philosophical Transactions of the Royal Societ.webp', 'Cell Report': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/Cell Report.webp', 'European Journal of Medical Research': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/European Journal of Medical Research.webp', 'Communication Biology': '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Papers_cover/Communication Biology.png', 'The Oncologist': '/Users/fantastic-lin/

In [53]:
!pip install Pillow




In [55]:
## Check each paper cover's width:height ratio against the target ratio
import os
from PIL import Image

# Define the directory where the images are stored
image_dir = '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/covers'

# Define the target aspect ratio (width/height): 16:9, i.e. about 1.78.
# Each image's ratio is compared against this value further down.
target_width = 16
target_height = 9
target_aspect_ratio = target_width / target_height

# Function to calculate the aspect ratio of an image
def calculate_aspect_ratio(image_path):
    """Open the image at ``image_path`` and return its width divided by its height."""
    with Image.open(image_path) as picture:
        pixel_width, pixel_height = picture.size
    return pixel_width / pixel_height

# Report, for every image file in the directory, whether its ratio is within
# tolerance of the target
for filename in os.listdir(image_dir):
    file_path = os.path.join(image_dir, filename)

    # Skip anything that does not carry one of the checked image extensions
    if not file_path.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
        continue

    aspect_ratio = calculate_aspect_ratio(file_path)

    # Allow a small tolerance around the target
    within_tolerance = abs(aspect_ratio - target_aspect_ratio) < 0.01
    if not within_tolerance:
        print(f"Image '{filename}' does not match the target aspect ratio: {aspect_ratio:.2f}")
    else:
        print(f"Image '{filename}' matches the target aspect ratio: {aspect_ratio:.2f}")


Image 'Biochemical Society Transactions.jpg' does not match the target aspect ratio: 0.76
Image 'Journal of Neurological Surgery Part B Skull Base.jpg' does not match the target aspect ratio: 0.75
Image 'The Oncologist.jpg' does not match the target aspect ratio: 0.75
Image 'Cancer Cell.jpg' does not match the target aspect ratio: 0.77
Image 'Genomics, Proteomics & Bioinformatics.jpg' does not match the target aspect ratio: 0.97
Image 'Communication Biology.jpg' does not match the target aspect ratio: 0.76
Image 'Scientific Reports.jpg' does not match the target aspect ratio: 0.71
Image 'Annals of Oncology.jpg' does not match the target aspect ratio: 0.78
Image 'iScience.jpg' does not match the target aspect ratio: 0.75
Image 'Frontiers in Molecular Biosciences.jpg' does not match the target aspect ratio: 0.77
Image 'Philosophical Transactions of the Royal Societ.jpg' does not match the target aspect ratio: 0.67
Image 'Geroscience.jpg' does not match the target aspect ratio: 0.74
Image

In [None]:
## generate contribution info from txt file

import re
import pandas as pd

in_path = "/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperlist_from2021.txt"
out_csv = "/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperlist_parsed.csv"

# 1) Load the raw text as a list of lines
with open(in_path, "r", encoding="utf-8") as f:
    lines = f.read().splitlines()

# 2) Collect numbered entries: a line of the form "<number>. text" opens a new
#    entry, any other non-blank line is appended to the entry currently open.
# NOTE(review): a continuation line that itself begins with digits and a dot
# would also open a new entry - confirm the list contains no such lines.
entries = []
current = None
entry_start = re.compile(r"^(\d+)\.\s*(.*)$")

for line in lines:
    line = line.strip()
    if line == "":
        continue

    m = entry_start.match(line)
    if m is None:
        # Continuation text; ignored when it appears before the first numbered line
        if current:
            current["lines"].append(line)
        continue

    # A new number closes the entry that was being built
    if current:
        entries.append(current)
    number, first_text = m.groups()
    current = {"index": int(number), "lines": [first_text.strip()]}

# Do not lose the last entry
if current:
    entries.append(current)

# 3) Extract title + contribution
# Case-insensitive pattern for the contribution label of an entry. Group 1 holds
# the label itself; a "." directly after it is matched but not captured.
role_re = re.compile(
    r"\b("
    r"Main\s+and\s+Corresponding\s+Author|"
    r"Joint\s+Corresponding\s+Author|"
    r"Corresponding\s+Author|"
    r"Corresponding\s+Autor|"   # accepts the misspelling "Autor" for the plain "Corresponding" label only
    r"Co-author"
    r")\b\.?",
    re.IGNORECASE
)

def extract_title(entry_text: str) -> str:
    """
    Return the title part of an entry: everything before its first ".".

    Whitespace runs are collapsed to single spaces first. The entry text is
    expected to arrive without its leading "1. " style number.
    """
    collapsed = re.sub(r"\s+", " ", entry_text).strip()
    head, _dot, _rest = collapsed.partition(".")
    return head.strip()

def extract_contribution(entry_text: str):
    """
    Return the normalized contribution label found in an entry, or None.

    The LAST label matched by ``role_re`` in the text is used. The misspelling
    "Autor" is corrected to "Author", "Co-author" is returned in exactly that
    form, and every other label is title-cased with a lower-case "and".
    """
    entry_text = re.sub(r"\s+", " ", entry_text).strip()
    matches = list(role_re.finditer(entry_text))
    if not matches:
        return None
    role = matches[-1].group(1).strip()
    # Correct the typo whatever its letter case: role_re matches
    # case-insensitively, so "autor" / "AUTOR" reach this point too.
    role = re.sub(r"autor$", "author", role, flags=re.IGNORECASE)

    # normalize capitalization
    if role.lower() == "co-author":
        return "Co-author"
    return " ".join(
        "and" if w.lower() == "and" else w.capitalize()
        for w in role.split()
    )

# One output record per entry: its number, title and contribution label
rows = []
for entry in entries:
    text = " ".join(entry["lines"])
    rows.append(dict(
        Index=entry["index"],
        Title=extract_title(text),
        Contribution=extract_contribution(text),
    ))

# Order by entry number, save, and show the first rows
df = pd.DataFrame(rows)
df = df.sort_values("Index")
df.to_csv(out_csv, index=False, encoding="utf-8")
print(df.head(10))
print(f"\nSaved: {out_csv}")


   Index                                              Title  \
0      1  Immune cell-associated DNA methylation respons...   
1      2  Variations in Innate Immune Cell Subtypes Corr...   
2      3  An improved reference library and method for a...   
3      4  Molecular landscape of sex- and modality-speci...   
4      5  Unified high-resolution immune cell fraction e...   
5      6  Epigenetic clocks and inflammaging: pitfalls c...   
6      7  FastQTLmapping: an ultra-fast and memory effic...   
7      8  Cell-type-specific subtyping of epigenomes imp...   
8      9  Single-cell multi-stage spatial evolutional ma...   
9     10  Interpretable deep learning of single-cell and...   

                 Contribution  
0                   Co-author  
1  Joint Corresponding Author  
2                   Co-author  
3                   Co-author  
4  Joint Corresponding Author  
5        Corresponding Author  
6                   Co-author  
7        Corresponding Author  
8  Joint Correspon

In [4]:
import pandas as pd

# Load the paper sheet; the first row holds the column names
df = pd.read_excel(
    '/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperinfo_from2021.xlsx',
    header=0,
)

# True where the Contribution text mentions "corresponding" (any letter case);
# empty cells count as False.
# NOTE(review): generate_md reads a column spelled 'Is_corresponding' - confirm
# which spelling the sheet is meant to carry.
contribution_text = df["Contribution"].fillna("")
df["is_corresponding"] = contribution_text.str.contains("corresponding", case=False, na=False)

print(df.head())
df.to_excel(
    "/Users/fantastic-lin/Documents/Andrew/Lab_website/aet21.github.io-master/papers/Apaperinfo_from2021_1.xlsx",
    index=False,
)

                                               Title  \
0  Immune cell-associated DNA methylation respons...   
1  Variations in Innate Immune Cell Subtypes Corr...   
2  An improved reference library and method for a...   
3  Molecular Landscape of Modality-Specific Exerc...   
4  Unified high-resolution immune cell fraction e...   

                                             Authors  \
0  Guilherme da Silva Rodrigues, Natalia Yumi Nor...   
1  Xiaolong Guo, Josephine A. Robertson, Andrea A...   
2  Shaoying Zhu, Hui Yang, Jun Liu, Qingsheng Fu,...   
3  Macsue Jacques, Shanie Landen, Adam P Sharples...   
4  Xiaolong Guo, Mahnoor Sulaiman, Alexander Neum...   

                    Journal                                          URL  \
0  Experimental Gerontology  https://doi.org/10.1016/j.exger.2025.112996   
1          Advanced Science       https://doi.org/10.1002/advs.202505922   
2     Nature Communications   https://doi.org/10.1038/s41467-025-60521-x   
3               Cell R