In [38]:
from fetch_papers import fetch_recent_cv_papers
from user_profiles import create_user_profiles, load_user_profiles
from recommender import recommend_papers_tfidf, BM25_papers_rec
import pandas as pd
from email_utils import authenticate_email, send_recommendations, fetch_feedback
import google.generativeai as genai
import json
import os
import datetime
from email.utils import parsedate_to_datetime, parseaddr
import re
import string

# Gmail credentials. Prefer the EMAIL_ADDRESS / APP_PASSWORD environment
# variables so real secrets never have to be typed into (and saved with) the
# notebook; the placeholder strings remain as the fallback for manual editing.
EMAIL_ADDRESS = os.environ.get("EMAIL_ADDRESS", "put your email address")
APP_PASSWORD = os.environ.get("APP_PASSWORD", "put your app password for email")

# Authenticate once; the returned SMTP and IMAP handles are reused further down
# for sending recommendations and fetching feedback.
smtp, imap = authenticate_email(EMAIL_ADDRESS, APP_PASSWORD)

# Place your Google API key in google_api_key.json (under the "key" field)
# in order to use Gemini.
with open("google_api_key.json", "r") as file:
    GOOGLE_API_KEY = json.load(file)['key']

genai.configure(api_key=GOOGLE_API_KEY)
generative_model = genai.GenerativeModel('gemini-2.0-flash')

def prompt_generative_model(query):
    """Send ``query`` to the module-level Gemini model and return the reply text.

    Args:
        query: Prompt passed unchanged to ``generative_model.generate_content``.

    Returns:
        The ``text`` attribute of the generated response.
    """
    return generative_model.generate_content(query).text

def load_user_data(user_id):
    """Load the stored recommendation/feedback record for one user.

    Args:
        user_id: Identifier used as the JSON file stem under ``user_data/``.

    Returns:
        dict: The parsed contents of ``user_data/<user_id>.json`` when that
        file exists; otherwise a fresh record whose ``recommendations_sent``,
        ``liked_papers`` and ``disliked_papers`` lists are all empty.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    user_data_path = os.path.join('user_data/', user_id + '.json')
    if not os.path.exists(user_data_path):
        return {'recommendations_sent': [], 'liked_papers': [], 'disliked_papers': []}

    # The context manager closes the handle even when json.load raises, which
    # the previous manual open()/close() pair did not guarantee.
    with open(user_data_path, 'r') as f:
        return json.load(f)

# checking for a single user
def recommend_for_user(user_id):
    """Produce the top-5 BM25 paper recommendations for a single user.

    Args:
        user_id: Key of the user in the loaded profiles; also used to look up
            the user's stored feedback record.

    Returns:
        Whatever ``BM25_papers_rec`` returns for the user's ``research_focus``,
        the papers read from ``data/papers.csv``, and the ids of the papers the
        user previously liked and disliked.
    """
    all_profiles = load_user_profiles()
    candidate_papers = pd.read_csv('data/papers.csv')
    profile = all_profiles[user_id]
    history = load_user_data(user_id)

    def paper_ids(entries):
        # Stored feedback entries are [timestamp, title, id]; keep only the id.
        return [entry[2] for entry in entries]

    return BM25_papers_rec(
        profile['research_focus'],
        candidate_papers,
        paper_ids(history['liked_papers']),
        paper_ids(history['disliked_papers']),
        top_n=5,
    )

# Directory holding one JSON record per user, named <user_id>.json.
USER_DATA_DIR = "user_data"  # Folder to store per-user JSON files
# Create the folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(USER_DATA_DIR, exist_ok=True)

def store_recommendations_sent(user_id, recommendations):
    """Append a batch of sent recommendations to the user's JSON record.

    Every row of ``recommendations`` is stored as ``[timestamp, title, id]``,
    where the timestamp is the same ISO-format "now" for the whole batch.

    Args:
        user_id: Identifier used as the JSON file stem inside ``USER_DATA_DIR``.
        recommendations: DataFrame with at least ``title`` and ``id`` columns.

    Returns:
        None. The user's record file is created or rewritten on disk.
    """
    record_path = os.path.join(USER_DATA_DIR, f"{user_id}.json")
    sent_at = datetime.datetime.now().isoformat()

    new_entries = [
        [sent_at, paper["title"], paper["id"]]
        for _, paper in recommendations.iterrows()
    ]

    # Start from the existing record when there is one, else from an empty one.
    if not os.path.exists(record_path):
        record = {
            "recommendations_sent": [],
            "liked_papers": [],
            "disliked_papers": []
        }
    else:
        with open(record_path, "r") as fh:
            record = json.load(fh)

    record["recommendations_sent"].extend(new_entries)

    with open(record_path, "w") as fh:
        json.dump(record, fh, indent=2)

def update_user_feedback(feedback_list):
    """Fold emailed feedback into each sender's liked/disliked paper lists.

    For every feedback message the sender address is mapped to a user id via
    the loaded profiles. The reply text before any quoted "On ... wrote:"
    header is scanned for a line starting with "liked papers"; the lines after
    it (until a quoted or numbered line) are taken as liked titles. Titles are
    compared against the user's most recent batch of recommendations using the
    first four words, lowercased and with punctuation removed. Matching papers
    are added to ``liked_papers``; every other paper in that batch is added to
    ``disliked_papers``. The user's JSON record is then rewritten.

    Messages from unknown senders, users without a record file, or users with
    no stored recommendations are skipped.

    Args:
        feedback_list: Iterable of 4-item entries
            ``(sender_email, subject, body, received_time)``.

    Returns:
        None.
    """
    user_id_by_email = {
        profile['email']: profile_id
        for profile_id, profile in load_user_profiles().items()
    }

    # Built once and shared by every title comparison below.
    punctuation_table = str.maketrans('', '', string.punctuation)
    reply_header = re.compile(r'\n?On .+ wrote:', flags=re.IGNORECASE)
    numbered_line = re.compile(r'^\*?\d+\.')

    def title_key(raw_title):
        # Lowercase, drop punctuation, and keep only the first 4 words.
        words = raw_title.lower().translate(punctuation_table).split()
        return ' '.join(words[:4])

    for sender, _subject, body, _received_at in feedback_list:
        sender_address = parseaddr(sender)[1]
        user_id = user_id_by_email.get(sender_address)
        if not user_id:
            print(f"No matching user_id found for sender email: {sender_address}")
            continue

        record_path = os.path.join(USER_DATA_DIR, f"{user_id}.json")
        if not os.path.exists(record_path):
            print(f"No record found for user_id: {user_id}")
            continue

        with open(record_path, "r") as fh:
            record = json.load(fh)

        # Keep only what the user wrote, i.e. the text ahead of the quoted email.
        reply_text = reply_header.split(body)[0]
        stripped_lines = (
            raw_line.strip()
            for raw_line in reply_text.replace('\r\n', '\n').split('\n')
        )
        reply_lines = [text for text in stripped_lines if text]

        # Collect the lines that follow the "liked papers" header.
        liked_titles = []
        in_liked_section = False
        for text in reply_lines:
            if text.lower().startswith("liked papers"):
                in_liked_section = True
            elif in_liked_section:
                # A quoted or numbered line likely begins the quoted message.
                if text.startswith(">") or numbered_line.match(text):
                    break
                liked_titles.append(text)

        sent_history = record.get("recommendations_sent", [])
        if not sent_history:
            continue

        # The latest batch is every entry sharing the newest entry's timestamp.
        latest_stamp = sent_history[-1][0]
        latest_batch = [entry for entry in sent_history if entry[0] == latest_stamp]
        batch_by_key = {title_key(entry[1]): entry for entry in latest_batch}

        liked_papers = record.setdefault("liked_papers", [])
        disliked_papers = record.setdefault("disliked_papers", [])

        matched_keys = set()
        for liked_title in liked_titles:
            key = title_key(liked_title)
            if key not in batch_by_key:
                continue
            entry = batch_by_key[key]
            if entry not in liked_papers:
                liked_papers.append(entry)
            matched_keys.add(key)

        # Anything in the latest batch that was not liked counts as disliked.
        for entry in latest_batch:
            if title_key(entry[1]) in matched_keys:
                continue
            if entry not in disliked_papers:
                disliked_papers.append(entry)

        with open(record_path, "w") as fh:
            json.dump(record, fh, indent=2)

def send_recommendations_to_user_generative(user_id):
    """Email a user their recommendations using an LLM-written message.

    The top recommendations for the user are formatted into a prompt asking the
    generative model for a subject line and an email body. If the reply cannot
    be parsed into a non-empty subject and body, a static email listing the
    titles and links is used instead. The email is sent through the module-level
    SMTP connection when one is available; otherwise its contents are printed.

    Args:
        user_id: Key of the user in the loaded profiles.

    Returns:
        The recommendations DataFrame returned by ``recommend_for_user``.
    """
    profiles = load_user_profiles()
    user = profiles[user_id]
    to_email_address = user['email']
    top_recommendations = recommend_for_user(user_id)

    # Prepare a structured list of recommendations with title, link, and abstract
    rec_items = []
    for i, (_, row) in enumerate(top_recommendations.iterrows(), 1):
        rec_items.append({
            "index": i,
            "title": row['title'],
            "link": row['id'],
            "abstract": row['abstract']
        })

    # Build LLM prompt to generate email and summary for each abstract
    formatted_recs = "\n".join(
        f"{item['index']}. Title: {item['title']}\n   Link: {item['link']}\n   Abstract: {item['abstract']}"
        for item in rec_items
    )

    prompt = f"""
You are an assistant for a research recommendation service called SendMeStudies.

A user named {user['name']} is receiving their weekly paper recommendations. Each recommendation includes a title, a link, and an abstract.

Your task:
1. Write a short friendly email body that introduces the recommendations and refers to the service SendMeStudies naturally.
2. For each paper, include the full title, link, and a 3-line summary of the abstract written in simple, clear language. Title should be full. Do not cut it.
3. At the end, include a section reminding the user to reply with feedback starting with "Liked Papers", followed by paper titles.

Then, generate a short, relevant subject line for the email.

Here are the recommendations:

{formatted_recs}

Format your output like this:

Subject: <subject line>

Body:
<email body here>
"""

    generated = prompt_generative_model(prompt)

    # Parse subject and body from the generated response. maxsplit=1 keeps
    # everything after the FIRST marker, so a later "Body:" inside the email
    # text (e.g. in a paper title) no longer truncates the message.
    subject_line = ""
    email_body = ""
    if "Subject:" in generated and "Body:" in generated:
        subject_line = generated.split("Subject:", 1)[1].split("\n", 1)[0].strip()
        email_body = generated.split("Body:", 1)[1].strip()

    # A missing marker or an empty subject/body both count as a parse failure,
    # so a blank email is never sent.
    if not subject_line or not email_body:
        print("Error parsing LLM output. Falling back to static format.")
        subject_line = "Your weekly research paper picks from SendMeStudies"
        email_body = f"Howdy {user['name']}!\n\nHere are this week's research paper recommendations from SendMeStudies:\n"
        for item in rec_items:
            email_body += f"\n{item['index']}. {item['title']}\n   {item['link']}\n"
        email_body += (
            "\n---\nYou can send your feedback by replying to this email.\n"
            "Please start your reply with the line:\n\nLiked Papers\nThen, list the titles of the papers you liked, one per line.\n\n"
            "Example:\nLiked Papers\nPaper Title 1\nPaper Title 2"
        )

    # Send or print the email
    if smtp:
        send_recommendations(
            smtp_server=smtp,
            sender_email=EMAIL_ADDRESS,
            recipient_emails=[to_email_address],
            subject=subject_line,
            recommendation_text=email_body,
        )
    else:
        print('Unable to send recommendations to the user:', user_id)
        print('Subject:', subject_line)
        print('Email body:\n', email_body)

    return top_recommendations

def send_recommendations_to_user(user_id):
    """Email a user their recommendations using a fixed plain-text template.

    The message greets the user by name, lists each recommended paper as a
    numbered title followed by its link, and ends with instructions for
    replying with a "Liked Papers" list. It is sent through the module-level
    SMTP connection; when no connection is available a notice is printed.

    Args:
        user_id: Key of the user in the loaded profiles.

    Returns:
        The recommendations DataFrame returned by ``recommend_for_user``.
    """
    user = load_user_profiles()[user_id]
    to_email_address = user['email']
    top_recommendations = recommend_for_user(user_id)

    greeting = 'Howdy '+user['name']+"!\n\nHere are this week's top research paper recommendations:\n"

    # One numbered entry per paper; the 'id' column is used as the link.
    paper_entries = [
        f"{position}. {paper['title']}\n   {paper['id']}\n"
        for position, (_, paper) in enumerate(top_recommendations.iterrows(), start=1)
    ]

    feedback_instructions = [
        "\n---",
        "You can send your feedback by replying to this email.",
        "Please start your reply with the line:\n\nLiked Papers",
        "Then, list the titles of the papers you liked, one per line.\n",
        "Example:\n",
        "Liked Papers\nPaper Title 1\nPaper Title 2",
    ]

    recommendation_text = "\n".join([greeting, *paper_entries, *feedback_instructions])

    if not smtp:
        print('Unable to send recommendations to the user: ',user_id)
        return top_recommendations

    send_recommendations(
        smtp_server=smtp,
        sender_email=EMAIL_ADDRESS,
        recipient_emails=[to_email_address],
        subject="Hey There! Your weekly research paper recommendations are here",
        recommendation_text=recommendation_text,
    )
    return top_recommendations

In [9]:
# Search terms and result cap handed to the paper fetcher.
keywords = ['computer', 'vision']
max_result = 1000

# Refresh the paper data and the user profiles before recommending.
# NOTE(review): presumably these produce data/papers.csv and the profiles that
# recommend_for_user reads later — confirm in fetch_papers / user_profiles.
fetch_recent_cv_papers(keywords, max_result=max_result)
create_user_profiles()

In [39]:
# Send this user an LLM-written recommendation email, then record the batch
# that was sent in their per-user JSON file so later feedback can be matched.
user_id = 'uday'
top_recs = send_recommendations_to_user_generative(user_id)
store_recommendations_sent(user_id, top_recs)

Sent recommendation to udaysanthoshkgp@gmail.com


In [40]:
# Send this user an LLM-written recommendation email, then record the batch
# that was sent in their per-user JSON file so later feedback can be matched.
user_id = 'harshita'
top_recs = send_recommendations_to_user_generative(user_id)
store_recommendations_sent(user_id, top_recs)

Sent recommendation to harshita.mandalika@tamu.edu


In [41]:
# Send this user an LLM-written recommendation email, then record the batch
# that was sent in their per-user JSON file so later feedback can be matched.
user_id = 'sejeong'
top_recs = send_recommendations_to_user_generative(user_id)
store_recommendations_sent(user_id, top_recs)

Sent recommendation to smoon23@tamu.edu


In [42]:
# Fetch feedback over the IMAP connection and fold it into each sender's
# liked/disliked lists. update_user_feedback unpacks every entry as
# (sender_email, subject, body, received_time).
feedbacks = fetch_feedback(imap)
update_user_feedback(feedbacks)