In [1]:
from phi.agent import Agent, RunResponse
from phi.model.groq import Groq
import dotenv
import pandas as pd
import requests
from bs4 import BeautifulSoup
from web_search import brave_search, ddg_search
import os

# Init
dotenv.load_dotenv()


True

In [2]:
agent = Agent(
    model=Groq(id="llama-3.3-70b-versatile"),
    add_chat_history_to_messages=True,
    num_history_responses=5,
    tools=[],
    markdown=True
)

search_engine = 'brave' # brave or ddg
pi_name = "Ibrahim Cisse"
affiliation = ''

# Get Google Scholar Link
query = pi_name + " " + affiliation + " " + "Google Scholar"

if search_engine == 'brave':
    search_result = brave_search(query=query)
else:
    search_result = ddg_search(query=query)

search_result

200


'Title: \u202aIbrahim Cissé\u202c - \u202aGoogle Scholar\u202c\nURL: https://scholar.google.fr/citations?user=SsuqMfwAAAAJ&hl=en\nDescription: \u202aMax Planck Institute, Freiburg\u202c - \u202a\u202aCited by 8,152\u202c\u202c - \u202aPhysics\u202c - \u202aBiophysics\u202c\n\nTitle: Ibrahim Cissé (academic) - Wikipedia\nURL: https://en.wikipedia.org/wiki/Ibrahim_Ciss%C3%A9_(academic)\nDescription: Ibrahim I. Cissé is a Nigerien-American biophysicist. He is currently director of the Max Planck Institute of Immunobiology and Epigenetics. Previously, Cissé was at the California Institute of Technology as Professor of Physics and the Massachusetts Institute of Technology as Professor of ...\n\nTitle: The Cissé Laboratory @ MIT\nURL: http://www.icisse.org/\nDescription: Endowed Physics Professorship: Ibrahim has been named to hold the Class of 1922 Career Development Chair at MIT.\n\nTitle: Ibby (@IbrahimCisse_) / Twitter\nURL: https://twitter.com/ibrahimcisse_\nDescription: The latest twee

In [3]:
gs_task = f"Provided below are top web search results for the Google Scholar webpage of {pi_name}, {affiliation}. Decide which of the results looks like it is the official Google Scholar page and provide ONLY the link to the Google Scholar page, and nothing else in your response. If none of the results look like the correct person's Google Scholar webpage, respond with only '0'"
gs_prompt = f"{gs_task} \n<WEB RESULTS START>\n{search_result}\n<WEB RESULTS END>"

# agent.print_response(prompt, stream=True)
gs_run = agent.run(gs_prompt)
# pprint([m.model_dump(include={"role", "content"}) for m in agent.memory.messages])
# print(gs_run.content)
if gs_run.content != '0':
    scholar_url = gs_run.content + "&view_op=list_works&sortby=pubdate"

# Read publication list on Google Scholar
scholar_page = requests.get(scholar_url)

scholar_page.status_code

429

In [8]:
scholar_page = requests.get(scholar_url, headers={'User-agent': 'mail-agentv0.1'})

scholar_page.status_code

200

In [9]:
if str(scholar_page.status_code).startswith('2'):
    soup = BeautifulSoup(scholar_page.text, 'html.parser')
    publist = soup.find_all(id='gsc_a_t')[0]
    gs_html = publist.prettify()

gs_html

'<table id="gsc_a_t">\n <thead>\n  <tr aria-hidden="true" id="gsc_a_tr0">\n   <th class="gsc_a_t">\n   </th>\n   <th class="gsc_a_c">\n   </th>\n   <th class="gsc_a_y">\n   </th>\n  </tr>\n  <tr id="gsc_a_trh">\n   <th class="gsc_a_t" scope="col">\n    <span id="gsc_a_ta">\n     <a class="gsc_a_a" href="/citations?hl=en&amp;oe=ASCII&amp;user=SsuqMfwAAAAJ&amp;view_op=list_works&amp;sortby=title">\n      Title\n     </a>\n    </span>\n    <div class="gs_md_r gs_md_rmb gs_md_rmbl" id="gsc_dd_sort-r">\n     <button aria-controls="gsc_dd_sort-d" aria-haspopup="true" class="gs_in_se gs_btn_mnu gs_btn_flat gs_btn_lrge gs_btn_half gs_btn_lsu gs_press gs_md_tb" id="gsc_dd_sort-b" ontouchstart="gs_evt_dsp(event)" type="button">\n      <span class="gs_wr">\n       <span class="gs_lbl">\n        Sort\n       </span>\n       <span class="gs_icm">\n       </span>\n      </span>\n     </button>\n     <div class="gs_md_d gs_md_ds gs_md_ulr" id="gsc_dd_sort-d" role="menu" tabindex="-1">\n      <div cla

In [10]:
# Extract most relevant papers
extr_task = f"Provided below is the Google Scholar HTML Table of {pi_name}'s publication list sorted by most recent. From this, pick the 3 most relevant to the criteria: AI/ML, mathematical modelling, genomics. Your response should contain only the publication names listed from 1 to 3."
extr_prompt = f'{extr_task}\n<HTML TABLE STARTS>\n{gs_html}\n<HTML TABLE ENDS>'
extr_run = agent.run(extr_prompt)
top_papers = extr_run.content

top_papers

'1. Light-induced targeting enables proteomics on endogenous condensates\n2. Direct observation of a condensate effect on super-enhancer controlled gene bursting\n3. RNA-mediated feedback control of transcriptional condensates'

In [13]:
# Personalize email
para_agent = Agent(
    model=Groq(id="gemma2-9b-it"),
    # add_chat_history_to_messages=True,
    # num_history_responses=5,
    tools=[],
    markdown=True
)

example_para = "I find the ways the brain captures, encodes, and processes information to be extremely fascinating, and your lab's work on elucidating the mechanisms of memory and learning has been a significant inspiration. I'm particularly interested in your projects combining genetics with neuroscience, and thoroughly enjoyed reading your paper on how neuronal ensemble dynamics in the hippocampus underlie episodic memory formation, as well as your work on the role of synaptic plasticity in the retrosplenial cortex in contextual learning, especially its insights into activity-dependent transcriptional and epigenetic programs critical for memory retrieval and consolidation."
personalize_task = "Given the chosen papers (given below), talk about my interest in them in a short paragraph that mimics the example paragraph given below. The paragraph should talk about my interest in the papers, not plainly highlighting what the paper is talking about. The paragraph is meant to be sent to the PI/Professor whose lab has produced this research. Respond with only the paragraph and nothing else."
personalize_prompt = f'{personalize_task}\n<EXAMPLE PARAGRAPH STARTS>\n{example_para}\n<EXAMPLE PARAGRAPH ENDS>\nRELEVANT PAPERS: {top_papers}'
personalize_run = para_agent.run(personalize_prompt)

personalized_para = personalize_run.content
personalized_para

"I am deeply intrigued by the emerging field of liquid-liquid phase separation and its implications for understanding gene regulation and cellular function.  Your lab's pioneering work on visualizing and manipulating transcriptional condensates, particularly the innovative use of light-induced targeting for proteomics, is incredibly exciting. I am particularly fascinated by the direct observation of condensate effects on super-enhancers and the role of RNA-mediated feedback in regulating these dynamic structures. \n\n\n"

In [2]:
import pandas as pd
pi_list = pd.read_csv('sample_list.csv')

print(pi_list['pi_name'])

0    Ibrahim Cisse
1      Sean Murray
Name: pi_name, dtype: object
