In [7]:
from autogen import AssistantAgent, UserProxyAgent
import arxiv
import requests
from io import BytesIO
import PyPDF2
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
import re
from collections import Counter
import openai
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from pytrends.request import TrendReq
import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
API_KEY = os.environ.get('OPENAI_API_KEY')

# LLM Configuration
def get_llm_config():
    return {
        "api_key": API_KEY,
        "model": "gpt-4-turbo",
        "temperature": 0.7,
        "max_tokens": 1000
    }

# Create Assistant Agent
assistant_agent = AssistantAgent(
    name="research_assistant",
    system_message="""
    You are an AI assistant specialized in research tasks, including paper recommendations, summarization, trend analysis, idea generation, and evaluate idea feasibility.
    All responses must adhere to the following guidelines:

    1) Do not fabricate information in the absence of sufficient evidence, and clearly indicate "알 수 없습니다" or "잘 모르겠습니다" if uncertain.
    2) Verify possible information step-by-step before responding, marking ambiguous or unclear sources as "확실하지 않음."
    3) Base final responses only on verified information, keeping answers concise. If speculation is necessary, disclose it by stating "추측입니다."
    4) If the user's query is unclear or requires further information, first request additional context or details from the user.
    5) Do not confidently assert unverified facts and provide evidence if necessary, including sources or references when available.
    6) For every answer, specify supporting information with references or summarized related links and materials wherever possible.

    # Steps
    - **Paper Recommendations:**
      1. Identify the specific field or topic area the user is interested in.
      2. Search for recent and relevant papers within that field.
      3. Filter and rank them based on relevance, publication date, and impact factor.
      4. Provide a list of recommended papers with a brief description of each.

    - **Summarization:**
      1. Extract key points and findings from the provided paper or research material.
      2. Highlight significant contributions and conclusions.
      3. Write a concise summary that communicates the main insights.

    - **Trend Analysis:**
      1. Gather data or publications related to a specific research domain over time.
      2. Identify patterns, common themes, emerging topics, and shifts in focus.
      3. Present an analysis that explains these trends.

    - **Idea Generation:**
     1. Understand the user's area of interest and goals.
     2. Brainstorm and gather inspiration from recent publications, trends, and gaps in the literature.
     3. Present new research ideas or questions that can be pursued further.

    # Output Format

    - Responses should be in paragraph form, clearly structured and organized according to the task type.
    - For lists, use bullet points or numbered lists where appropriate.
    - Ensure any visualizations are described in detail with clear explanations for what they represent.
    - Use a formal and informative tone suitable for academic or professional contexts.

    # Notes

    - Ensure recommendations are current and papers are from credible sources.
    - For trend analysis, consider incorporating historical data and future predictions when possible.
    - When generating ideas, ensure they are feasible and backed by current research to a reasonable extent.""",
    llm_config=get_llm_config()
)


client = OpenAI(api_key=API_KEY)

# Create User Proxy Agent
def create_user_proxy():
    return UserProxyAgent(
        name="Admin",
        system_message="You are the user proxy handling requests and interacting with the AI assistant.",
        human_input_mode="ALWAYS",
        default_auto_reply="Reply 'TERMINATE' if the task is done.",
        code_execution_config={"use_docker": False}
    )

# Task functions
def recommend_papers_tool(query: str, year: int = None, limit: int = 5) -> str:
    try:
        search_query = f"{query}"
        if year:
            search_query += f" AND submittedDate:[{year}0101 TO {year}1231]"

        user = arxiv.Client()
        search = arxiv.Search(query=search_query, max_results=limit, sort_by=arxiv.SortCriterion.Relevance)
        papers = [
            f"Title: {result.title}\nAuthors: {', '.join([author.name for author in result.authors])}\nAbstract: {result.summary}\nSubmitted Date: {result.updated.date().isoformat()}\n"
            for result in user.results(search)
        ]
        return f"Recommended papers for '{query}':\n\n" + "\n\n".join(papers)
    except Exception as e:

        return f"Error while recommending papers: {e}"

def summarize_pdf_tool(url: str) -> str:
    try:
        nltk.download('punkt_tab')
        response = requests.get(url)
        response.raise_for_status()
        memory_file = BytesIO(response.content)
        pdf_reader = PyPDF2.PdfReader(memory_file)
        pdf_text = "".join(page.extract_text() for page in pdf_reader.pages)

        if not pdf_text.strip():
            return "No text could be extracted from the PDF."

        parser = PlaintextParser.from_string(pdf_text, Tokenizer("english"))
        summarizer = LexRankSummarizer()
        total_sentences = len(parser.document.sentences)  # 전체 문장 개수 계산
        summary_length = max(3, total_sentences // 5)  # 전체 문장의 20%를 요약 (최소 3문장)

        summary = summarizer(parser.document, sentences_count=summary_length)
        return "PDF Summary: " + " ".join(str(sentence) for sentence in summary)

    except Exception as e:
        return f"Error summarizing PDF: {e}"

# 연구 트렌드 분석 툴

def extract_research_trends(query: str, year: int = 2025, limit: int = 10) -> str:
    papers = recommend_papers_tool(query, year, limit)
    abstracts = [p.split("\n")[3] for p in papers.split("\n\n") if "Abstract:" in p]

    if not abstracts:
        return "No abstracts found. Unable to analyze trends."

    # TF-IDF 적용
    vectorizer = TfidfVectorizer(stop_words='english', max_features=10)
    tfidf_matrix = vectorizer.fit_transform(abstracts)
    top_keywords = vectorizer.get_feature_names_out()

    # LLM을 활용해 트렌드 요약
    prompt = f"""
    최근 {query} 관련 연구에서 도출된 주요 키워드는 {', '.join(top_keywords)}입니다.
    이 키워드를 기반으로 최근 연구 트렌드를 분석해 주세요.
    """


    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

# 연구 아이디어 생성 툴
def generate_research_idea(query: str, year: int = 2025, limit: int = 20) -> str:
    papers = recommend_papers_tool(query, year, limit)
    abstracts = [p.split("\n")[3] for p in papers.split("\n\n") if "Abstract:" in p]

    if not abstracts:
        return "No abstracts found. Unable to generate research ideas."

    # 연구 공백(한계점) 분석 → 새로운 연구 아이디어 도출
    prompt = f"""
    최근 {query} 관련 연구 논문을 참고하세요:
    {abstracts[:3]}

    1) 이 논문들에서 제시된 주요 연구 한계점(Gap)은 무엇인가요?
    2) 이를 해결할 수 있는 새로운 연구 아이디어를 제안하세요.
    3) 실현 가능성을 1~10점으로 평가하고, 이유를 설명하세요.
    """


    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content


# 연구 아이디어 실현 가능성 평가 툴
def evaluate_feasibility(research_idea: str) -> str:
    prompt = f"""
    Here is a research idea:
    {research_idea}

    다음 기준을 적용하여 실현 가능성을 평가하세요:
    1) 기술 준비 수준(TRL) → 1~9단계
    2) 데이터 가용성 (1: 매우 부족 ~ 10: 풍부)
    3) 연구 수행 난이도 (1: 쉬움 ~ 10: 어려움)

    각 항목별 점수를 부여하고, 총평을 작성하세요.
    """


    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

# Register tools to the assistant
assistant_agent.register_for_llm(name="recommend_papers", description="Recommend research papers.")(recommend_papers_tool)
assistant_agent.register_for_llm(name="summarize_pdf", description="Summarize plain text.")(summarize_pdf_tool)
assistant_agent.register_for_llm(name="extract_research_trends", description="Analyze research trends from recent papers.")(extract_research_trends)
assistant_agent.register_for_llm(name="generate_research_idea", description="Generate novel research ideas.")(generate_research_idea)
assistant_agent.register_for_llm(name="evaluate_feasibility", description="Evaluate the feasibility of a research idea.")(evaluate_feasibility)

# Initialize User Proxy
user_proxy = create_user_proxy()
user_proxy.register_for_execution(name="recommend_papers")(recommend_papers_tool)
user_proxy.register_for_execution(name="summarize_pdf")(summarize_pdf_tool)
user_proxy.register_for_execution(name="extract_research_trends")(extract_research_trends)
user_proxy.register_for_execution(name="generate_research_idea")(generate_research_idea)
user_proxy.register_for_execution(name="evaluate_feasibility")(evaluate_feasibility)

# Main Execution
if __name__ == "__main__":
    print("Welcome to the interactive research assistant!")
    print("You can ask questions like:")
    print("- 추천: LLM에 관한 논문 3개 추천해줘, 2023년 이후.")
    print("- 요약: 연구 논문의 내용을 요약해줘.")
    print("- 연구 트렌드 분석: 특정 주제의 최신 연구 동향을 분석해줘.")
    print("- 연구 아이디어 생성: 새로운 연구 주제를 추천해줘.")
    print("- 연구 아이디어 실현 가능성 평가: 연구 아이디어의 현실성을 평가해줘.")
    # 종료 방법 안내 추가
    print("\nType 'exit' or 'quit' at the 'You: ' prompt below to end the session.") 


    while True:
        # 이 프롬프트는 각 작업 요청 전에 표시됩니다.
        user_input = input("You: ").strip() 
        
        # 여기서 'exit' 또는 'quit'를 입력하면 프로그램이 종료됩니다.
        if user_input.lower() in ["exit", "quit"]:
            print("Exiting the assistant. Goodbye!")
            break # while 루프를 탈출합니다.

        try:

            response = user_proxy.initiate_chat(
                assistant_agent, 
                messages=[{"role": "user", "content": user_input}]
            )
            print(f"Assistant: {response}")
        except Exception as e:
            print(f"Error: {e}")

Welcome to the interactive research assistant!
You can ask questions like:
- 추천: LLM에 관한 논문 3개 추천해줘, 2023년 이후.
- 요약: 연구 논문의 내용을 요약해줘.
- 연구 트렌드 분석: 특정 주제의 최신 연구 동향을 분석해줘.
- 연구 아이디어 생성: 새로운 연구 주제를 추천해줘.
- 연구 아이디어 실현 가능성 평가: 연구 아이디어의 현실성을 평가해줘.

Type 'exit' or 'quit' at the 'You: ' prompt below to end the session.
[33mAdmin[0m (to research_assistant):

123

--------------------------------------------------------------------------------
[33mresearch_assistant[0m (to Admin):

안녕하세요! 도움이 필요한 부분이 있으시면 구체적으로 말씀해 주세요. 연구 관련 도움이 필요하시면 어떤 주제나 분야에 대해 더 알고 싶으신지 알려주시면 좋겠습니다.

--------------------------------------------------------------------------------
[33mAdmin[0m (to research_assistant):

123

--------------------------------------------------------------------------------
[33mresearch_assistant[0m (to Admin):

안녕하세요! 어떤 도움이 필요하신지 구체적으로 알려주시면 도움을 드리겠습니다. 연구 관련 질문이나 다른 문의 사항이 있으면 말씀해 주세요.

--------------------------------------------------------------------------------
[31m
>>>>>>>> NO

KeyboardInterrupt: Interrupted by user