In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.embeddings import resolve_embed_model
from llama_index.llms.ollama import Ollama
from llama_index.readers.file import PDFReader, PyMuPDFReader

from pdfminer.high_level import extract_text

from crewai import Agent, Task, Crew, Process
import os

In [2]:
# Directory layout, resolved relative to the parent of the current working
# directory (the notebook is expected to run from a sub-folder of the project).
PROJECT_DIR = os.path.split(os.getcwd())[0]
DATA_PATH = os.path.join(PROJECT_DIR, 'data')
# Sub-folders of the data directory: source files and extracted text.
RAW_PATH, TEXT_DIR = (
    os.path.join(DATA_PATH, leaf) for leaf in ('raw', 'text')
)

In [3]:
# Collect the full paths of the PDF files stored directly in RAW_PATH.
# - Match on the file extension (case-insensitively) rather than on a
#   substring, so names such as "notes.pdf.bak" are not picked up.
# - Sort the result: os.listdir() returns entries in arbitrary order, and the
#   extraction order below should be the same on every run.
pdfs = sorted(
    os.path.join(RAW_PATH, name)
    for name in os.listdir(RAW_PATH)
    if name.lower().endswith('.pdf')
)

In [None]:
# Extract the text of every PDF in `pdfs` into one combined text file.
# - The output goes to TEXT_DIR, not RAW_PATH: RAW_PATH is the input folder
#   that is listed for PDFs and loaded by SimpleDirectoryReader, so writing
#   the derived file there would feed it back into the index.
# - The file is opened once in 'w' mode so re-running the cell rewrites it
#   instead of appending a second copy of the same text.
# - UTF-8 is set explicitly; the platform default encoding may not be able to
#   represent every character found in the PDFs.
os.makedirs(TEXT_DIR, exist_ok=True)
txt_path = os.path.join(TEXT_DIR, 'contents.txt')
with open(txt_path, 'w', encoding='utf-8') as f:
    for pdf in pdfs:
        text = extract_text(pdf)
        f.write(text)

In [4]:
# Embedding model: BAAI bge-small-en-v1.5, resolved through the "local:" prefix.
Settings.embed_model = resolve_embed_model(
    "local:BAAI/bge-small-en-v1.5"
)

# LLM: the "mistral" model through the Ollama wrapper, temperature 0.3.
Settings.llm = Ollama(
    temperature=0.3,
    model="mistral",
)

In [5]:
# Load every file found under RAW_PATH as llama-index documents.
documents = SimpleDirectoryReader(input_dir=RAW_PATH).load_data()

# Build the vector index over the loaded documents; embeddings come from the
# model configured on Settings.
index = VectorStoreIndex.from_documents(documents=documents)

In [6]:
# Ask a question against the index and print the answer text.
query_engine = index.as_query_engine()
response = query_engine.query(
    "How many text file are there?"
)
print(str(response))

 Based on the provided context information, there are a total of 38 text files mentioned.


In [None]:
# Imports for the CrewAI / LangChain part of the notebook.
# NOTE(review): `os` and the crewai names are already imported in the first
# cell; these two lines repeat them.
import os
from crewai import Agent, Task, Crew, Process
from langchain.tools import DuckDuckGoSearchRun
from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun

# NOTE(review): this rebinds the name `Ollama`. The first cell imports `Ollama`
# from llama_index.llms.ollama and the Settings cell uses that name for
# Settings.llm, so re-running the Settings cell after this one would pick up
# this LangChain class instead.
from langchain.llms import Ollama

In [None]:
# Web-search tool and LLM intended for the CrewAI agents.
# The LangChain Ollama wrapper is imported here under an explicit alias: this
# notebook imports two different classes named `Ollama` (one from llama-index,
# one from LangChain), so the bare name refers to whichever import ran last.
from langchain_community.llms import Ollama as LangChainOllama

search_tool = DuckDuckGoSearchRun()
ollama_llm = LangChainOllama(model="mistral")

In [None]:
# # NOTE: this entire cell is commented out (a disabled draft of the CrewAI pipeline).
# # Define your agents with roles and goals
# researcher = Agent(
#   role='Senior AI Researcher',
#   goal='To identify potential of AI in the field of hybrid grouper aquaculture',
#   backstory="""A highly skilled researcher in both field of hybrid grouper aquaculture and AI""",
#   verbose=True,
#   allow_delegation=True,
#   tools=[search_tool, SemanticScholarQueryRun()],
#   llm=ollama_llm
# )

# writer = Agent(
#     role='A highly skilled research writer',
#     goal='Provide the full research paper for submission in high impact journal',
#     backstory="""Experienced research paper writer which submitted more than 100 research paper to high impact journal""",
#     verbose=True,
#     allow_delegation=False,
#     llm=ollama_llm
# )

# reviewer = Agent(
#     role='Highly skilled and experienced writer and editor',
#     goal="""Provide the full research paper for submission in high impact journal""",
#     backstory="""As an experienced writer and editor, your role is to meticulously review all written content, validating references for accuracy and novelty. 
#     Your task is to ensure the text meets the high standards necessary for acceptance into a prestigious high-impact research journal. Your expertise will be
#      instrumental in guaranteeing the quality and relevance of the submission.""",
#     verbose=True,
#     allow_delegation=True,
#     tools=[search_tool, SemanticScholarQueryRun()],
#     llm=ollama_llm
# )

# # Create tasks for your agents
# task1 = Task(
#   description="""Get the knowledge of hybrid grouper and AI application in the field of hybrid grouper aquaculture from the year 2020 to 2024""",
#   expected_output = "50 references and its summary in markdown",
#   agent=researcher
# )

# task2 = Task(
#     description="""Write a review research paper tailored for submission to a high-impact journal focussing on AI application to hybrid grouper aquaculture.
#     The paper should encompass essential sections including an introduction, current state of art method, research gap identification, comprehensive discussion, conclusion, and abstract. 
#     Ensure each section is meticulously crafted to meet the standards of high-impact journals, showcasing the significance and novelty of the research findings.""",
#     expected_output = "Full research paper in markdown",
#     agent=writer
# )

# task3 = Task(
#     description="""Conduct a thorough review of the paper to ensure its alignment with the standards required for submission to a high-impact journal. Evaluate the content,
#     structure, clarity, and novelty of the research to enhance its suitability for publication in a prestigious academic journal. Lastly, output me the refined text in markdown""",
#     expected_output = "Give me the full refined research paper in markdown", 
#     agent=reviewer
# )

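# # Instantiate your crew with a sequential process
# # NOTE(review): `reviewer` and `task3` are defined above, but `task3` is not listed in `tasks` below.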
# crew = Crew(
#     agents=[researcher, writer, reviewer],
#     tasks=[task1, task2],
#     verbose=2
# )

# # Get your crew to work!
# result = crew.kickoff()

# print("######################")
# print(result)