In [1]:
from tqdm import tqdm
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.system import SystemMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Optional
from datetime import datetime
from enum import Enum
import pandas as pd
import time
import os
import yaml

In [3]:
class Question(BaseModel):
    type: str = Field(..., description="The type of question (multiple_choice or open_ended)")
    text: str = Field(..., description="The text of the question")
    choices: Optional[List[str]] = Field(None, description="The choices for multiple choice questions")
    answer: Optional[str] = Field(None, description="The answer to the question")

4 main topics (5 multiple-choice questions + 5 open-ended questions each)

- Nutrient Solution Management (10 questions)
- Water Quality Monitoring (10 questions)
- Crop Growth Conditions (10 questions)
- Disease Prevention and Control (10 questions)

In [4]:
data_org_df = pd.read_csv("./data_source.csv")
data_org_df

Unnamed: 0,paper_title,paper_url
0,Seasonal Nitrous Oxide Emissions From Hydropon...,https://www.frontiersin.org/journals/sustainab...
1,Microbial Community Analysis and Food Safety P...,https://www.frontiersin.org/journals/microbiol...
2,Effect of Methyl Jasmonate Treatment on Primar...,https://www.frontiersin.org/journals/nutrition...
3,Distinct metabolite classes in root exudates a...,https://www.frontiersin.org/journals/plant-sci...
4,Changes in Leaf Anatomical Traits Enhanced Pho...,https://www.frontiersin.org/journals/plant-sci...
5,Hydroponic lettuce defective leaves identifica...,https://www.frontiersin.org/journals/plant-sci...
6,Integrated physicochemical hormonal and transc...,https://www.frontiersin.org/journals/plant-sci...
7,Use of reclaimed urban wastewater for the prod...,https://www.frontiersin.org/journals/veterinar...
8,Isolation Characterization and Inactivation of...,https://www.frontiersin.org/journals/microbiol...
9,A Flexible Low-Cost Hydroponic Co-Cultivation ...,https://www.frontiersin.org/journals/plant-sci...


In [5]:
multiple_choice_questions = []
open_ended_questions = []
for i in range(10):
    loader = WebBaseLoader(data_org_df.iloc[i]["paper_url"])
    data = loader.load()
    content = data[0].page_content.replace("\n", " ").replace("Frontiers |", " ").replace("espondence: Stefan Karlowsky, karlowsky@igzev.de   Disclaimer:        All claims expressed in this article are solely those of the authors and       do not necessarily represent those of their affiliated organizations, or       those of the publisher, the editors and the reviewers. Any product that       may be evaluated in this article or claim that may be made by its       manufacturer is not guaranteed or endorsed by the publisher.        Footer  Guidelines   Author guidelinesEditor guidelinesPolicies and publication ethicsFee policy Explore   ArticlesResearch Topics Journals Outreach   Frontiers Forum Frontiers Policy Labs Frontiers for Young Minds Connect   Help centerEmails and alerts Contact us SubmitCareer opportunities Follow us  © 2024 Frontiers Media S.A. All rightes Follow us  © 2024 Frontiers Media S.A. All right", " ")
    parser = JsonOutputParser(pydantic_object=Question)
    chatllm = ChatOpenAI(api_key="sk-XX", model_name="gpt-4-turbo")
    template = PromptTemplate(
        template = "You are top expert in Hydroponic, I will show you some content and ask you to provide some questions and answers for the content (one multiple-choice questions and one open-ended questions). \n #format_instructions: {format_instructions} and use list to package up [question1,question2] #Content: {content}",
        input_variables=['content'],
        partial_variables={"format_instructions": parser.get_format_instructions()}
        )
    chain = template | chatllm | parser
    out = chain.invoke({"content": content})
    print(out)
    for question in out:
        if question['type'] == "multiple_choice":
            multiple_choice_questions.append(question)
        else:
            open_ended_questions.append(question)
    time.sleep(1)
    print(f"Processed {i+1} papers")

[{'type': 'multiple_choice', 'text': 'What is the global warming potential of nitrous oxide (N2O) compared to carbon dioxide (CO2) on a 100-year scale?', 'choices': ['10 times higher', '100 times higher', '300 times higher', '500 times higher'], 'answer': '300 times higher'}, {'type': 'open_ended', 'text': 'Why are nitrous oxide emissions from hydroponic systems generally lower compared to field cultivation?', 'answer': 'Nitrous oxide emissions from hydroponic systems are generally lower than field cultivation because hydroponic systems allow for more controlled and precise irrigation and nutrient application, creating more aerobic conditions in the root zones. This minimizes conditions favorable for denitrification processes that are major contributors to N2O emissions. Additionally, the use of inert substrates like rock wool can further reduce emissions by avoiding the organic matter content that fuels microbial activity responsible for N2O production.'}]
Processed 1 papers
[{'type':

In [6]:
multiple_choice_questions

[{'type': 'multiple_choice',
  'text': 'What is the global warming potential of nitrous oxide (N2O) compared to carbon dioxide (CO2) on a 100-year scale?',
  'choices': ['10 times higher',
   '100 times higher',
   '300 times higher',
   '500 times higher'],
  'answer': '300 times higher'},
 {'type': 'multiple_choice',
  'text': 'What is the most dominant genus identified in the hydroponic microgreen farm according to the study?',
  'choices': ['Pseudomonas',
   'Actinobacteria',
   'Cyanobacteria',
   'Proteobacteria'],
  'answer': 'Pseudomonas'},
 {'type': 'multiple_choice',
  'text': 'What is the effect of 500 μM MeJA treatment on the total sugar and amino acid contents of hydroponically grown Chinese chives?',
  'choices': ['It significantly decreased both total sugar and amino acid contents',
   'It significantly increased both total sugar and amino acid contents',
   'It significantly increased total sugar content but decreased amino acid content',
   'No significant changes were

In [8]:
for i in range(10):
    multiple_choice_questions[i]["url"] = data_org_df.iloc[i]["paper_url"]

In [9]:
# save to csv
multiple_choice_questions_df = pd.DataFrame(multiple_choice_questions)
multiple_choice_questions_df.to_csv("multiple_choice_questions.csv", index=False)

In [10]:
for i in range(10):
    open_ended_questions[i]["url"] = data_org_df.iloc[i]["paper_url"]

In [11]:
# save to csv
open_ended_questions_df = pd.DataFrame(open_ended_questions)
open_ended_questions_df.to_csv("open_ended_questions.csv", index=False)