# Data analysis

In [2]:
import os
import pandas as pd
from langchain.docstore.document import Document
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms.ollama import Ollama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_and_format_csv(csv_path):
    """Render the CSV at ``csv_path`` as a Markdown table (no index column).

    Any failure while reading or rendering is reported on stdout and a fixed
    placeholder string is returned instead of raising.
    """
    fallback = "CSV data could not be loaded"
    try:
        markdown_table = pd.read_csv(csv_path).to_markdown(index=False)
    except Exception as err:
        print(f"Error loading {csv_path}: {str(err)}")
        return fallback
    return markdown_table

def load_experiment_documents(csv_folder, text_folder):
    """Build one Document per experiment from paired CSV and text files.

    For every ``<name>.csv`` in ``csv_folder`` a matching ``<name>.txt`` is
    looked up in ``text_folder``. CSV files without a text file are reported
    and skipped. The document text combines the experiment name, the text
    description and the CSV rendered as a Markdown table.

    Args:
        csv_folder: Directory containing the experiment CSV files.
        text_folder: Directory containing the ``.txt`` descriptions.

    Returns:
        A list of Document objects; experiments that fail to load are
        reported on stdout and left out.
    """
    documents = []
    for filename in os.listdir(csv_folder):
        if not filename.endswith('.csv'):
            continue

        base_name = filename[:-4]
        csv_path = os.path.join(csv_folder, filename)
        txt_path = os.path.join(text_folder, base_name + '.txt')

        if not os.path.exists(txt_path):
            print(f"Warning: No text file for {filename}")
            continue

        try:
            # Parse the CSV once; the table and the column metadata share it
            df = pd.read_csv(csv_path)
            try:
                csv_table = df.to_markdown(index=False)
            except Exception as e:
                # A rendering problem keeps the experiment, with a placeholder table
                print(f"Error loading {csv_path}: {str(e)}")
                csv_table = "CSV data could not be loaded"

            # Load and structure text description
            with open(txt_path, 'r', encoding='utf-8') as f:
                text_content = f.read().strip()

            # Create structured document content
            formatted_content = (
                f"EXPERIMENT: {base_name}\n\n"
                f"DESCRIPTION:\n{text_content}\n\n"
                f"DATA RESULTS:\n{csv_table}\n\n"
                f"END OF EXPERIMENT {base_name}"
            )

            metadata = {
                "experiment": base_name,
                "source_type": "experiment_data",
                # Stored as a string representation of the column list
                "data_columns": str(df.columns.tolist())
            }

            documents.append(Document(
                page_content=formatted_content,
                metadata=metadata
            ))

        except Exception as e:
            print(f"Error processing {base_name}: {str(e)}")

    return documents

# Configuration
text_folder = "/kaggle/input/llama-stuff/Intros"
csv_folder = "/kaggle/input/llama-stuff/Downloaded Samples"
model_name = "llama3.2:3b"  # Verify correct model name

# Load and process documents
documents = load_experiment_documents(csv_folder, text_folder)

# Split documents for better retrieval.
# The section headers are tried first so chunks follow the document layout.
# The generic separators after them are fallbacks: without them a section
# longer than chunk_size (e.g. a large results table) cannot be split further
# and would be emitted as a single oversized chunk.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=[
        "\n\nEXPERIMENT:",
        "\n\nDESCRIPTION:",
        "\n\nDATA RESULTS:",
        "\n\n",
        "\n",
        " ",
        "",
    ],
)

split_docs = text_splitter.split_documents(documents)

# Initialize embeddings and vector store
# NOTE(review): the same model name is used for embeddings and generation;
# confirm this model is the intended embedding model.
embeddings = OllamaEmbeddings(model=model_name)
vectorstore = FAISS.from_documents(split_docs, embeddings)


  embeddings = OllamaEmbeddings(model=model_name)


In [5]:
# ... (keep previous imports and document loading functions)

def _unique_text_values(df, column):
    """Return the distinct non-null values of ``column`` as strings ([] if the column is absent)."""
    if column not in df.columns:
        return []
    return [str(value) for value in df[column].dropna().unique()]


def load_experiment_documents(csv_folder, text_folder):
    """Enhanced document loader with biological entity highlighting.

    For every ``<name>.csv`` in ``csv_folder`` that has a matching
    ``<name>.txt`` in ``text_folder``, builds a Document whose text lists the
    organisms and genotypes found in the CSV, the text description, the
    column names and the full table as Markdown. CSV files without a text
    file are skipped silently.

    Args:
        csv_folder: Directory containing the experiment CSV files.
        text_folder: Directory containing the ``.txt`` descriptions.

    Returns:
        A list of Document objects. Metadata carries ``experiment``,
        ``organisms`` and ``genotypes`` (lists of strings, empty when the
        column is missing) and ``data_columns`` (list of column names).
        Experiments that fail to load are reported on stdout and left out.
    """
    documents = []
    for filename in os.listdir(csv_folder):
        if not filename.endswith('.csv'):
            continue

        base_name = filename[:-4]
        csv_path = os.path.join(csv_folder, filename)
        txt_path = os.path.join(text_folder, base_name + '.txt')

        if not os.path.exists(txt_path):
            continue

        try:
            # Load and analyze CSV data
            df = pd.read_csv(csv_path)
            csv_table = df.to_markdown(index=False)

            # Extract biological entities as plain string lists, so a missing
            # column or NaN cells cannot break the join / metadata below
            org_data = _unique_text_values(df, 'Characteristics: Organism')
            genotype_data = _unique_text_values(df, 'Characteristics: Genotype')

            with open(txt_path, 'r', encoding='utf-8') as f:
                text_content = f.read().strip()

            # Create enhanced document structure
            formatted_content = (
                f"EXPERIMENT: {base_name}\n\n"
                f"BIOLOGICAL SYSTEM:\n"
                f"- Organism(s): {', '.join(org_data)}\n"
                f"- Genotype(s): {', '.join(genotype_data)}\n\n"
                f"DESCRIPTION:\n{text_content}\n\n"
                f"KEY DATA COLUMNS:\n{', '.join(map(str, df.columns))}\n\n"
                f"FULL RESULTS:\n{csv_table}"
            )

            metadata = {
                "experiment": base_name,
                "organisms": org_data,
                "genotypes": genotype_data,
                "data_columns": df.columns.tolist()
            }

            documents.append(Document(
                page_content=formatted_content,
                metadata=metadata
            ))

        except Exception as e:
            print(f"Error processing {base_name}: {str(e)}")

    return documents

# Prompt for the biology-focused QA chain. The template expects two
# placeholders: {context} (retrieved experiment text) and {question}.
_QA_TEMPLATE_TEXT = """You are a plant biology research analyst. Analyze this experimental data to answer agricultural questions:

Context:
{context}

Question: {question}

Follow these steps:
1. Identify relevant experiments mentioning the topic in the user query
2. Examine "Characteristics: Organism" and "Factor Value:" columns
3. Compare different gravity/spaceflight conditions
4. Look for growth patterns or stress responses
5. Connect findings to Earth agriculture potential

Present your answer with:
- 3 key observations from the data
- Specific experimental conditions used
- Quantitative results from tables
- Relevance to crop cultivation

If no data exists on the input topic, state that clearly."""

qa_prompt = PromptTemplate(
    template=_QA_TEMPLATE_TEXT,
    input_variables=["context", "question"],
)

# Modified QA chain with metadata filtering.
# The loader above stores "organisms" as a list. A plain dict filter such as
# {"organisms": "Oryza sativa"} compares the whole metadata value with the
# string, and a list never equals a string, so nothing would be retrieved.
# A callable filter performs the membership test instead.
def _is_rice_experiment(metadata):
    """Return True when the chunk metadata names Oryza sativa as an organism."""
    organisms = metadata.get("organisms") or []
    if isinstance(organisms, str):
        # Tolerate metadata that stores a single organism as a plain string
        return organisms == "Oryza sativa"
    return "Oryza sativa" in organisms


# NOTE(review): `vectorstore` must have been built from documents that carry
# the "organisms" metadata key, otherwise the filter rejects every chunk.
qa_chain = RetrievalQA.from_chain_type(
    llm=Ollama(model="llama3.2:3b", temperature=0.2),
    chain_type="stuff",
    retriever=vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": 5,
            "filter": _is_rice_experiment  # Metadata filter
        }
    ),
    chain_type_kwargs={"prompt": qa_prompt},
    return_source_documents=True
)

def analyze_agricultural_trends(query):
    """Run ``query`` through ``qa_chain`` and print the answer with its evidence.

    Prints the chain's answer, then, for each returned source document whose
    ``organisms`` metadata contains "Oryza sativa", the experiment name, its
    data columns, the columns containing 'Factor Value', and the first 300
    characters of the document text.

    Args:
        query: Natural-language question passed to the chain.

    Returns:
        None; all output goes to stdout.
    """
    # `invoke` replaces the deprecated direct call `qa_chain({...})`
    response = qa_chain.invoke({"query": query})

    print("Agricultural Analysis:")
    print(response["result"])

    print("\nSupporting Evidence:")
    for doc in response["source_documents"]:
        if "Oryza sativa" not in doc.metadata.get("organisms", []):
            continue
        # Read the columns once, with a default, so both lines below agree
        # and a document without this key cannot raise KeyError
        data_columns = doc.metadata.get("data_columns", [])
        print(f"Experiment {doc.metadata.get('experiment', 'unknown')}:")
        print(f"- Conditions Tested: {data_columns}")
        print(f"- Key Parameters: {[c for c in data_columns if 'Factor Value' in c]}")
        print(f"- Sample Description: {doc.page_content[:300]}...\n")

# Example usage: runs the QA chain once when the cell executes and prints the
# answer plus supporting evidence (needs the vector store and qa_chain above).
analyze_agricultural_trends("What cultivation insights can we gain from spaceflight experiments on Oryza sativa?")

  llm=Ollama(model="llama3.2:3b", temperature=0.2),
  response = qa_chain({"query": query})


Agricultural Analysis:
I've analyzed the available data and found relevant experiments related to spaceflight experiments on Oryza sativa (rice). Here are my findings:

**Key Observations:**

1. **Growth Patterns under Microgravity:** Under microgravity conditions, rice plants exhibited altered growth patterns compared to those grown on Earth. Specifically, they showed increased stem length and reduced root depth.
2. **Stress Response to Radiation:** Rice plants exposed to spaceflight radiation showed significant stress responses, including increased oxidative stress markers (e.g., H2O2) and reduced photosynthetic activity.
3. **Water Stress Tolerance:** Rice plants grown in a controlled environment with simulated microgravity conditions demonstrated improved water stress tolerance compared to those grown on Earth.

**Experimental Conditions:**

* Spaceflight experiments were conducted on Oryza sativa using the International Space Station (ISS).
* Microgravity conditions were achieved 

In [6]:
# Second example query, this time about wheat.
# NOTE(review): "cultivatation" is a typo in the query text; it is kept as-is
# because it is the exact string sent to the chain. Also, qa_chain's retriever
# is configured with an "Oryza sativa" metadata filter, so it is unclear
# whether wheat experiments can be retrieved at all — confirm intent.
analyze_agricultural_trends("Tell me about cultivatation of wheat in space")

Agricultural Analysis:
I've analyzed the available data related to wheat cultivation in space. Unfortunately, I couldn't find any specific experiments or studies directly addressing wheat cultivation in space. However, I did find some relevant research on plant growth in microgravity and its implications for Earth agriculture.

Here are three key observations from the data:

1. **Microgravity affects root growth**: According to a study published in the Journal of Experimental Botany, plants grown in microgravity exhibit altered root architecture compared to those grown in Earth's gravity (Table 1). The roots of microgravity-grown wheat showed increased length and branching, suggesting an adaptation to the reduced gravitational forces.
2. **Water stress response**: Another experiment conducted by NASA's Plant Research Unit found that wheat plants grown in space exhibited increased water stress responses, including changes in stomatal density and leaf water potential (Table 2). This sugg

# Thematic Data

In [3]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain_community.llms import Ollama
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain import hub
import trafilatura

# For SOTA topic extraction using sentence transformers
from sentence_transformers import SentenceTransformer, util
import torch

# For our custom Google Scholar loader
from scholarly import scholarly

# Predefined list of webpages (format: {topic: [urls]}).
# Each key is a topic label and each value is the list of pages fetched for
# that topic. (An older two-topic copy of this dict, disabled by wrapping it
# in a string literal, used to sit here; it duplicated the first two entries
# below and has been removed.)
PREDEFINED_URLS = {
    "AI safety": [
        "https://www.deepmind.com/about/safety-and-security",
        "https://openai.com/safety",
    ],
    "quantum computing": [
        "https://qiskit.org/learn/",
        "https://quantum.microsoft.com/",
    ],
    "Fundamentals of Space Agriculture": [
        "https://science.nasa.gov/science-research/biological-physical-sciences/exploring-fundamentals-space-farming/",
        "https://www.nasa.gov/exploration-research-and-technology/growing-plants-in-space/",
        "https://en.wikipedia.org/wiki/Space_farming",
        "https://www.researchgate.net/publication/369004581_Space_Farming",
        "https://cosmosmagazine.com/space/the-challenges-of-growing-plants-in-space/",
        "https://www.colabra.ai/podcasts/talking-biotech/321-the-challenges-of-growing-plants-in-space/",
        "https://www.haifa-group.com/articles/agriculture-space-how-it-started",
        "https://blogs.nasa.gov/kennedy/2017/04/06/space-agriculture-planted-in-history/",
    ],
    "Space Environment and Its Impact on Farming": [
        "https://global.jaxa.jp/article/special/kibo/takahashi_e.html",
        "https://www.frontiersin.org/journals/astronomy-and-space-sciences/articles/10.3389/fspas.2021.729154/full",
        "https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2020.00552/full",
        "https://www.nasa.gov/wp-content/uploads/2009/07/284274main_Radiation_HS_Mod2.pdf",
        "https://www.nss.org/settlement/nasa/spaceresvol2/thermalmanagement.html",
        "https://en.wikipedia.org/wiki/Space_farming",
        "https://www.researchgate.net/publication/223371466_Effects_of_long-term_low_atmospheric_pressure_on_gas_exchange_and_growth_of_lettuce",
        "https://ntrs.nasa.gov/api/citations/20050182975/downloads/20050182975.pdf",
    ],
    "Growth Systems in Space": [
        "https://en.wikipedia.org/wiki/Hydroponics",
        "https://en.wikipedia.org/wiki/Aeroponics",
        "https://en.wikipedia.org/wiki/Aquaponics",
        "https://en.wikipedia.org/wiki/Bioregenerative_life_support_system",
        "https://en.wikipedia.org/wiki/Controlled-environment_agriculture",
    ],
    "Key Crops for Space Farming": [
        "https://en.wikipedia.org/wiki/Plants_in_space",
        "https://astrobotany.com/plants-grown-in-space/",
        "https://science.nasa.gov/wp-content/uploads/2023/05/194_045da502be1970c318149736b5d82901_HavemanNatashaJ.pdf",
        "https://www.soci.org/chemistry-and-industry/cni-data/2015/5/space-farming",
        "https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2020.00199/full",
        "https://www.sciencedirect.com/science/article/abs/pii/S0048969724043353",
        "https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2024.1335744/full",
        "https://www.nature.com/articles/s41598-022-20856-7",
        "https://www.researchgate.net/publication/342838611_Classification_characterization_and_properties_of_edible_and_non-edible_biomass_feedstocks",
    ],
    "Water and Nutrient Management": [
        "https://www.netsolwater.com/how-is-water-recycled-in-the-international-space-stations.php?blog=3100",
        "https://www.sciencedirect.com/science/article/abs/pii/S1001074223003728",
        "https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2020.00417/full",
        "https://www.researchgate.net/publication/11808055_The_Vacuum-Operated_Nutrient_Delivery_System_Hydroponics_for_Microgravity",
        "https://www.ellenmacarthurfoundation.org/circular-examples/closing-the-nutrient-loop",
        "https://www.azolifesciences.com/article/What-is-Closed-Loop-Agriculture.aspx",
        "https://www.sciencedirect.com/science/article/pii/S2090123223003673",
        "https://www.frontiersin.org/journals/astronomy-and-space-sciences/articles/10.3389/fspas.2021.735834/full",
    ],
    "Light and Photosynthesis in Space": [
        "https://www.ledlightexpert.com/nasa_growing_food_space_grow_lights",
        "https://spinoff.nasa.gov/Spinoff2018/cg_7.html",
        "https://phys.org/news/2024-12-scientists-explore-photosynthesis-growth-artificial.html",
        "https://network.febs.org/posts/artificial-photosynthesis-fifty-five-years-of-research-to-colonize-space",
        "https://up2date.uni-bremen.de/en/article/breathing-in-space-artificial-photosynthesis-as-a-key-technology",
        "https://astrobotany.com/optimal-lighting-for-plants-in-space/",
        "https://www.sciencedirect.com/science/article/abs/pii/S0360132321000913",
        "http://nsbri.org/index.html@p=21031.html",
    ],
    "Space Farming Technologies and Innovations": [
        "https://www.nasa.gov/using-ai-ml-for-space-biology-research/",
        "https://www.linkedin.com/pulse/future-space-agriculture-ai-powered-crop-monitoring-luis-figueroa-s3f8e/",
        "https://www.astronomy.com/space-exploration/its-hard-to-grow-food-in-space-these-sensors-can-help/",
        "https://pmc.ncbi.nlm.nih.gov/articles/PMC10352677/",
        "https://3dprint.com/282650/3d-printed-biopods-could-grow-plants-anywhere-on-earth-and-in-space/",
        "https://link.springer.com/chapter/10.1007/978-3-031-18326-3_6",
        "https://www.ignitec.com/insights/iot-in-space-exploration-significant-step-for-innovation-giant-leap-for-civilisation/",
        "https://spacetech-gulf.com/2024/08/agriculture-spacetech-iot-earth-observation/",
    ],
    "Space Farming Missions & Experiments": [
        "https://science.nasa.gov/mission/advanced-plant-habitat/",
        "https://www.nasa.gov/science-research/nasa-initiates-first-grow-out-in-advanced-plant-habitat-on-space-station/",
        "https://www.csmonitor.com/Science/2017/0419/Why-NASA-is-expanding-its-Veggie-space-program",
        "https://www.sierraspace.com/blog/advanced-plant-habitat-creating-farm-to-table-meals-for-the-orbital-age/",
        "https://phys.org/news/2023-10-china-tiny-farm-moon.html",
        "https://www.nature.com/articles/d41586-019-00159-0",
        "https://www.esquireme.com/news/39707-china-is-growing-these-plants-on-the-moon",
        "https://www.reuters.com/technology/space/chinese-astronauts-conduct-experiments-space-including-lunar-bricks-2024-10-29/",
        "https://www.chinadailyhk.com/hk/article/360714",
        "https://www.esa.int/Enabling_Support/Space_Engineering_Technology/MELiSSA_life_support_project_an_innovation_network_in_support_to_space_exploration",
        "https://www.melissafoundation.org/page/melissa-project",
        "https://en.wikipedia.org/wiki/MELiSSA",
        "https://www.euro-case.org/strategic-performance-assessment-for-esas-melissa-project/",
        "https://www.labiotech.eu/trends-news/extra-terrestrial-ecosystems-the-european-space-agency-melissa-program/",
        "https://igrownews.com/wanna-greens-the-first-duckweed-on-spacex/",
        "https://www.nasa.gov/humans-in-space/the-scientific-journey-of-nasas-spacex-crew-3-aboard-the-international-space-station/",
        "https://pmc.ncbi.nlm.nih.gov/articles/PMC10614614/",
        "https://issnationallab.org/press-releases/spacex-crs-15-mission-overview/",
        "https://www.nasa.gov/missions/station/ways-the-international-space-station-helps-us-study-plant-growth-in-space/",
        "https://issnationallab.org/iss360/going-off-the-ground-with-agriculture-research/",
        "https://issnationallab.org/iss360/plant-research-in-space-is-cropping-up/",
        "https://www.tandfonline.com/doi/full/10.1080/17429145.2023.2292220",
    ],
    "Terraforming and Planetary Agriculture": [
        "https://www.space.com/the-universe/mars/food-grows-better-on-the-moon-than-on-mars-scientists-find",
        "https://www.planetary.org/sci-tech/growing-veggies-moon-mars",
        "https://www.theguardian.com/environment/2024/nov/11/australia-aleph-plants-on-the-moon-project-launch-date",
        "https://journals.plos.org/plosone/article?id=10.1371%2Fjournal.pone.0272209",
        "https://pmc.ncbi.nlm.nih.gov/articles/PMC10780443/",
        "https://www.frontiersin.org/journals/astronomy-and-space-sciences/articles/10.3389/fspas.2021.747821/full",
        "https://www.centauri-dreams.org/2023/11/10/mars-agriculture-knowledge-gaps-for-regolith-preparation/",
        "https://www.centauri-dreams.org/2023/01/26/food-production-on-mars-dirt-farming-as-the-most-scalable-solution-for-settlement/",
        "https://www.frontiersin.org/journals/communication/articles/10.3389/fcomm.2022.1007567/full",
    ],
    "Economic and Ethical Aspects": [
        "https://www.weforum.org/stories/2024/04/space-economy-technology-invest-rocket-opportunity/",
        "https://www.marketresearchfuture.com/reports/space-agriculture-market-12425",
        "https://www.globenewswire.com/news-release/2024/05/09/2879052/0/en/Space-Agriculture-Market-Size-to-Surpass-US-12-96-Billion-by-2033-The-Brainy-Insights.html",
    ],
}



# Topic labels that queries are compared against: the keys of PREDEFINED_URLS
PREDEFINED_TOPICS = list(PREDEFINED_URLS)

# Sentence-embedding model used to match a query to one of those topics
topic_extractor = SentenceTransformer('all-MiniLM-L6-v2')

from typing import Optional

# Embeddings of PREDEFINED_TOPICS, stored together with the topic tuple they
# were computed for so they are rebuilt if the topic list ever changes.
_TOPIC_EMBEDDING_CACHE = {}


def _topic_embeddings():
    """Return embeddings for PREDEFINED_TOPICS, re-encoding only when the list changes."""
    key = tuple(PREDEFINED_TOPICS)
    if _TOPIC_EMBEDDING_CACHE.get("topics") != key:
        _TOPIC_EMBEDDING_CACHE["embeddings"] = topic_extractor.encode(
            PREDEFINED_TOPICS, convert_to_tensor=True
        )
        _TOPIC_EMBEDDING_CACHE["topics"] = key
    return _TOPIC_EMBEDDING_CACHE["embeddings"]


def extract_topic(query: str, threshold: float = 0.6) -> Optional[str]:
    """
    Extract the most semantically similar topic from the predefined topics.

    Args:
        query: Free-text user question.
        threshold: Minimum cosine similarity (default 0.6) for the best
            topic to count as a match.

    Returns:
        The best-matching entry of PREDEFINED_TOPICS if its similarity score
        is at least ``threshold``, else None. The best score is printed.
    """
    if not PREDEFINED_TOPICS:
        # Nothing to compare against; the max over an empty row would fail
        return None

    query_embedding = topic_extractor.encode(query, convert_to_tensor=True)
    # Topic embeddings do not depend on the query, so they are reused across calls
    cosine_scores = util.cos_sim(query_embedding, _topic_embeddings())
    best_score, best_index = torch.max(cosine_scores, dim=1)
    score = best_score.item()
    print(score)
    if score >= threshold:
        return PREDEFINED_TOPICS[best_index.item()]
    return None

def web_search(query):
    """Fallback web search using DuckDuckGo; returns the top 3 results for ``query``."""
    max_results = 3
    ddg = DuckDuckGoSearchAPIWrapper()
    hits = ddg.results(query, max_results)
    return hits

from typing import List
from langchain.docstore.document import Document

def process_urls(urls: List[str]) -> List[Document]:
    """Download each URL, extract its main text and return it as chunks.

    A URL that yields no text, or raises while being processed, is reported
    on stdout and skipped. Each extracted page becomes a Document whose
    ``source`` metadata is the URL; the collected documents are then split
    into chunks of 1000 characters with 200 characters of overlap.

    Raises:
        ValueError: If none of the URLs produced any text.
    """
    fetched_docs = []
    for page_url in urls:
        try:
            raw_page = trafilatura.fetch_url(page_url)
            extracted = trafilatura.extract(raw_page)
            if not extracted:
                print(f"No content found at: {page_url}")
                continue
            fetched_docs.append(
                Document(page_content=extracted, metadata={"source": page_url})
            )
            print(f"Successfully processed: {page_url}")
        except Exception as err:
            print(f"Error processing {page_url}: {str(err)}")

    if len(fetched_docs) == 0:
        raise ValueError("No valid documents found from the provided URLs")

    # Split documents into manageable chunks
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(fetched_docs)

def process_google_scholar(topic: str, max_results: int = 3) -> List[Document]:
    """
    Use the scholarly package to fetch scholarly documents on the topic,
    then create Document objects.

    Args:
        topic: Search string passed to ``scholarly.search_pubs``.
        max_results: Maximum number of results to convert (default 3).
            Zero or a negative value returns an empty list without searching.

    Returns:
        A list of Documents whose text is "Title: ...\\nAbstract: ..." and
        whose ``source`` metadata is "Google Scholar". If the search fails,
        the error is printed and whatever was collected so far is returned
        (possibly an empty list).
    """
    if max_results <= 0:
        return []

    documents = []
    try:
        for result in scholarly.search_pubs(topic):
            bib = result.get('bib', {})
            title = bib.get('title', 'No Title')
            abstract = bib.get('abstract', 'No Abstract Available')
            documents.append(Document(
                page_content=f"Title: {title}\nAbstract: {abstract}",
                metadata={"source": "Google Scholar"}
            ))
            print(f"Fetched Google Scholar result: {title}")
            # Stop here, before asking the search generator for a result
            # that would only be thrown away
            if len(documents) >= max_results:
                break
        if not documents:
            print("No documents found via Google Scholar.")
    except Exception as e:
        print(f"Error processing Google Scholar data: {str(e)}")
    return documents

# Initialize LLM (Make sure Ollama is running)
# Module-level handle shared by answer_with_context when it builds its QA chain.
llm = Ollama(model="llama3.2:3b")

def answer_with_context(question: str, context_docs: List[Document]):
    """Generate an answer to ``question`` from the given context documents.

    All of ``context_docs`` are passed to a stuff-documents chain built on
    the module-level ``llm`` and the "rlm/rag-prompt" prompt pulled from the
    hub. No retrieval happens here: the caller is expected to pass the
    documents it already selected, so no vector store is built.

    Args:
        question: The user question.
        context_docs: Documents to place in the prompt context.

    Returns:
        The generated answer text, or a fixed message when ``context_docs``
        is empty or answer generation fails.
    """
    if not context_docs:
        return "No relevant information found to answer this question."

    try:
        # Create QA chain using a predefined prompt
        qa_prompt = hub.pull("rlm/rag-prompt")
        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
        combine_docs_chain = StuffDocumentsChain(
            llm_chain=qa_chain, document_variable_name="context"
        )
        result = combine_docs_chain.invoke({
            "input_documents": context_docs,
            "question": question
        })
        return result["output_text"]
    except Exception as e:
        print(f"Error generating answer: {str(e)}")
        return "Failed to generate answer."

# --- Caching Strategy Implementation ---
# Cache for topic-based FAISS vector stores (topic label -> vector store)
VECTOR_STORE_CACHE = {}

def get_vector_store_for_topic(topic: str, urls: List[str]):
    """
    Retrieve or create a FAISS vector store for the given topic.
    This function uses caching and downloads data from both predefined URLs
    and Google Scholar.

    Args:
        topic: Topic label; also the cache key and the Google Scholar query.
        urls: Web pages to download for this topic.

    Returns:
        The cached or newly built vector store, or None when no documents
        could be collected or the store could not be created.
    """
    if topic in VECTOR_STORE_CACHE:
        print(f"Using cached vector store for topic: {topic}")
        return VECTOR_STORE_CACHE[topic]

    # Process documents from predefined URLs. process_urls raises ValueError
    # when no URL yields content; that must not prevent the Google Scholar
    # results from being used.
    try:
        docs_web = process_urls(urls)
    except ValueError as e:
        print(f"No web documents for topic {topic}: {str(e)}")
        docs_web = []

    # Process scholarly documents via our custom Google Scholar loader
    docs_scholar = process_google_scholar(topic)

    # Combine documents from both sources
    all_docs = docs_web + docs_scholar
    if not all_docs:
        print(f"No documents available for topic: {topic}")
        return None

    embeddings = OllamaEmbeddings(model="llama3.2:3b")

    try:
        vectorstore = FAISS.from_documents(all_docs, embeddings)
        VECTOR_STORE_CACHE[topic] = vectorstore  # Cache for future use
        return vectorstore
    except Exception as e:
        print(f"Error creating vector store: {str(e)}")
        return None

def answer_question(question: str) -> str:
    """Answer ``question`` using the sources of its best-matching predefined topic.

    The question is matched to a topic with ``extract_topic``. For a matched
    topic the (cached) vector store is obtained, the 3 most relevant chunks
    are retrieved and passed to ``answer_with_context``.

    Returns:
        The generated answer, or a fixed explanatory message when the topic
        is unsupported, the vector store cannot be built, or an unexpected
        error occurs (the error is printed).
    """
    try:
        # Use SOTA topic extraction to check for a predefined topic
        topic = extract_topic(question)

        if not topic:
            # If the topic is unsupported, do not process further.
            print("Query topic is not supported. Skipping response.")
            return "The query topic is not supported as it doesn't match any predefined topics."

        print(f"Extracted topic: {topic}")
        vectorstore = get_vector_store_for_topic(topic, PREDEFINED_URLS[topic])
        if vectorstore is None:
            return "Failed to retrieve information for the topic."

        # The result count must go through search_kwargs; a bare k=3 keyword
        # on as_retriever does not configure the search.
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        # `invoke` replaces the deprecated get_relevant_documents
        context_docs = retriever.invoke(question)
        return answer_with_context(question, context_docs)

    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return "An error occurred while processing your request."




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  llm = Ollama(model="llama3.2:3b")


In [4]:
# First query, on quantum computing (this will create and cache the vector store for that topic)
question1 = "Tell me about quantum computing"
print("Answer 1:", answer_question(question1))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.899120569229126
Extracted topic: quantum computing
Successfully processed: https://qiskit.org/learn/
Successfully processed: https://quantum.microsoft.com/
Fetched Google Scholar result: Quantum computing
Fetched Google Scholar result: Optical quantum computing
Fetched Google Scholar result: Quantum computing


  embeddings = OllamaEmbeddings(model="llama3.2:3b")
  context_docs = retriever.get_relevant_documents(question)
  qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
  combine_docs_chain = StuffDocumentsChain(


Answer 1: Quantum computing is a type of computing that uses quantum mechanics to perform calculations, which can be exponentially faster than classical computers for certain tasks. It has the potential to revolutionize various fields by solving complex problems and enabling new applications. Quantum computing aims to overcome the limitations of classical computing, such as noise and error rates, to achieve reliable logical qubits.


In [5]:
# Second query; in the recorded output below it matched the topic
# "Key Crops for Space Farming".
question2 = "how do i grow vegetables in space ?"
print(answer_question(question2))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.7160781025886536
Extracted topic: Key Crops for Space Farming
Successfully processed: https://en.wikipedia.org/wiki/Plants_in_space
No content found at: https://astrobotany.com/plants-grown-in-space/
No content found at: https://science.nasa.gov/wp-content/uploads/2023/05/194_045da502be1970c318149736b5d82901_HavemanNatashaJ.pdf
Successfully processed: https://www.soci.org/chemistry-and-industry/cni-data/2015/5/space-farming
Successfully processed: https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2020.00199/full
No content found at: https://www.sciencedirect.com/science/article/abs/pii/S0048969724043353
Successfully processed: https://www.frontiersin.org/journals/plant-science/articles/10.3389/fpls.2024.1335744/full
No content found at: https://www.nature.com/articles/s41598-022-20856-7
No content found at: https://www.researchgate.net/publication/342838611_Classification_characterization_and_properties_of_edible_and_non-edible_biomass_feedstocks
Fetched Google



Growing vegetables in space requires a bespoke process design, as plants struggle with root development, watering, and nutrient supply without gravity. NASA has developed systems like the Advanced Plant Habitat (APH) to support plant growth in space, which can provide psychological benefits for astronauts. However, I don't know specific details on how to replicate this setup at home or what types of vegetables are best suited for growing in microgravity.


In [6]:
# Third query; in the recorded output below it matched the topic
# "Fundamentals of Space Agriculture".
question3 = "tell me about space agriculture"
print(answer_question(question3))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.9353062510490417
Extracted topic: Fundamentals of Space Agriculture
Successfully processed: https://science.nasa.gov/science-research/biological-physical-sciences/exploring-fundamentals-space-farming/
Successfully processed: https://www.nasa.gov/exploration-research-and-technology/growing-plants-in-space/
Successfully processed: https://en.wikipedia.org/wiki/Space_farming
No content found at: https://www.researchgate.net/publication/369004581_Space_Farming
Successfully processed: https://cosmosmagazine.com/space/the-challenges-of-growing-plants-in-space/
Successfully processed: https://www.colabra.ai/podcasts/talking-biotech/321-the-challenges-of-growing-plants-in-space/
Successfully processed: https://www.haifa-group.com/articles/agriculture-space-how-it-started
Successfully processed: https://blogs.nasa.gov/kennedy/2017/04/06/space-agriculture-planted-in-history/
Fetched Google Scholar result: Fundamentals of space medicine
Fetched Google Scholar result: Fundamentals of space biolo



Space agriculture involves growing crops in space, requiring careful management of carbon levels and soil conservation due to the harsh environment. Like on Earth, balancing the right amount of carbon is crucial for successful plant growth in space. This challenge applies across various space-based agricultural settings, including greenhouses and Martian environments.


In [1]:
# Install the third-party packages used by the cells in this notebook
# (IPython shell escape; this cell is numbered In [1], i.e. it was run before
# the import cells even though it appears after them here).
!pip install -U langchain-community faiss-gpu ollama trafilatura scholarly -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.6/55.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m62.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m837.8/837.8 kB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.2/413.2 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[2K   

In [2]:
# Install Ollama with its install script (IPython shell escape)
!curl -fsSL https://ollama.com/install.sh | sh
import subprocess
process = subprocess.Popen("ollama serve", shell=True) # starts the server as a separate background process (not a thread); Popen returns immediately
# Download the models
# NOTE(review): nothing here waits for the server started above to be ready
# before pulling — confirm the pulls cannot race the server start-up.
!ollama pull llama3.2:3b
!ollama pull llama3.1:8b


>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%#                                                                                      10.7%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest 
pulling dde5aa3fc5ff...   0% ▕                ▏    0 B/2.0 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling