<a href="https://colab.research.google.com/github/dakshitasharma/Ask-the-Universe---NASA-RAG-Chatbot-with-LangChain-Gemini-and-Hugging-Face/blob/main/Ask_the_Universe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
#Install all the packages


#!pip install streamlit pyngrok langchain langchain_google_genai langchain_community chromadb faiss-cpu huggingface-hub google-generativeai --quiet


In [33]:
%%writefile .env
GOOGLE_API_KEY=""

Writing .env


#Create an app.py file so that we can integrate it with the Streamlit app

In [35]:

#Import the libraries
%%writefile app.py
import streamlit as st
import requests
import os
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
import google.generativeai as genai
from dotenv import load_dotenv

# Load API keys
load_dotenv()  # merge the variables declared in the .env file into os.environ

# 🔹 Gemini API key; this is None when GOOGLE_API_KEY is not defined.
# (The author's Colab alternative was: userdata.get('GEMINI_API_KEY').)
GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")

# 🔹 Gemini: configure the SDK, then build the chat model that writes answers.
genai.configure(api_key=GEMINI_API_KEY)
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GEMINI_API_KEY,
)

# 🔹 Hugging Face sentence-transformer used to embed the document chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# 🔹 NASA API Fetch
def get_nasa_data(query, max_results=30, timeout=10):
    """Search the NASA image library and wrap the hits as LangChain Documents.

    Args:
        query: Free-text search string (the user's question). It is sent as
            the ``q`` query parameter, so ``requests`` URL-encodes it.
        max_results: Maximum number of search hits converted to documents.
        timeout: Seconds to wait for the NASA API before giving up.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is
        ``"<title>\\n<description>"``. The list is empty when the request
        fails, the response is not the expected JSON shape, or nothing
        matches. This function does not raise on network or HTTP errors.
    """
    url = "https://images-api.nasa.gov/search"
    try:
        # `params=` percent-encodes the query; interpolating it into the URL
        # breaks on spaces, '&', '#', and similar characters.
        response = requests.get(url, params={"q": query}, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching NASA data: {e}")
        return []

    try:
        items = payload["collection"]["items"]
    except (KeyError, TypeError):
        items = []
    if not isinstance(items, list):
        items = []

    # 🔍 Print the hit count and the first raw item for debugging.
    print(f"🔍 NASA returned {len(items)} results")
    if items:
        print(f"Sample NASA item: {items[0]}")

    docs = []
    for item in items[:max_results]:
        try:
            meta = item["data"][0]
            title = meta["title"]
        except (KeyError, IndexError, TypeError):
            # Skip a malformed hit instead of discarding every result.
            continue
        # `or ""` also covers a JSON null description.
        desc = meta.get("description") or ""
        docs.append(Document(page_content=f"{title}\n{desc}"))

    return docs
# 🔹 Build FAISS VectorStore
def build_vectorstore(docs):
    """Chunk *docs* and index the chunks in a FAISS vector store.

    Documents are split into pieces of at most 500 characters with a
    50-character overlap, then embedded with the module-level ``embeddings``.
    """
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = chunker.split_documents(docs)
    index = FAISS.from_documents(chunks, embeddings)
    return index

# 🔹 RAG Pipeline
def ask_universe(question):
    """Answer *question* using NASA search results as retrieval context.

    Fetches NASA documents for the question, indexes them, and runs a
    RetrievalQA chain backed by the module-level ``llm``. Returns the
    chain's answer, or the string "No NASA data found." when the search
    produced no documents.
    """
    nasa_docs = get_nasa_data(question)
    if nasa_docs:
        store = build_vectorstore(nasa_docs)
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=store.as_retriever(),
        )
        return chain.run(question)
    return "No NASA data found."

# 🔹 Streamlit App
# Page chrome: tab title/icon, heading, and a one-line introduction.
st.set_page_config(page_title="Ask the Universe", page_icon="🚀")
st.title("🚀 Ask the Universe (NASA + LangChain RAG)")
st.write("Ask me anything about space! I’ll fetch NASA data and answer using Gemini.")

# Question box plus button; the pipeline only runs when "Ask" is clicked.
query = st.text_input("🔭 Enter your space question:")
if st.button("Ask"):
    if not query.strip():
        # Empty or whitespace-only input: prompt the user instead of searching.
        st.warning("Please enter a question!")
    else:
        with st.spinner("Contacting NASA & thinking..."):
            answer = ask_universe(query)
        st.markdown(f"### 🌌 Answer:\n{answer}")


Overwriting app.py


Create a public link with ngrok that exposes the running app.py Streamlit app

In [30]:
from pyngrok import ngrok
import time

from google.colab import userdata


# Set your ngrok token
# The token is read from the Colab secret named 'NGROK_TOKEN' (via userdata).

NGROK_TOKEN=userdata.get('NGROK_TOKEN')
ngrok.set_auth_token(NGROK_TOKEN)

# Start Streamlit
# Launched in the background on port 8501 with all output sent to /dev/null,
# so this cell returns immediately and any startup errors are not shown here.
!nohup streamlit run app.py --server.port 8501 > /dev/null 2>&1 &
# Fixed 3-second pause before opening the tunnel — presumably to give the
# Streamlit server time to start listening; nothing verifies that it has.
time.sleep(3)
# Open an ngrok tunnel to local port 8501 (the port Streamlit was started on).
public_url = ngrok.connect(8501)
print(f"🌍 Your RAG App is live here: {public_url}")


🌍 Your RAG App is live here: NgrokTunnel: "https://2d2edf981e25.ngrok-free.app" -> "http://localhost:8501"
