In [2]:

# 1) Install dependencies
!pip -q install streamlit pyngrok PyPDF2 sentence_transformers faiss-cpu transformers accelerate


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m59.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Extra environment setup: upgrades torch, installs transformers together with
# triton 3.4 and `kernels`, then removes torchvision and torchaudio.
# NOTE(review): this cell is shown as not executed ("In [None]") and app.py in this
# notebook does not import torch, triton or kernels directly — presumably a leftover
# from running the model locally; confirm whether it is still needed.
!pip install -q --upgrade torch
!pip install -q transformers triton==3.4 kernels
!pip uninstall -q torchvision torchaudio -y


In [3]:
%%writefile app.py
# app.py
import streamlit as st
import pandas as pd
import hashlib
import faiss
import io
import os
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import numpy as np
import openai


# --------------------------
# Setup API client
# --------------------------
# The Synthetic API key is read from the environment so that the secret never
# lives in the notebook or in app.py. Any key that has ever been committed or
# shared in plain text should be revoked and replaced.
_api_key = os.environ.get("SYNTHETIC_API_KEY")
if not _api_key:
    raise RuntimeError(
        "SYNTHETIC_API_KEY is not set. Define it in the environment that "
        "launches Streamlit before starting the app."
    )

# OpenAI-compatible client pointed at the Synthetic endpoint.
client = openai.OpenAI(
    api_key=_api_key,
    base_url="https://api.synthetic.new/v1",
)

# --------------------------
# Load embedding model
# --------------------------
@st.cache_resource
def load_embedder():
    """Construct the sentence-embedding model used for PDF retrieval.

    Wrapped in ``st.cache_resource`` so the model object is shared instead of
    being rebuilt each time the script runs.
    """
    model_name = "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)


# Module-level handle used by the indexing and retrieval helpers.
embedder = load_embedder()

# --------------------------
# PDF handling
# --------------------------
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of a PDF given as raw bytes.

    Each page's text is followed by a newline; pages whose ``extract_text()``
    result is empty or ``None`` contribute nothing.
    """
    document = PdfReader(io.BytesIO(file_bytes))
    extracted = (page.extract_text() for page in document.pages)
    return "".join(content + "\n" for content in extracted if content)

def chunk_text(text, chunk_size=500):
    """Split ``text`` on whitespace into chunks of at most ``chunk_size`` words.

    Words inside a chunk are re-joined with single spaces; the final chunk may
    be shorter than ``chunk_size``.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(" ".join(window))
    return pieces

# --------------------------
# Cached PDF embeddings
# --------------------------
@st.cache_resource(show_spinner=False)
def get_pdf_index(file_bytes):
    """Extract, chunk and embed a PDF, returning its chunks and a FAISS index.

    Args:
        file_bytes: Raw bytes of the uploaded PDF.

    Returns:
        A ``(pdf_chunks, index)`` tuple: the list of text chunks and a
        ``faiss.IndexFlatL2`` built over their embeddings. When the PDF yields
        no extractable text the result is ``([], None)``.
    """
    text = extract_text_from_pdf(file_bytes)
    pdf_chunks = chunk_text(text)
    if not pdf_chunks:
        # Nothing to embed: there are no vectors from which to size the index,
        # so report "no index" instead of failing on the shape lookup below.
        return [], None

    vectors = embedder.encode(pdf_chunks, convert_to_numpy=True)
    # The index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return pdf_chunks, index

# --------------------------
# Retrieval + prompt building
# --------------------------
def make_prompt(question, df_context, pdf_chunks, pdf_index, top_k=3):
    """Build the LLM prompt from the question, CSV rows and retrieved PDF chunks.

    Args:
        question: The user's question.
        df_context: DataFrame of log rows, or ``None`` when no CSV was uploaded.
        pdf_chunks: List of PDF text chunks, or ``None``/empty when unavailable.
        pdf_index: Index over ``pdf_chunks`` exposing ``search(vectors, k=...)``,
            or ``None``.
        top_k: Maximum number of PDF chunks to retrieve (default 3).

    Returns:
        The prompt string containing the logs, the retrieved notes and the
        question.
    """
    pdf_context = ""
    if pdf_chunks and pdf_index is not None:
        # Never ask for more neighbours than there are chunks.
        k = min(top_k, len(pdf_chunks))
        if k > 0:
            q_vec = embedder.encode([question], convert_to_numpy=True)
            _, hits = pdf_index.search(q_vec, k=k)
            # Missing neighbours are reported as -1; without this filter a -1
            # would index the *last* chunk and inject it by accident.
            pdf_context = "\n".join(
                pdf_chunks[i] for i in hits[0] if 0 <= i < len(pdf_chunks)
            )

    csv_context = ""
    if df_context is not None:
        preferred = ["timestamp", "sensor_id", "event_type", "payload"]
        if all(col in df_context.columns for col in preferred):
            columns = preferred
        else:
            # CSVs with a different schema fall back to all of their columns
            # instead of raising KeyError.
            columns = list(df_context.columns)
        csv_context = "\n".join(
            " | ".join(f"{row[col]}" for col in columns)
            for _, row in df_context.iterrows()
        )

    return f"""
You are Eco-GPT, a scientific assistant.

Logs:
{csv_context}

Scientific Notes:
{pdf_context}

Question: {question}
Answer (be accurate, concise, and eco-friendly):
"""

# --------------------------
# Streamlit UI
# --------------------------
st.set_page_config(page_title="Eco-GPT", page_icon="🌱")
st.title("🌱 Eco-GPT: Scientific Q&A")
st.caption("Upload CSV logs + scientific PDFs, then ask natural questions. Powered by GPT-OSS via Synthetic API.")

# Context gathered from the uploads; each stays None when nothing usable was provided.
df = None
pdf_chunks, pdf_index = None, None

uploaded_csv = st.file_uploader("Upload CSV logs", type=["csv"])
uploaded_pdf = st.file_uploader("Upload scientific PDF", type=["pdf"])

if uploaded_csv:
    try:
        df = pd.read_csv(uploaded_csv)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
        # A malformed upload should produce a readable message, not a traceback.
        df = None
        st.error(f"Could not read the CSV file: {exc}")
    else:
        st.subheader("📜 CSV Preview")
        st.dataframe(df.head(20))

if uploaded_pdf:
    try:
        with st.spinner("Extracting & indexing PDF... (cached after first run)"):
            file_bytes = uploaded_pdf.getvalue()
            pdf_chunks, pdf_index = get_pdf_index(file_bytes)
    except Exception as exc:
        # PDF parsing and embedding can fail in library-specific ways; report the
        # failure and continue without PDF context.
        pdf_chunks, pdf_index = None, None
        st.error(f"Could not index the PDF: {exc}")
    else:
        if pdf_chunks:
            st.success("PDF indexed successfully ✅")
        else:
            st.warning("No extractable text was found in the PDF.")

user_input = st.text_area("Ask a question:", "What animals were near stream #3?")

if st.button("Ask Eco-GPT"):
    if user_input.strip():
        answer, api_error = None, None
        with st.spinner("Thinking..."):
            prompt = make_prompt(user_input, df, pdf_chunks, pdf_index)

            try:
                completion = client.chat.completions.create(
                    model="hf:openai/gpt-oss-120b",
                    messages=[
                        {"role": "system", "content": "You are Eco-GPT, a scientific assistant."},
                        {"role": "user", "content": prompt},
                    ]
                )
            except openai.OpenAIError as exc:
                api_error = exc
            else:
                # Guard against an empty choices list before indexing into it.
                if completion.choices:
                    answer = completion.choices[0].message.content

        if api_error is not None:
            st.error(f"The request to the model failed: {api_error}")
        elif answer:
            st.subheader("🌍 Eco-GPT Answer")
            st.write(answer)
        else:
            st.warning("The model returned an empty answer.")
    else:
        st.warning("Please type a question first.")


Writing app.py


In [6]:
# Cell 4: run the Streamlit app in Google Colab behind an ngrok tunnel
import os, subprocess, time, threading
from pyngrok import ngrok
from pyngrok.exception import PyngrokError

# (Optional) ngrok auth token for a more stable tunnel (free token from ngrok.com).
# It is read from the environment so the secret never lives in the notebook; any
# token that has been shared in plain text should be revoked and replaced.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

PORT = 8501

# Close any existing tunnels to avoid duplicates
for t in ngrok.get_tunnels():
    try:
        ngrok.disconnect(t.public_url)
    except PyngrokError as exc:
        # Best effort: report and carry on rather than silently swallowing everything.
        print(f"Could not close tunnel {t.public_url}: {exc}")

# Command that starts Streamlit in the background
cmd = [
    "streamlit", "run", "app.py",
    "--server.port", str(PORT),
    "--server.address", "0.0.0.0",
    "--server.headless", "true",
]
log_path = "/content/streamlit.log"

# Set during cleanup so the log-tailing thread stops instead of running forever.
stop_tailing = threading.Event()


def tail_logs(path):
    """Print lines appended to `path` until `stop_tailing` is set."""
    with open(path, "r") as f:
        f.seek(0, os.SEEK_END)
        while not stop_tailing.is_set():
            line = f.readline()
            if line:
                print(line, end="")
            else:
                time.sleep(0.5)


proc = None
public_url = None
with open(log_path, "w") as log_file:
    try:
        proc = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, text=True)

        # Give Streamlit a moment to boot, then make sure it is still running
        time.sleep(3)
        if proc.poll() is not None:
            raise RuntimeError(
                f"Streamlit exited early with code {proc.returncode}; see {log_path}"
            )

        # Start ngrok tunnel
        tunnel = ngrok.connect(addr=PORT, proto="http")
        public_url = tunnel.public_url
        print("Your app is live at:", public_url)

        # Live-tail the Streamlit logs so you can see when it's ready
        threading.Thread(target=tail_logs, args=(log_path,), daemon=True).start()

        # Keep the cell alive so the tunnel stays open for as long as Streamlit runs
        while proc.poll() is None:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Release everything this cell started so a re-run begins from a clean state.
        stop_tailing.set()
        if public_url is not None:
            try:
                ngrok.disconnect(public_url)
            except PyngrokError as exc:
                print(f"Could not close tunnel {public_url}: {exc}")
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()


Your app is live at: https://6f975e12a24a.ngrok-free.app


In [None]:
%%writefile sample_logs.csv
timestamp,sensor_id,event_type,payload,metadata
2024-08-01T06:14:00,cam_trap_01,image,Jaguar observed near stream #3,"{""lat"": -3.45, ""lon"": -62.78}"
2024-08-01T06:15:00,mic_01,audio,Cicada chorus intensity: high,"{""temp_c"": 27.3}"
2024-08-01T06:20:00,env_01,temperature,Air temperature 28°C,"{""humidity"":71}"


Writing sample_logs.csv
