<a href="https://colab.research.google.com/github/darengarciaH/AI-Question-Answering/blob/main/chatbot/sac_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U -q "google-generativeai>=0.7.2" # Install the Python SDK
!pip install google-ai-haystack
!pip install streamlit
!pip install pyngrok
!pip install "sentence-transformers>=4.1.0"

Collecting google-ai-haystack
  Downloading google_ai_haystack-5.4.0.post1-py3-none-any.whl.metadata (2.3 kB)
Collecting haystack-ai>=2.11.0 (from google-ai-haystack)
  Downloading haystack_ai-2.20.0-py3-none-any.whl.metadata (15 kB)
Collecting filetype (from haystack-ai>=2.11.0->google-ai-haystack)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting haystack-experimental (from haystack-ai>=2.11.0->google-ai-haystack)
  Downloading haystack_experimental-0.14.3-py3-none-any.whl.metadata (18 kB)
Collecting lazy-imports (from haystack-ai>=2.11.0->google-ai-haystack)
  Downloading lazy_imports-1.1.0-py3-none-any.whl.metadata (11 kB)
Collecting posthog!=3.12.0 (from haystack-ai>=2.11.0->google-ai-haystack)
  Downloading posthog-7.0.1-py3-none-any.whl.metadata (6.0 kB)
Collecting backoff>=1.10.0 (from posthog!=3.12.0->haystack-ai>=2.11.0->google-ai-haystack)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Downloading google_ai_haystack-5.4.0.post1-py3

In [None]:
# Colab helpers: `drive` mounts Google Drive, `userdata` reads notebook
# secrets (it is used by the launch cell further down).
from google.colab import drive
from google.colab import userdata

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
%%writefile /content/drive/MyDrive/sac_streamlit.py

import os
import sys
import pandas as pd
import streamlit as st
import glob

# Add path to sac_utils
UTILS_PATH = '/content/drive/MyDrive/'
if UTILS_PATH not in sys.path:
    sys.path.append(UTILS_PATH)

# Import from sac_utils
from sac_utils import (
    load_team_stats,
    load_player_stats,
    load_keeper_stats,
    rename_team_columns,
    rename_player_columns,
    rename_keeper_columns,
    create_team_documents,
    create_player_documents,
    create_keeper_documents,
    create_document_store,
    build_pipeline,
    run_pipeline,
    DEFAULT_TEMPLATE
)

# Configuration - the API key is read from the environment of the Streamlit
# process; DATA_DIR is where the CSV inputs and the logo are looked up.
gemini_api_key = os.environ.get('GEMINI_API_KEY')
DATA_DIR = "/content/drive/MyDrive/"
LOGO_PATH = os.path.join(DATA_DIR, "SAC Logo.png")

# Set page config (Streamlit expects this before any other st.* call)
st.set_page_config(page_title="Soccer Analytics Coach", page_icon="⚽", layout="wide")

st.title('⚽ Soccer Assistant Coachbot ⚽')
# The logo is decorative: skip it rather than fail the whole page when the
# image file is not present on the mounted drive.
if os.path.exists(LOGO_PATH):
    st.image(LOGO_PATH)
st.header('RAG-Powered Tactical Assistant')

# Fail fast with a readable message instead of passing None on to the
# pipeline builder and surfacing an obscure error later.
if not gemini_api_key:
    st.error(
        "GEMINI_API_KEY is not set. Export it in the environment before "
        "running `streamlit run`."
    )
    st.stop()

# Cache the pipeline loading
def _read_stat_files(pattern):
    """Read every CSV under DATA_DIR matching ``pattern`` into one DataFrame.

    Args:
        pattern: Glob pattern for the file names, e.g. ``"team_stats_*.csv"``.

    Returns:
        The row-wise concatenation of all matching files, with a fresh index.

    Raises:
        FileNotFoundError: If no file matches. Without this check
            ``pd.concat`` fails with an unhelpful "No objects to concatenate".
    """
    # glob returns paths in arbitrary order; sort so row order is stable.
    paths = sorted(glob.glob(os.path.join(DATA_DIR, pattern)))
    if not paths:
        raise FileNotFoundError(
            f"No files matching '{pattern}' were found in '{DATA_DIR}'"
        )
    return pd.concat([pd.read_csv(path) for path in paths], ignore_index=True)


@st.cache_resource(show_spinner=False)
def load_sac_pipeline():
    """Load the stats CSVs, index them, and build the RAG pipeline.

    Decorated with ``st.cache_resource`` so the work is cached and not
    repeated on every script rerun. A spinner is shown for each stage.

    Returns:
        Whatever ``build_pipeline`` returns (the object later handed to
        ``run_pipeline``).

    Raises:
        FileNotFoundError: If any of the team/player/keeper CSV groups has
            no matching file in DATA_DIR.
    """
    with st.spinner("Loading data files..."):
        # Load data
        team_stats = _read_stat_files("team_stats_*.csv")
        player_stats = _read_stat_files("player_stats_*.csv")
        keeper_stats = _read_stat_files("keeper_stats_*.csv")

    with st.spinner("Processing data..."):
        # Rename columns
        team_stats = rename_team_columns(team_stats)
        player_stats = rename_player_columns(player_stats)
        keeper_stats = rename_keeper_columns(keeper_stats)

        # Create documents: missing values become empty strings and each row
        # becomes one dict before being handed to the document builders.
        team_records = team_stats.fillna("").to_dict(orient="records")
        player_records = player_stats.fillna("").to_dict(orient="records")
        keeper_records = keeper_stats.fillna("").to_dict(orient="records")

        team_docs = create_team_documents(team_records)
        player_docs = create_player_documents(player_records)
        keeper_docs = create_keeper_documents(keeper_records)

    with st.spinner("Creating document store and embeddings..."):
        # Create document store
        all_docs = team_docs + player_docs + keeper_docs
        document_store = create_document_store(all_docs, embedding_model="sentence-transformers/all-MiniLM-L6-v2")

    with st.spinner("Building RAG pipeline..."):
        # Build pipeline
        pipe = build_pipeline(document_store, DEFAULT_TEMPLATE, gemini_api_key, top_k=10)

    return pipe

# Load pipeline (load_sac_pipeline is wrapped in st.cache_resource)
pipe = load_sac_pipeline()

st.success("✅ Pipeline loaded successfully!")
st.header("Chat History")

# Initialize chat history
if "sac_messages" not in st.session_state:
    st.session_state.sac_messages = []

# Display chat messages from history on app rerun
for message in st.session_state.sac_messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input
if sac_prompt := st.chat_input("Ask me for tactical and coaching advice"):
    # Display user message
    st.chat_message("user").markdown(sac_prompt)

    # Add user message to chat history
    st.session_state.sac_messages.append({"role": "user", "content": sac_prompt})

    # Generate response. A failure in the pipeline call (or a result without
    # an "answer" key) is reported in the page instead of aborting the script
    # run with a raw traceback; the user can simply ask again.
    try:
        with st.spinner("Generating response..."):
            result = run_pipeline(pipe, sac_prompt, top_k=10)
            response = result["answer"]
    except Exception as exc:
        st.error(f"Could not generate a response: {exc}")
    else:
        # Display assistant response
        st.chat_message("assistant").markdown(response)

        # Add assistant message to chat history
        st.session_state.sac_messages.append({"role": "assistant", "content": response})

Overwriting /content/drive/MyDrive/sac_streamlit.py


In [None]:
import os
import socket
import subprocess
import time

from pyngrok import ngrok

APP_PATH = '/content/drive/MyDrive/sac_streamlit.py'
STREAMLIT_PORT = 5011
STREAMLIT_LOG = 'streamlit.log'
STARTUP_TIMEOUT_S = 60

# Set Gemini API key as environment variable BEFORE launching streamlit.
# The child process below inherits os.environ, so the key never has to be
# spliced into a shell command line (where it would show up in the process
# list and break on shell metacharacters).
os.environ['GEMINI_API_KEY'] = userdata.get('GeminiSecret')

NGROK_API_KEY = userdata.get('NgrokSecret')
ngrok.set_auth_token(NGROK_API_KEY)

# Launch streamlit detached from the notebook, logging to STREAMLIT_LOG.
# The parent's handle is closed on leaving the with-block; the child keeps
# its own copy of the file descriptor.
with open(STREAMLIT_LOG, 'ab') as log_file:
    streamlit_proc = subprocess.Popen(
        ['streamlit', 'run', APP_PATH, '--server.port', str(STREAMLIT_PORT)],
        stdin=subprocess.DEVNULL,
        stdout=log_file,
        stderr=subprocess.STDOUT,
        start_new_session=True,
    )

# Wait until the server accepts connections instead of sleeping a fixed
# time, and stop early if the process died (e.g. the port is already taken).
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if streamlit_proc.poll() is not None:
        raise RuntimeError(
            f"Streamlit exited with code {streamlit_proc.returncode}; see {STREAMLIT_LOG}"
        )
    try:
        with socket.create_connection(('localhost', STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Streamlit did not start listening on port {STREAMLIT_PORT} "
                f"within {STARTUP_TIMEOUT_S}s; see {STREAMLIT_LOG}"
            )
        time.sleep(0.5)

ngrok_tunnel = ngrok.connect(addr=str(STREAMLIT_PORT), proto='http', bind_tls=True)
print(' * Tunnel URL:', ngrok_tunnel.public_url)

nohup: appending output to 'nohup.out'
 * Tunnel URL: https://5799fee29711.ngrok-free.app
