# List of models that can be accessed through a Groq account

In [1]:
import os, requests
# Groq's OpenAI-compatible API root and the bearer-token header for it.
BASE = "https://api.groq.com/openai/v1"
HEAD = {"Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}"}

# Fail loudly on an HTTP error (bad/missing key, outage) instead of parsing the
# error body and printing a misleading empty list.
resp = requests.get(f"{BASE}/models", headers=HEAD, timeout=30)
resp.raise_for_status()
models = resp.json()

# Show only the model ids from the listing payload.
print([m["id"] for m in models.get("data", [])])


['llama-3.3-70b-versatile', 'deepseek-r1-distill-llama-70b', 'compound-beta', 'compound-beta-mini', 'meta-llama/llama-prompt-guard-2-86m', 'gemma2-9b-it', 'meta-llama/llama-4-scout-17b-16e-instruct', 'playai-tts-arabic', 'qwen/qwen3-32b', 'meta-llama/llama-guard-4-12b', 'moonshotai/kimi-k2-instruct', 'openai/gpt-oss-120b', 'openai/gpt-oss-20b', 'whisper-large-v3-turbo', 'whisper-large-v3', 'allam-2-7b', 'playai-tts', 'meta-llama/llama-4-maverick-17b-128e-instruct', 'llama-3.1-8b-instant', 'meta-llama/llama-prompt-guard-2-22m']


In [15]:
import os, time, json, requests
# Groq's OpenAI-compatible API root and the bearer-token header shared by the
# helpers below. NOTE: if GROQ_API_KEY is unset, os.getenv returns None and the
# header value becomes the literal string "Bearer None".
BASE = "https://api.groq.com/openai/v1"
HEAD = {"Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}"}

def list_models():
    """Return the ids of every model listed by ``{BASE}/models``.

    Uses the module-level ``HEAD`` headers and a 30 s timeout. A non-2xx
    response raises via ``raise_for_status()``; a payload without a
    ``"data"`` key yields an empty list.
    """
    response = requests.get(f"{BASE}/models", headers=HEAD, timeout=30)
    response.raise_for_status()
    model_ids = []
    for entry in response.json().get("data", []):
        model_ids.append(entry["id"])
    return model_ids

# Fixed status-code -> reason labels; 400 and 429 are refined from the message.
_STATUS_REASONS = {
    401: "auth_error (invalid/expired API key or missing bearer)",
    403: "forbidden (model not enabled for this account)",
    404: "model_not_found (typo or de-listed model id)",
    409: "concurrency limit reached",
    422: "validation_error (bad payload)",
}


def _extract_error(resp):
    """Return ``(message, type)`` from an error response, tolerating odd bodies."""
    try:
        body = resp.json()
    except ValueError:
        # Not JSON (e.g. an HTML error page): keep the raw text as the message
        # so it is not lost from the debugging output.
        return resp.text or "", ""
    error = body.get("error") if isinstance(body, dict) else None
    if isinstance(error, dict):
        return str(error.get("message") or ""), str(error.get("type") or "")
    if isinstance(error, str):
        # Some gateways send {"error": "text"} rather than a nested object.
        return error, ""
    return "", ""


def _classify_failure(code, msg):
    """Map an HTTP status code (plus error message) to a short reason label."""
    lowered = msg.lower()
    if code == 429:
        # Could be rate limit OR quota exceeded
        if "quota" in lowered:
            return "quota_exceeded (billing/credit cap reached)"
        return "rate_limited (too many requests)"
    if code == 400:
        # Often context-length issues show up as 400 with a message
        if "maximum context length" in lowered:
            return "context_limit (prompt+max_tokens too large)"
        return "bad_request"
    return _STATUS_REASONS.get(code, f"unexpected_{code}")


def try_chat(model_id: str):
    """Send a tiny chat request and return (ok, reason, extra).

    Posts a single-message ("ping") completion request for ``model_id`` to
    ``{BASE}/chat/completions`` using the module-level ``HEAD`` headers.

    Args:
        model_id: Model id to probe.

    Returns:
        A 3-tuple:

        * HTTP 200: ``(True, "ok", rate)`` where ``rate`` is a dict of the
          response's ``x-ratelimit*`` headers (possibly empty).
        * Any other status: ``(False, reason, extra)`` where ``extra`` holds
          ``"msg"`` (first 200 characters of the error message, or of the raw
          body when it is not JSON), ``"type"``, and ``"rate"`` when
          rate-limit headers were present.
        * Transport failure: ``(False, "network_error", {"exc": str(e)})``.
    """
    url = f"{BASE}/chat/completions"
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": "ping"}],
        "max_tokens": 8,
        "temperature": 0.0,
    }
    try:
        r = requests.post(url, headers=HEAD, json=payload, timeout=30)
    except requests.RequestException as e:
        return False, "network_error", {"exc": str(e)}

    # Capture rate-limit headers if present
    rate = {k: v for k, v in r.headers.items() if k.lower().startswith("x-ratelimit")}
    if r.status_code == 200:
        return True, "ok", rate

    msg, typ = _extract_error(r)
    reason = _classify_failure(r.status_code, msg)
    # Attach short message + rate headers for debugging
    extra = {"msg": msg[:200], "type": typ, **({"rate": rate} if rate else {})}
    return False, reason, extra

from pprint import pprint

# Model-id prefixes for non-chat endpoints (TTS, Whisper, guard classifiers)
# that are left out of the chat probe.
SKIP_PREFIXES = ("playai-tts", "whisper", "meta-llama/llama-guard", "meta-llama/llama-prompt-guard")

models = list(filter(lambda name: not name.startswith(SKIP_PREFIXES), list_models()))

# Probe each remaining model once and record the outcome.
results = []
for m in models:
    ok, reason, extra = try_chat(m)
    status = "ok" if ok else "fail"
    results.append(dict(model=m, status=status, reason=reason, extra=extra))
    # Brief pause between probes to stay clear of rate limits.
    time.sleep(0.2)

# Dump the collected outcomes in a readable form.
pprint(results)


[{'extra': {'x-ratelimit-limit-requests': '1000',
            'x-ratelimit-limit-tokens': '8000',
            'x-ratelimit-remaining-requests': '999',
            'x-ratelimit-remaining-tokens': '7927',
            'x-ratelimit-reset-requests': '1m26.4s',
            'x-ratelimit-reset-tokens': '547.5ms'},
  'model': 'openai/gpt-oss-120b',
  'reason': 'ok',
  'status': 'ok'},
 {'extra': {'x-ratelimit-limit-requests': '1000',
            'x-ratelimit-limit-tokens': '6000',
            'x-ratelimit-remaining-requests': '999',
            'x-ratelimit-remaining-tokens': '5993',
            'x-ratelimit-reset-requests': '1m26.4s',
            'x-ratelimit-reset-tokens': '70ms'},
  'model': 'meta-llama/llama-4-maverick-17b-128e-instruct',
  'reason': 'ok',
  'status': 'ok'},
 {'extra': {'x-ratelimit-limit-requests': '1000',
            'x-ratelimit-limit-tokens': '8000',
            'x-ratelimit-remaining-requests': '999',
            'x-ratelimit-remaining-tokens': '7927',
            'x-r

In [9]:
# Uncomment only one model line below:

# MODEL = "llama2"           
# MODEL = "llama-3.1-8b-instant"


In [13]:
# import os
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# from langchain_community.llms import Ollama
# from langchain_community.embeddings import OllamaEmbeddings

# GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# # Choose any model you want
# MODEL = "llama-3.1-8b-instant"   # Example: Groq model
# # MODEL = "deepseek-r1-distill-llama-70b"  # Example: non-Groq model

# # Groq-supported model prefixes
# GROQ_MODELS = ["gemma", "llama3", "mixtral", "qwen"]

# try:
#     if any(MODEL.startswith(m) for m in GROQ_MODELS):
#         print(f"✅ Using Groq model: {MODEL}")
#         model = ChatOpenAI(
#             api_key=GROQ_API_KEY,
#             model=MODEL,
#             base_url="https://api.groq.com/openai/v1"
#         )
#         embeddings = OpenAIEmbeddings(
#             api_key=GROQ_API_KEY,
#             model=os.getenv("GROQ_EMBED_MODEL", "text-embedding-3-small"),
#             base_url="https://api.groq.com/openai/v1"
#         )
#         _ = model.invoke("ping")  # sanity check
#     else:
#         raise ValueError("Model not supported by Groq.")
# except Exception as e:
#     print(f"⚠️ Groq failed or model not supported: {e}")
#     print("👉 Switching to Ollama with model: llama2")
#     model = Ollama(model="llama2")
#     embeddings = OllamaEmbeddings()

# print("LLM and Embeddings initialized successfully!")


⚠️ Groq failed or model not supported: Model not supported by Groq.
👉 Switching to Ollama with model: llama2
LLM and Embeddings initialized successfully!


In [19]:
import os
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_BASE_URL = "https://api.groq.com/openai/v1"

# Choose your model
MODEL = "llama-3.1-8b-instant"

# --- Chat model: try Groq first, fall back to a local Ollama model ----------
try:
    print(f"🔎 Trying Groq model: {MODEL}")
    model = ChatOpenAI(
        api_key=GROQ_API_KEY,
        model=MODEL,
        base_url=GROQ_BASE_URL,
    )
    # Tiny test request to check quota/context
    _ = model.invoke("ping")
    print(f"Using Groq model: {MODEL}")
except Exception as e:
    # Catch all Groq-related issues (quota exceeded, rate limit, auth, etc.)
    print(f"Groq failed: {e}")
    print(" Switching to Ollama with model: llama2")
    model = Ollama(model="llama2")

# --- Embeddings: validated separately from the chat model --------------------
# A successful chat "ping" says nothing about whether the embeddings endpoint
# or model is usable, so probe it here; otherwise a bad embeddings setup only
# surfaces later, when the vector index is built.
try:
    embeddings = OpenAIEmbeddings(
        api_key=GROQ_API_KEY,
        model=os.getenv("GROQ_EMBED_MODEL", "text-embedding-3-small"),
        base_url=GROQ_BASE_URL,
    )
    _ = embeddings.embed_query("ping")
except Exception as e:
    print(f"Groq embeddings failed: {e}")
    print(" Switching to Ollama embeddings")
    embeddings = OllamaEmbeddings()

print("LLM and Embeddings initialized successfully!")


🔎 Trying Groq model: llama-3.1-8b-instant
Using Groq model: llama-3.1-8b-instant
LLM and Embeddings initialized successfully!


In [18]:
# Send one free-form prompt to the selected LLM and print whatever object it
# returns (the recorded output shows a `content=...` repr, not a bare string).
response = model.invoke("What's something cool about black holes?")
print(response)


content="Black holes are indeed one of the most fascinating and mysterious objects in the universe. Here are a few cool facts about them:\n\n1. **Event Horizons**: Black holes have an event horizon, which is the point of no return around a black hole. Once something crosses the event horizon, it gets trapped by the black hole's gravity and can't escape.\n\n2. **Warped Space-Time**: Black holes warp the fabric of space-time around them, creating a region known as the ergosphere. This region can even trap and rotate the rotation of objects that enter it.\n\n3. **Gravitational Pull**: The gravitational pull of a black hole is so strong that not even light can escape once it gets too close to the event horizon. This is why black holes are invisible to us, even though they can be detected by their effects on the surrounding environment.\n\n4. **Information Paradox**: The theory of general relativity suggests that anything that falls into a black hole gets destroyed, including information. H

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Pipe the model's output through StrOutputParser so invoking the chain
# yields a plain string (see the recorded output) instead of the raw response.
parser = StrOutputParser()
chain = model | parser
chain.invoke("Tell me a joke")


"Sure! Here's one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!\n\nI hope that brought a smile to your face! Do you have a favorite type of joke or topic you'd like to hear a joke about?"

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF and split it into Document chunks; the bare `pages` expression
# on the last line displays the resulting list as the cell output.
loader = PyPDFLoader("Artificial Intelligence.pdf")
pages =  loader.load_and_split()
pages


[Document(metadata={'producer': 'Skia/PDF m141 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Artificial Intelligence', 'source': 'Artificial Intelligence.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='What  is  Artificial  Intelligence?  \nComplete\n \nUnderstanding\n \n When  most  people  hear  the  term  Artificial  Intelligence,  the  first  thing  they  usually  think  of  is  AI  \nrobots.\n \nThat’s\n \nbecause\n \nmovies\n \nand\n \nnovels\n \nweave\n \nstories\n \nabout\n \nhuman-like\n \nmachines\n \nthat\n \nwreak\n \nhavoc\n \non\n \nEarth.\n \nBut\n \nnothing\n \ncould\n \nbe\n \nfurther\n \nfrom\n \nthe\n \ntruth.\n  Today,  in  the  realm  of  digitalization,  artificial  intelligence  is  unlocking  unprecedented  \npossibilities,\n \nwhere\n \nmachines\n \nnot\n \nonly\n \nmimic\n \nhuman\n \nintelligence\n \nbut\n \nalso\n \ntransform\n \nthe\n \ncomplexities\n \nof\n \nthe\n \nsoftware\n \nbusiness\n \nworld.\n  Histor

In [None]:
from langchain.prompts import PromptTemplate

# Prompt template with two placeholders, {context} and {question}; it tells
# the model to reply 'I don't know' when the context does not hold the answer.
template = """
Answer the question based on the context below. If you can't answer the question, reply 'I don't know'.

Context: {context}
Question: {question}

"""
prompt = PromptTemplate.from_template(template)
# Render the template once with sample values to eyeball the final prompt text.
print(prompt.format(context = "I am a human being", question = "What is my name"))


Answer the question based on the context below. If you can't answer the question, reply 'I don't know'.

Context: I am a human being
Question: What is my name




In [None]:
# Rebind `chain`: fill the prompt template, send it to the model, then parse
# the model output with `parser`.
chain = prompt | model | parser 


In [None]:
# Smoke-test the prompt chain with a hand-written context; the dict keys must
# match the template placeholders ("context" and "question").
chain.invoke(
    {
        "context": "The name i was given was Santiago",
        "question": "What's my name "
    }
)

'Hi Santiago! Your name is Santiago.'

In [None]:
from langchain_community.vectorstores import FAISS

# Collect the non-empty text of every chunk together with its metadata,
# keeping the two lists aligned index-for-index.
texts, metas = [], []
for d in pages:
    pc = getattr(d, "page_content", None)
    if pc is None:
        continue
    if isinstance(pc, bytes):
        pc = pc.decode("utf-8", errors="ignore")
    s = str(pc).strip()
    if not s:
        # Skip chunks that are empty or whitespace-only.
        continue
    texts.append(s)
    # Fall back to an empty dict when metadata is missing or None.
    metas.append(getattr(d, "metadata", None) or {})

# Fail with a clear message here rather than with a less helpful error from
# inside the vector store when there is nothing to embed.
if not texts:
    raise ValueError("No non-empty text chunks found in `pages`; cannot build a FAISS index.")

vectorstore = FAISS.from_texts(texts, embedding=embeddings, metadatas=metas)
retriever = vectorstore.as_retriever()

print(f"FAISS index built from {len(texts)} chunks.")


FAISS index built from 2 chunks.


In [None]:
# Wrap the vector store as a retriever and run a sample query; the recorded
# output shows it returning a list of Document objects.
retriever = vectorstore.as_retriever()
retriever.invoke("Machine learning" )

[Document(id='c5a820c9-5360-46f4-ac0a-dc21a8f99259', metadata={'producer': 'Skia/PDF m141 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Artificial Intelligence', 'source': 'Artificial Intelligence.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='What  is  Artificial  Intelligence?  \nComplete\n \nUnderstanding\n \n When  most  people  hear  the  term  Artificial  Intelligence,  the  first  thing  they  usually  think  of  is  AI  \nrobots.\n \nThat’s\n \nbecause\n \nmovies\n \nand\n \nnovels\n \nweave\n \nstories\n \nabout\n \nhuman-like\n \nmachines\n \nthat\n \nwreak\n \nhavoc\n \non\n \nEarth.\n \nBut\n \nnothing\n \ncould\n \nbe\n \nfurther\n \nfrom\n \nthe\n \ntruth.\n  Today,  in  the  realm  of  digitalization,  artificial  intelligence  is  unlocking  unprecedented  \npossibilities,\n \nwhere\n \nmachines\n \nnot\n \nonly\n \nmimic\n \nhuman\n \nintelligence\n \nbut\n \nalso\n \ntransform\n \nthe\n \ncomplexities\n \nof\n \nthe\n \

In [None]:
from operator import itemgetter

def _format_docs(docs):
    """Join the retrieved documents' text into a single context string.

    Without this step the prompt receives the repr of a list of Document
    objects (ids, metadata, escaped newlines), which adds noise and tokens.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented chain: the incoming "question" is used both to fetch
# context from the retriever and as the question slot of the prompt.
chain = (
    {
        "context": itemgetter("question") | retriever | _format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

result = chain.invoke({"question": "What is machine learning?"})
print(result)

In [None]:
# Sample questions to run through the retrieval chain.
questions = [
    "What is the difference between Machine Learning and Deep Learning",
    "Briefly describe the history of Artificial Intelligence",
    "How do companies use Artificial Intelligence tools to gain a competitive advantage, and what is the primary benefit of adopting AI?",
    "What are some of the concerns and risks associated with Artificial Intelligence, and why is responsible governance important?",
]

# Print each question, then its answer from the chain, then a blank line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()



Question: What is the difference between Machine Learning and Deep Learning
Answer: The answer to your question is:

Machine learning and deep learning are both subfields of artificial intelligence (AI) that involve training algorithms to make predictions or decisions based on data. The main difference between them is in their approach and architecture.

Machine learning is a broader concept that encompasses algorithms and techniques allowing systems to learn patterns and make predictions based on data. It means that a system is being trained with vast data to recognize patterns and make accurate decisions.

Deep learning, on the other hand, is a subset of machine learning that focuses on replicating the learning and decision-making architecture of the human brain. Its technology teaches systems to recognize complex data patterns using AI. It is often used in natural language processing (NLP), speech recognition, and image recognition.

The advantages and limitations of artificial inte