In [4]:
# --- Install dependencies ---
# !pip install -q transformers langchain accelerate bitsandbytes langchain_community

# --- 1. Load LLaMA 3 model locally via Hugging Face ---
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline # Import pipeline from the correct submodule
from langchain_community.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from langchain.chains import SequentialChain
import torch

# Replace with your Hugging Face token (store securely in practice)
from google.colab import userdata
hf_token = userdata.get('llama2')
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Wrap with pipeline
llama_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7
)

# Wrap pipeline in LangChain
llm = HuggingFacePipeline(pipeline=llama_pipeline)

# --- 2. Create PromptTemplates and LLMChains ---

# Explanation Chain
explanation_prompt = PromptTemplate(
    input_variables=["topic"],
    template="Explain the concept of {topic} in simple terms for a high school student."
)
explanation_chain = LLMChain(llm=llm, prompt=explanation_prompt, output_key="explanation")

# Quiz Chain
quiz_prompt = PromptTemplate(
    input_variables=["topic", "explanation"],
    template="Based on the topic '{topic}' and the explanation below:\n\n{explanation}\n\nWrite a quiz question."
)
quiz_chain = LLMChain(llm=llm, prompt=quiz_prompt, output_key="quiz")

# Hint Chain
hint_prompt = PromptTemplate(
    input_variables=["quiz"],
    template="Provide a helpful hint to guide a student in answering the quiz question:\n\n{quiz}"
)
hint_chain = LLMChain(llm=llm, prompt=hint_prompt, output_key="hint")

# --- 3. Combine all into a SequentialChain ---
from langchain.chains import SequentialChain

overall_chain = SequentialChain(
    chains=[explanation_chain, quiz_chain, hint_chain],
    input_variables=["topic"],
    output_variables=["explanation", "quiz", "hint"],
    verbose=True
)

# --- 4. Run the chain ---
topic = "Neural Networks"
result = overall_chain.invoke({"topic": topic})

# --- 5. Print results ---
print("\nüìò Explanation:\n", result["explanation"])
print("\n‚ùì Quiz Question:\n", result["quiz"])
print("\nüí° Hint:\n", result["hint"])

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llama_pipeline)
  explanation_chain = LLMChain(llm=llm, prompt=explanation_prompt, output_key="explanation")




[1m> Entering new SequentialChain chain...[0m


KeyboardInterrupt: 

In [6]:
!pip install -q --upgrade transformers


[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m40.9/40.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.8/10.8 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25h