In [None]:
!pip install transformers torch accelerate -q

In [None]:
!pip install bitsandbytes -q
!pip install sentencepiece -q
!pip install nltk spacy scikit-learn -q

In [None]:
import warnings
# Install a blanket "ignore" filter: with no category/module arguments this
# suppresses every warning for the rest of the session, which keeps the
# notebook output clean but also hides deprecation notices.
warnings.filterwarnings('ignore')

# Visible confirmation that the setup cell ran.
print("Setup complete!")

In [None]:
# System check
import torch

# Banner, then the PyTorch build and whether a CUDA device can be used.
print("=" * 50, "SYSTEM CHECK", "=" * 50, sep="\n")
print("PyTorch version: {}".format(torch.__version__))
print("CUDA available: {}".format(torch.cuda.is_available()))

# Report the best available device: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    print("GPU: {}".format(torch.cuda.get_device_name(0)))
    # total_memory is reported in bytes; show it in decimal gigabytes.
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print("GPU Memory: {:.1f} GB".format(gpu_mem))
else:
    # The hasattr guard keeps this working when torch.backends has no `mps`.
    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        print("Apple MPS available")
    else:
        print("Running on CPU - TinyLlama recommended")
print("=" * 50)

In [None]:
# Load Local LLM
from transformers import pipeline
import torch

print("Loading TinyLlama-1.1B-Chat...")
print("(First run downloads ~2GB)")

# Half precision is only a good default on an accelerator. On a CPU-only
# machine float16 is slow and, on older PyTorch builds, some ops are not
# implemented for Half, so fall back to float32 there.
has_accelerator = torch.cuda.is_available() or (
    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
)
model_dtype = torch.float16 if has_accelerator else torch.float32

# Text-generation pipeline shared by every later cell via the global
# `generator`; device_map="auto" lets accelerate choose the device placement.
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=model_dtype,
    device_map="auto"
)

print("✓ Model loaded successfully!")

In [None]:
# Helper function for text generation
def generate_response(prompt, max_tokens=150, temperature=0.3):
    """
    Generate a response from the local LLM.

    The prompt is wrapped in the system/user/assistant chat markup built
    below and passed to the module-level ``generator`` pipeline.

    Args:
        prompt: The user prompt text.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (roughly 0.1-1.5). A value of 0 or
            below selects greedy (deterministic) decoding instead of
            sampling.

    Returns:
        The generated text with surrounding whitespace stripped. The prompt
        itself is not included (``return_full_text=False``).
    """
    # Chat markup: system message, user turn, then an open assistant turn.
    chat_prompt = (
        "<|system|>\n"
        "You are a helpful AI assistant.</s>\n"
        "<|user|>\n"
        f"{prompt}</s>\n"
        "<|assistant|>\n"
    )

    generation_kwargs = {
        "max_new_tokens": max_tokens,
        "repetition_penalty": 1.1,
        "return_full_text": False,
    }

    if temperature is not None and temperature <= 0:
        # Sampling needs a strictly positive temperature, so a request for
        # temperature 0 is served with greedy decoding. temperature/top_p are
        # omitted because they only apply when sampling.
        generation_kwargs["do_sample"] = False
    else:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = 0.9

    outputs = generator(chat_prompt, **generation_kwargs)

    return outputs[0]['generated_text'].strip()

# Test it: one short generation (capped at 20 new tokens) to confirm that the
# loaded pipeline and the chat prompt wrapper work together.
print(generate_response("Say hello!", max_tokens=20))

In [None]:
# Student Information
STUDENT_NAME = "Smriti Khatiwada"
STUDENT_ID = "2347189"

# Which option did you choose?
# The test-run cell uses this value to pick the dataset and to decide
# whether the secondary task is executed.
CHOSEN_OPTION = "B"  # Change to "A", "B", or "C"

# Echo the configuration so it is visible in the notebook output.
print("Student:", "{} ({})".format(STUDENT_NAME, STUDENT_ID))
print("Chosen Option:", CHOSEN_OPTION)

In [None]:

# Option B dataset: a list of news-article records. Each record is a dict with
#   "id"                - integer identifier printed while processing
#   "title"             - headline
#   "text"              - article body fed to the task functions
#   "expected_topic"    - reference label compared against the Task 1 prediction
#   "expected_entities" - reference entity names (not read by the evaluation
#                         cells visible in this notebook)
# NOTE: the "text" values are triple-quoted strings, so the line breaks and the
# leading spaces on their continuation lines are part of the string that is
# sent to the model. Re-indenting those lines changes the data.
news_articles = [
    {
        "id": 1,
        "title": "Tech Giants Report Record Earnings",
        "text": """Apple, Microsoft, and Google parent Alphabet all reported
        better-than-expected quarterly earnings on Tuesday. Apple's revenue
        rose 8% to $94.8 billion, driven by strong iPhone sales. CEO Tim Cook
        expressed optimism about the company's AI initiatives.""",
        "expected_topic": "Technology",
        "expected_entities": ["Apple", "Microsoft", "Google", "Tim Cook"],
    },
    {
        "id": 2,
        "title": "City Council Approves New Budget",
        "text": """The Springfield City Council voted 7-2 on Wednesday to approve
        a $500 million budget for fiscal year 2025. Mayor Jane Smith praised
        the bipartisan cooperation. The budget includes funding for new schools
        and road repairs.""",
        "expected_topic": "Politics",
        "expected_entities": ["Springfield", "Jane Smith"],
    },
    {
        "id": 3,
        "title": "Lakers Defeat Celtics in Overtime Thriller",
        "text": """LeBron James scored 42 points as the Los Angeles Lakers
        defeated the Boston Celtics 118-115 in overtime at Staples Center.
        The victory extends the Lakers' winning streak to 5 games.""",
        "expected_topic": "Sports",
        "expected_entities": ["LeBron James", "Los Angeles Lakers", "Boston Celtics"],
    },
    {
    "id": 4,
    "title": "Breakthrough in Cancer Research Announced",
    "text": """Researchers at Johns Hopkins University have developed a new
    targeted therapy showing promising results in early-stage clinical trials.
    Dr. Emily Carter stated the treatment significantly reduced tumor growth
    in 70% of patients. The study was published in the Journal of Clinical Oncology.""",
    "expected_topic": "Health",
    "expected_entities": ["Johns Hopkins University", "Emily Carter", "Journal of Clinical Oncology"],
},
{
    "id": 5,
    "title": "Tesla Unveils New Electric SUV Model",
    "text": """Tesla CEO Elon Musk introduced the company's latest electric SUV
    at an event in Austin, Texas. The Model Z is expected to have a range of
    over 400 miles per charge. Shares of Tesla rose 5% following the announcement.""",
    "expected_topic": "Business",
    "expected_entities": ["Tesla", "Elon Musk", "Austin, Texas"],
},
{
    "id": 6,
    "title": "United Nations Holds Climate Summit",
    "text": """World leaders gathered at the United Nations headquarters in
    New York to discuss global climate action. Secretary-General António Guterres
    urged countries to reduce carbon emissions before 2030. The summit focused
    on renewable energy investments and international cooperation.""",
    "expected_topic": "World",
    "expected_entities": ["United Nations", "New York", "António Guterres"],
},
{
    "id": 7,
    "title": "Amazon Expands Same-Day Delivery Service",
    "text": """Amazon announced the expansion of its same-day delivery service
    to 20 additional U.S. cities. CEO Andy Jassy said the move aims to compete
    more aggressively with Walmart. Customers in Chicago and Dallas will benefit
    from faster shipping starting next month.""",
    "expected_topic": "Business",
    "expected_entities": ["Amazon", "Andy Jassy", "Walmart", "Chicago", "Dallas"],
},
{
    "id": 8,
    "title": "NASA Successfully Launches Mars Mission",
    "text": """NASA launched its latest Mars rover aboard a SpaceX Falcon 9 rocket
    from Cape Canaveral, Florida. The mission aims to search for signs of ancient
    microbial life. NASA Administrator Bill Nelson congratulated the team on
    the successful launch.""",
    "expected_topic": "Science",
    "expected_entities": ["NASA", "SpaceX", "Cape Canaveral", "Bill Nelson"],
},

    {
    "id": 9,
    "title": "WHO Issues New Global Health Guidelines",
    "text": """The World Health Organization released updated global health
    guidelines focusing on pandemic preparedness. Director-General Tedros
    Adhanom Ghebreyesus emphasized the need for stronger international
    coordination. The announcement was made at the organization's headquarters
    in Geneva.""",
    "expected_topic": "Health",
    "expected_entities": ["World Health Organization", "Tedros Adhanom Ghebreyesus", "Geneva"],
},
{
    "id": 10,
    "title": "Meta Introduces Next-Generation VR Headset",
    "text": """Meta unveiled its latest virtual reality headset during a
    keynote event in Menlo Park, California. CEO Mark Zuckerberg said the
    device represents a major step toward building the metaverse. Analysts
    believe the launch will intensify competition with Sony and Apple.""",
    "expected_topic": "Technology",
    "expected_entities": ["Meta", "Mark Zuckerberg", "Menlo Park, California", "Sony", "Apple"],
},
{
    "id": 11,
    "title": "Supreme Court Delivers Landmark Ruling",
    "text": """The United States Supreme Court issued a landmark ruling on
    voting rights legislation. Chief Justice John Roberts wrote the majority
    opinion, stating the decision would reshape election procedures nationwide.
    Lawmakers in Washington, D.C., responded swiftly to the verdict.""",
    "expected_topic": "Politics",
    "expected_entities": ["United States Supreme Court", "John Roberts", "Washington, D.C."],
},
{
    "id": 12,
    "title": "World Bank Approves $1 Billion Infrastructure Loan",
    "text": """The World Bank approved a $1 billion loan to support infrastructure
    development projects in India. Finance Minister Nirmala Sitharaman welcomed
    the funding, saying it would boost economic growth. Construction is expected
    to begin later this year.""",
    "expected_topic": "World",
    "expected_entities": ["World Bank", "India", "Nirmala Sitharaman"],
},
{
    "id": 13,
    "title": "Oscars Ceremony Draws Global Audience",
    "text": """The 98th Academy Awards ceremony was held in Los Angeles,
    attracting millions of viewers worldwide. Actor Leonardo DiCaprio presented
    the award for Best Picture. Organizers from the Academy of Motion Picture
    Arts and Sciences reported record online engagement.""",
    "expected_topic": "Entertainment",
    "expected_entities": ["Academy Awards", "Los Angeles", "Leonardo DiCaprio", "Academy of Motion Picture Arts and Sciences"],
}
]
# Report how many records were loaded.
print(f"Option B: {len(news_articles)} news articles loaded")

In [None]:
# ═══════════════════════════════════════════════════════
# TASK 1: NEWS TOPIC CLASSIFICATION
# ═══════════════════════════════════════════════════════

"""
PROMPT ENGINEERING NOTES:
========================
Listed the allowed categories explicitly

Asked for the category name only

Used low temperature

Constrained output (10 new tokens, first matching category kept)
"""
def task1_function(text):
    """
    Classify a news article into one of eight fixed topic categories.

    Args:
        text: The article body to classify.

    Returns:
        The canonical category name (e.g. "Technology") when the model's
        reply contains one of the allowed categories, regardless of case or
        surrounding punctuation/words. Otherwise the first line of the reply
        with any "Category:" prefix removed, so unexpected answers stay
        visible to the caller.
    """
    # Single source of truth for the label set: used to build the prompt and
    # to normalise the model's answer.
    categories = (
        "Politics",
        "Technology",
        "Sports",
        "Business",
        "Entertainment",
        "Health",
        "World",
        "Science",
    )
    category_lines = "\n".join(categories)

    prompt = f"""
Classify the following news article into ONE of these categories:

{category_lines}

Respond with ONLY the category name.
Do not add any extra words.

Article:
{text}
"""

    response = generate_response(prompt, max_tokens=10, temperature=0.2)

    # Clean output: keep the first line and drop a leading "Category:" label.
    prediction = response.strip().split("\n")[0]
    prediction = prediction.replace("Category:", "").strip()

    # Map the reply onto a canonical label. Small models often answer
    # "technology", "Entertainment." or "The category is Sports", which an
    # exact string comparison against the expected label would count as wrong.
    canonical = {name.lower(): name for name in categories}
    words = "".join(ch if ch.isalpha() else " " for ch in response).split()
    for word in words:
        match = canonical.get(word.lower())
        if match is not None:
            return match

    # No known category mentioned: return the cleaned reply unchanged.
    return prediction

# Test Task 1
# Smoke test on the Oscars story (same wording as article 13, whose
# expected_topic is "Entertainment").
print("Testing Task 1...")
test_result = task1_function("""The 98th Academy Awards ceremony was held in Los Angeles,
    attracting millions of viewers worldwide. Actor Leonardo DiCaprio presented
    the award for Best Picture. Organizers from the Academy of Motion Picture
    Arts and Sciences reported record online engagement.""")
print(f"Result: {test_result}")

In [None]:
# ═══════════════════════════════════════════════════════
# TASK 2: YOUR SECONDARY NLP TASK
# ═══════════════════════════════════════════════════════

"""
PROMPT ENGINEERING NOTES:
========================
Asked model to extract entities clearly

Specified output format

Used low temperature

Constrained output
"""
def task2_function(text):
    """
    Ask the local LLM for the proper names mentioned in a piece of text.

    Args:
        text: The article body to extract names from.

    Returns:
        The model's reply as a single line: every newline is replaced by a
        space and surrounding whitespace is removed. The reply is not parsed
        further, so it is whatever the model produced (ideally a
        comma-separated list of names).
    """
    # Instruction block followed by the article; assembled line by line so the
    # exact text sent to the model is easy to read.
    prompt = (
        "\n"
        "List all proper names (people, organizations, and locations)\n"
        "that appear in this text.\n"
        "\n"
        "Only list names separated by commas.\n"
        "Do not explain.\n"
        "Do not rewrite the article.\n"
        "\n"
        "Text:\n"
        f"{text}\n"
    )

    # Low temperature and a 60-token budget keep the answer short and stable.
    raw_reply = generate_response(prompt, max_tokens=60, temperature=0.1)

    # Flatten a multi-line reply into one line.
    return " ".join(raw_reply.split("\n")).strip()

# Test Task 2
# Smoke test for the entity extractor on the World Bank loan story (the
# wording of article 12, here without its final period).
print("Testing Task 2...")
test_result = task2_function("""The World Bank approved a $1 billion loan to support infrastructure
    development projects in India. Finance Minister Nirmala Sitharaman welcomed
    the funding, saying it would boost economic growth. Construction is expected
    to begin later this year""")
print(f"Result: {test_result}")

In [None]:
# ═══════════════════════════════════════════════════════
# RUN ALL TESTS (FINAL VERSION FOR OPTION B)
# ═══════════════════════════════════════════════════════

from sklearn.metrics import accuracy_score

# Fresh accumulators on every run so re-executing the cell never double-counts.
results, true_labels, predicted_labels = [], [], []

print("=" * 70, "RUNNING TESTS", "=" * 70, sep="\n")

# Select dataset. Only the chosen branch is evaluated, so datasets for the
# other options do not need to exist.
if CHOSEN_OPTION == "B":
    test_data = news_articles
elif CHOSEN_OPTION == "A":
    test_data = support_tickets
else:
    test_data = recipes

for item in test_data:
    print(f"\nProcessing item {item['id']}...")

    text = item.get("text", "")

    # ---------------------------
    # TASK 1: Classification
    # ---------------------------
    prediction = task1_function(text)

    # The reference label is stored under "expected_category" or
    # "expected_topic" depending on the record; "N/A" when neither is present.
    if "expected_category" in item:
        expected = item["expected_category"]
    else:
        expected = item.get("expected_topic", "N/A")

    print(f"Predicted: {prediction}")
    print(f"Expected : {expected}")

    true_labels.append(expected.strip())
    predicted_labels.append(prediction.strip())

    # ---------------------------
    # TASK 2 (Option B only)
    # ---------------------------
    entities = "N/A"
    if CHOSEN_OPTION == "B":
        entities = task2_function(text)
        print("Named Entities:", entities)

    # Store results
    results.append(
        dict(
            id=item["id"],
            prediction=prediction,
            expected=expected,
            entities=entities,
        )
    )

# ---------------------------
# Accuracy Calculation
# ---------------------------
# Exact string match between whitespace-stripped expected and predicted labels.
accuracy = accuracy_score(true_labels, predicted_labels)

print(f"\n{'=' * 70}")
print(f"Processed {len(results)} test cases")
print(f"Classification Accuracy: {accuracy:.2f}")
print("=" * 70)

In [None]:
# ═══════════════════════════════════════════════════════
# RESULTS TABLE (FINAL VERSION)
# ═══════════════════════════════════════════════════════

import pandas as pd

# One row per processed item, columns taken from the result dicts.
df = pd.DataFrame(results)

# Add correctness column: whitespace-insensitive exact match of the labels.
df["Correct"] = df["prediction"].str.strip().eq(df["expected"].str.strip())

print("\nRESULTS SUMMARY:", "=" * 70, sep="\n")

# Plain-text rendering of the full table, without the index column.
print(df.to_string(index=False))

# Mean of the boolean column is the fraction of correct predictions.
print("\nOverall Accuracy:", round(df["Correct"].mean(), 2))