In [5]:
# Install required libraries
!pip install --upgrade pip
!pip install transformers datasets sentence-transformers fastai[transformers] torch torchvision torchaudio kaggle --quiet

# Verify installs
import pkgutil
for pkg in ("transformers","datasets","sentence_transformers","fastai","kaggle","torch"):
    print(pkg, "installed:", pkgutil.find_loader(pkg) is not None)


Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.3
[0mtransformers installed: True
datasets installed: True
sentence_transformers installed: True
fastai installed: True
kaggle installed: True
torch installed: True


  print(pkg, "installed:", pkgutil.find_loader(pkg) is not None)


In [6]:
from transformers import pipeline

sentiment = pipeline("sentiment-analysis")

texts = [
    "I love this product, it works perfectly.",
    "This is the worst experience I have ever had.",
    "The movie was okay, not great but not terrible.",
    "Amazing performance and great story!",
    "Customer service was very disappointing."
] * 40
results = sentiment(texts)

results[:5]


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998784065246582},
 {'label': 'NEGATIVE', 'score': 0.9997627139091492},
 {'label': 'POSITIVE', 'score': 0.9776669144630432},
 {'label': 'POSITIVE', 'score': 0.999883770942688},
 {'label': 'NEGATIVE', 'score': 0.999774158000946}]

In [7]:
import csv

with open("sentiment_results.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["text", "label", "score"])
    for t, r in zip(texts, results):
        writer.writerow([t, r["label"], r["score"]])

print("Saved sentiment_results.csv")


Saved sentiment_results.csv


In [10]:
from transformers import pipeline

summarizer = pipeline("summarization")

articles = [
    """Artificial intelligence has seen rapid growth in recent years.
    Large language models are now capable of understanding and generating text
    at near human levels. Researchers continue to explore ethical and safety concerns.""",

    """Climate change is impacting ecosystems worldwide. Rising temperatures,
    melting glaciers, and extreme weather events pose serious risks to biodiversity."""
] * 5

summaries = []

for article in articles:
    summary = summarizer(article, max_length=20, min_length=10, do_sample=False)[0]["summary_text"]
    summaries.append(summary)

summaries[:3]


No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[' Artificial intelligence has seen rapid growth in recent years . Large language models are now capable of',
 ' Climate change is impacting ecosystems worldwide . Rising temperatures, melting glaciers, and extreme weather events',
 ' Artificial intelligence has seen rapid growth in recent years . Large language models are now capable of']

In [12]:
from transformers import pipeline

en_fr = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
fr_en = pipeline("translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en")

sentences = [
    "Machine learning is changing the world.",
    "Artificial intelligence is fascinating.",
    "This course is very informative."
] * 7   # 21 sentences

translations = []

for s in sentences:
    fr = en_fr(s)[0]["translation_text"]
    back = fr_en(fr)[0]["translation_text"]
    translations.append((s, fr, back))

translations[:3]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cpu


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cpu


[('Machine learning is changing the world.',
  "L'apprentissage automatique change le monde.",
  'Machine learning is changing the world.'),
 ('Artificial intelligence is fascinating.',
  "L'intelligence artificielle est fascinante.",
  'Artificial intelligence is fascinating.'),
 ('This course is very informative.',
  'Ce cours est très instructif.',
  'This course is very instructive.')]

In [13]:
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

faqs = [f"How do I reset my password question {i}?" for i in range(100)]
faq_embeddings = model.encode(faqs, convert_to_tensor=True)

query = "How can I change my password?"
query_embedding = model.encode(query, convert_to_tensor=True)

scores = util.pytorch_cos_sim(query_embedding, faq_embeddings)[0]

top_results = scores.topk(3)

for idx in top_results.indices:
    print(faqs[idx])


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

How do I reset my password question 3?
How do I reset my password question 2?
How do I reset my password question 4?


In [27]:
# ==============================
# FastAI IMDb Text Classification
# Progress-bar safe for Colab
# ==============================

# ---- Fix fastprogress / Colab bug ----
import fastprogress.fastprogress as fpd
fpd.force_console_is_str = True
fpd.NO_BAR = True

from fastai.callback.progress import ProgressCallback

# ---- Imports ----
from datasets import load_dataset
import pandas as pd
from fastai.text.all import *

# ---- Load IMDb dataset ----
ds = load_dataset("imdb")

# Use smaller subset (lab-friendly)
train_ds = ds["train"].shuffle(seed=42).select(range(4000))
test_ds  = ds["test"].shuffle(seed=42).select(range(2000))

# Convert to DataFrame
train_df = pd.DataFrame(train_ds)
test_df  = pd.DataFrame(test_ds)

# ---- Create DataLoaders ----
dls = TextDataLoaders.from_df(
    train_df,
    text_col="text",
    label_col="label",
    valid_pct=0.1,
    bs=32
)

# ---- Create learner WITHOUT progress bars ----
learn = text_classifier_learner(
    dls,
    AWD_LSTM,
    metrics=accuracy,
    cbs=[]
)

# Remove any automatically added progress callbacks
learn.remove_cbs(ProgressCallback)

# ---- Train model (1 epoch) ----
learn.fine_tune(1)

# ---- Validate & print accuracy ----
val_loss, val_acc = learn.validate()
print(f"Validation Accuracy: {val_acc:.4f}")


KeyboardInterrupt: 

In [26]:
from transformers import pipeline

# Create translation pipelines
en_to_fr = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
fr_to_en = pipeline("translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en")

# Sentences to translate
sentences = [
    "Machine learning is changing the world.",
    "Artificial intelligence is used in many applications.",
    "This lab is very helpful for understanding NLP."
]

print("ENGLISH → FRENCH → ENGLISH\n")

for s in sentences:
    french = en_to_fr(s)[0]["translation_text"]
    back_to_english = fr_to_en(french)[0]["translation_text"]

    print("Original English :", s)
    print("French           :", french)
    print("Back to English  :", back_to_english)
    print("-" * 50)

Device set to use cpu
Device set to use cpu


ENGLISH → FRENCH → ENGLISH

Original English : Machine learning is changing the world.
French           : L'apprentissage automatique change le monde.
Back to English  : Machine learning is changing the world.
--------------------------------------------------
Original English : Artificial intelligence is used in many applications.
French           : L'intelligence artificielle est utilisée dans de nombreuses applications.
Back to English  : Artificial intelligence is used in many applications.
--------------------------------------------------
Original English : This lab is very helpful for understanding NLP.
French           : Ce laboratoire est très utile pour comprendre le NLP.
Back to English  : This laboratory is very useful for understanding NLP.
--------------------------------------------------


In [28]:
from google.colab import drive
drive.mount("/content/drive")

!cp translation_results_en_fr_en.csv /content/drive/MyDrive/
print("File copied to Google Drive")

Mounted at /content/drive
cp: cannot stat 'translation_results_en_fr_en.csv': No such file or directory
File copied to Google Drive
