**1) Anlamsal Analiz**


Hugging Face ile Cümle Anlamı Karşılaştırması



In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

model_name = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

sentences = ["The cat is sitting on the mat.", "A feline is on the rug."]

# padding=True pads shorter sentences up to the longest one in the batch, so the
# attention mask is required below to tell real tokens apart from padding.
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Inference only: skip gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

# Mean-pool the token embeddings into one vector per sentence. The average is
# weighted by the attention mask so padding positions do not dilute it; a plain
# .mean(dim=1) would include them whenever the sentences differ in token count.
token_embeddings = outputs.last_hidden_state
mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)


def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (anything ``np.dot`` accepts, e.g. a NumPy array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned instead of dividing by zero and
        producing ``nan``.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator

# Convert the two pooled sentence tensors to NumPy arrays before comparing them.
first_vector = embeddings[0].numpy()
second_vector = embeddings[1].numpy()
similarity_score = cosine_similarity(first_vector, second_vector)
print(f"Semantic Similarity: {similarity_score:.4f}")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Semantic Similarity: 0.4656


Cümle Benzerliklerini Farklı Modellerle Karşılaştırma

In [2]:
from sentence_transformers import SentenceTransformer, util
import torch

# Checkpoints to compare on the same sentence pair.
models = [
    'sentence-transformers/all-MiniLM-L6-v2',
    'sentence-transformers/all-mpnet-base-v2',
    'sentence-transformers/paraphrase-MiniLM-L6-v2',
]

sentence1 = "The cat is sitting on the mat."
sentence2 = "A feline is on the rug."

print(f"Sentence 1: {sentence1}")
print(f"Sentence 2: {sentence2}\n")

for model_name in models:
    print(f"Model: {model_name}")

    # Each checkpoint is loaded fresh inside the loop.
    model = SentenceTransformer(model_name)

    # Encode the sentences one at a time, in order, as tensors.
    embedding1, embedding2 = (
        model.encode(text, convert_to_tensor=True)
        for text in (sentence1, sentence2)
    )

    # cos_sim returns a tensor; .item() extracts the Python float.
    similarity_score = util.cos_sim(embedding1, embedding2).item()
    print(f"Semantic Similarity: {similarity_score:.4f}\n")


Sentence 1: The cat is sitting on the mat.
Sentence 2: A feline is on the rug.

Model: sentence-transformers/all-MiniLM-L6-v2


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic Similarity: 0.4656

Model: sentence-transformers/all-mpnet-base-v2


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic Similarity: 0.6865

Model: sentence-transformers/paraphrase-MiniLM-L6-v2


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic Similarity: 0.4340



Cümlelerin Anlamsal Kümelemesi (Clustering)

In [3]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

sentences = [
    "The dog barked loudly.",
    "The puppy made noise.",
    "He solved the math problem.",
    "She calculated the answer.",
    "The sun is shining today.",
    "It is a bright and sunny day.",
]

# Load the sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Compute one embedding per sentence.
sentence_embeddings = model.encode(sentences)

# Cluster the embeddings into three groups; random_state=0 fixes the seed.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(sentence_embeddings)

# Print each sentence next to the cluster label it was assigned.
for sentence, label in zip(sentences, kmeans.labels_):
    print(f"Sentence: {sentence} -> Cluster {label}")


Sentence: The dog barked loudly. -> Cluster 2
Sentence: The puppy made noise. -> Cluster 2
Sentence: He solved the math problem. -> Cluster 0
Sentence: She calculated the answer. -> Cluster 0
Sentence: The sun is shining today. -> Cluster 1
Sentence: It is a bright and sunny day. -> Cluster 1


Aynı Anlamlı Cümleleri Anlamsal Benzerlik ile Karşılaştırma



In [4]:
from sentence_transformers import SentenceTransformer, util
import torch

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Sentence pairs to score against each other.
sentence_pairs = [
    ("He is driving a car.", "A man is operating a vehicle."),
    ("She enjoys reading books.", "Reading is a hobby she likes."),
    ("The sun is shining brightly.", "It’s a very cloudy day."),
    ("He ate an apple.", "He consumed a fruit."),
]

print("Anlamsal Benzerlik Skorları:\n")

for pair in sentence_pairs:
    s1, s2 = pair

    # Encode each side of the pair as a tensor, then take the cosine similarity.
    left_embedding = model.encode(s1, convert_to_tensor=True)
    right_embedding = model.encode(s2, convert_to_tensor=True)
    similarity = util.cos_sim(left_embedding, right_embedding).item()

    print(f"Cümle 1: {s1}")
    print(f"Cümle 2: {s2}")
    print(f"Benzerlik Skoru: {similarity:.4f}\n")


Anlamsal Benzerlik Skorları:

Cümle 1: He is driving a car.
Cümle 2: A man is operating a vehicle.
Benzerlik Skoru: 0.6113

Cümle 1: She enjoys reading books.
Cümle 2: Reading is a hobby she likes.
Benzerlik Skoru: 0.8161

Cümle 1: The sun is shining brightly.
Cümle 2: It’s a very cloudy day.
Benzerlik Skoru: 0.4316

Cümle 1: He ate an apple.
Cümle 2: He consumed a fruit.
Benzerlik Skoru: 0.7462



0.9–1.0: Çok yüksek benzerlik

0.6–0.9: Anlamsal olarak benzer

0.5–0.6: Sınırda benzerlik; bağlama göre yorumlanmalıdır

< 0.5: Anlam farkı olabilir

**2) Duygu Analizi**

In [5]:
from transformers import pipeline

# Pin the checkpoint and revision explicitly instead of relying on the task
# default: the default can change between library versions, and the library
# warns against unpinned pipelines. These are the values the default resolved to.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

texts = [
    "This product is amazing!",
    "I'm really disappointed in the service.",
    "It was okay, not great but not terrible.",
]

# Passing a list yields one result dict per input text, in the same order.
results = classifier(texts)

for text, result in zip(texts, results):
    print(f"\nText: {text}")
    print(f"Label: {result['label']} (Score: {result['score']:.2f})")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu



Text: This product is amazing!
Label: POSITIVE (Score: 1.00)

Text: I'm really disappointed in the service.
Label: NEGATIVE (Score: 1.00)

Text: It was okay, not great but not terrible.
Label: POSITIVE (Score: 1.00)


Türkçe Model ile Duygu Analizi

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

model_name = "savasy/bert-base-turkish-sentiment-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

text = "Film harikaydı, çok beğendim."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: no gradient tracking needed for the forward pass.
with torch.no_grad():
    logits = model(**inputs).logits

# Turn the logits into probabilities over the class dimension.
probs = F.softmax(logits, dim=1)

# NOTE(review): assumes class index 1 is the positive label — confirm against
# model.config.id2label.
positive_score = probs[0][1].item()
print("Pozitiflik skoru:", positive_score)


tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/442M [00:00<?, ?B/s]

Pozitiflik skoru: 0.9765626788139343
