--- WiDiD: Incremental Word Sense Discovery for Parliamentary Speeches ---
Analyzes all available term-year combinations in the data

In [1]:
# ##Â Imports 
%pip install "elasticsearch==8.6.2" sentence-transformers scikit-learn pandas matplotlib
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
from sklearn.cluster import AffinityPropagation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re, os


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# ## Configuration

# Data source selection: "elasticsearch" or "csv"
DATA_SOURCE = "csv"


INDEX_NAME = "parliament_speeches"
ES_URL = "http://localhost:9200"   # adjust if different

CSV_PATH = "../data/speeches_full.csv"

# Analysis configuration
TARGET_WORDS = ["katar", "salÃ§a"] 
OUTPUT_DIR = "./widid_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [3]:
# ## Connect to Elasticsearch
if DATA_SOURCE == "elasticsearch":
    es = Elasticsearch(ES_URL)
    print("Connected to Elasticsearch ")
    print(es.info().body["version"]["number"])
else:
    es = None
    print("Using CSV data source - skipping Elasticsearch connection")

Using CSV data source - skipping Elasticsearch connection


## Helper Functions

In [None]:
def _ensure_csv_cache():
    """Ensure CSV cache is loaded if using CSV source."""
    global _csv_cache
    if DATA_SOURCE == "csv" and '_csv_cache' not in globals():
        print("Loading CSV data into memory...")
        _csv_cache = pd.read_csv(CSV_PATH)
        _csv_cache = _csv_cache.dropna(subset=['content', 'term', 'year'])
        print(f"Loaded {len(_csv_cache)} speeches from CSV")


def get_term_year_combinations():
    """Get all available term-year combinations from the data source."""
    if DATA_SOURCE == "elasticsearch":
        # Query for all unique term-year combinations
        query = {
            "size": 0,
            "aggs": {
                "by_term": {
                    "terms": {"field": "term", "size": 1000},
                    "aggs": {
                        "by_year": {
                            "terms": {"field": "year", "size": 100}
                        }
                    }
                }
            }
        }
        res = es.search(index=INDEX_NAME, body=query)
        
        combinations = []
        for term_bucket in res["aggregations"]["by_term"]["buckets"]:
            term = term_bucket["key"]
            for year_bucket in term_bucket["by_year"]["buckets"]:
                year = year_bucket["key"]
                combinations.append((term, year))
        return sorted(combinations)
    
    elif DATA_SOURCE == "csv":
        # Ensure cache is loaded
        _ensure_csv_cache()
        # Use cached CSV data to get unique term-year combinations
        combinations = _csv_cache[['term', 'year']].drop_duplicates().sort_values(['term', 'year'])
        return list(combinations.itertuples(index=False, name=None))
    
    else:
        raise ValueError(f"Invalid DATA_SOURCE: {DATA_SOURCE}. Must be 'elasticsearch' or 'csv'")


def fetch_speeches(term, year, size=10000):
    """Fetch speeches for a specific term and year from either Elasticsearch or CSV."""
    if DATA_SOURCE == "elasticsearch":
        query = {
            "size": size,
            "_source": ["content", "term", "year"],
            "query": {
                "bool": {
                    "must": [
                        {"term": {"term": term}},
                        {"term": {"year": year}}
                    ]
                }
            }
        }
        res = es.search(index=INDEX_NAME, body=query)
        return [hit["_source"]["content"] for hit in res["hits"]["hits"]]
    
    elif DATA_SOURCE == "csv":

        _ensure_csv_cache()

        filtered_df = _csv_cache[(_csv_cache['term'] == term) & (_csv_cache['year'] == year)]
        speeches = filtered_df['content'].tolist()
        return speeches[:size]  # Limit to size parameter
    
    else:
        raise ValueError(f"Invalid DATA_SOURCE: {DATA_SOURCE}. Must be 'elasticsearch' or 'csv'")

In [5]:
def extract_contexts(texts, target_word, window=10):
    """Extract short context windows around target word and its morphological variations."""
    contexts = []
    # Create regex pattern to match target word and any Turkish suffixes
    pattern = re.compile(rf"\b{re.escape(target_word.lower())}\w*\b")
    
    for t in texts:
        tokens = re.findall(r"\w+", t.lower()) # simple tokenization, one or more word characters 
        for i, tok in enumerate(tokens):
            # Use regex to match the word and its variations
            if pattern.match(tok):
                start = max(0, i - window)
                end = min(len(tokens), i + window + 1)
                snippet = " ".join(tokens[start:end])
                contexts.append(snippet)
    return contexts

In [6]:
def compute_embeddings(model, contexts):
    """Compute embeddings for context snippets."""
    if len(contexts) == 0:
        return np.empty((0, model.get_sentence_embedding_dimension()))
    return model.encode(contexts, show_progress_bar=True)

In [7]:
def get_cluster_prototypes(X, labels):
    """Compute centroids for each cluster."""
    clusters = []
    for label in np.unique(labels):
        members = X[labels == label]
        centroid = np.mean(members, axis=0)
        clusters.append(centroid)
    return np.array(clusters)

In [8]:
def show_top_contexts(contexts, labels, n=3):
    """Print representative contexts for each cluster."""
    df = pd.DataFrame({"cluster": labels, "context": contexts})
    grouped = df.groupby("cluster")["context"].apply(list)
    for cluster, examples in grouped.items():
        print(f"\nðŸŒ€ Cluster {cluster} ({len(examples)} examples):")
        for ex in examples[:n]:
            print("   â€¢", ex[:200].replace("\n", " ") + ("..." if len(ex) > 200 else ""))

In [9]:
# ## Load Sentence Transformer Model
model = SentenceTransformer("all-MiniLM-L6-v2") 
print("Model loaded ")

Model loaded 


In [10]:
# ## Get all available term-year combinations
print("Getting all term-year combinations from data...")
all_combinations = get_term_year_combinations()
print(f"Found {len(all_combinations)} term-year combinations:")
for term, year in all_combinations:
    print(f"  - Term {term}, Year {year}")


Getting all term-year combinations from data...
Loading CSV data into memory...
Loaded 30106 speeches from CSV
Found 19 term-year combinations:
  - Term 23, Year 2
  - Term 23, Year 3
  - Term 23, Year 4
  - Term 23, Year 5
  - Term 24, Year 1
  - Term 24, Year 2
  - Term 24, Year 3
  - Term 25, Year 1
  - Term 25, Year 2
  - Term 26, Year 1
  - Term 26, Year 2
  - Term 26, Year 3
  - Term 27, Year 1
  - Term 27, Year 2
  - Term 27, Year 3
  - Term 27, Year 4
  - Term 27, Year 5
  - Term 27, Year 6
  - Term 28, Year 1


In [None]:
if DATA_SOURCE == "csv":
    try:
        if '_csv_cache' not in globals():
            print("Pre-loading CSV data into memory...")
            _csv_cache = pd.read_csv(CSV_PATH)
            _csv_cache = _csv_cache.dropna(subset=['content', 'term', 'year'])
            print(f"Loaded {len(_csv_cache)} speeches from CSV")
            print(f"Available terms: {sorted(_csv_cache['term'].unique())}")
            print(f"Available years: {sorted(_csv_cache['year'].unique())}")
        else:
            print("CSV cache already loaded")
    except NameError:
        # Cache not initialized yet
        pass
else:
    _csv_cache = None


CSV cache already loaded


In [None]:
# Generate t-SNE plots for all term-year combinations
print("\n=== Generating t-SNE plots for all term-year combinations ===\n")

for target_word in TARGET_WORDS:
    print(f"\n  Processing '{target_word}'...")
    
    for term, year in all_combinations:
        print(f"    Term {term}, Year {year}: ", end="")
        
        # Fetch data
        texts = fetch_speeches(term, year)
        
        # Extract contexts
        contexts = extract_contexts(texts, target_word)
        print(f"{len(contexts)} contexts")
        
        if len(contexts) < 10:  # Lower threshold for individual plots
            print(f"      Skipping - insufficient contexts")
            continue
        
        # Compute embeddings
        emb = compute_embeddings(model, contexts)
        
        # Cluster with AffinityPropagation
        ap = AffinityPropagation(random_state=42)
        labels = ap.fit_predict(emb)
        
        # Generate t-SNE visualization
        
        n_samples = len(emb)
        perplexity = min(30, max(5, n_samples - 1))  # Use 30 if enough samples, otherwise adapt
        
        tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity).fit_transform(emb)
        
        plt.figure(figsize=(10,8))
        scatter = plt.scatter(tsne[:,0], tsne[:,1], c=labels, cmap="tab20", alpha=0.6, s=50)
        plt.colorbar(scatter, label="Cluster ID")
        plt.title(f"'{target_word}' Senses\nTerm {term}, Year {year} (n={len(contexts)}, clusters={len(np.unique(labels))}, perplexity={perplexity})", fontsize=12)
        plt.xlabel("t-SNE dimension 1", fontsize=10)
        plt.ylabel("t-SNE dimension 2", fontsize=10)
        
        tsne_path = os.path.join(OUTPUT_DIR, f"tsne_term{term}_y{year}_{target_word}.png")
        plt.savefig(tsne_path, bbox_inches="tight", dpi=150)
        plt.close()
        
        print(f"      Saved: {os.path.basename(tsne_path)}")

print("\nâœ“ All t-SNE plots generated!")



=== Generating t-SNE plots for all term-year combinations ===


  Processing 'katar'...
    Term 23, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 23, Year 3: 1 contexts
      Skipping - insufficient contexts
    Term 23, Year 4: 7 contexts
      Skipping - insufficient contexts
    Term 23, Year 5: 4 contexts
      Skipping - insufficient contexts
    Term 24, Year 1: 0 contexts
      Skipping - insufficient contexts
    Term 24, Year 2: 18 contexts


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


      Saved: tsne_term24_y2_katar.png
    Term 24, Year 3: 15 contexts


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

      Saved: tsne_term24_y3_katar.png
    Term 25, Year 1: 0 contexts
      Skipping - insufficient contexts
    Term 25, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 26, Year 1: 34 contexts


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

      Saved: tsne_term26_y1_katar.png
    Term 26, Year 2: 164 contexts


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

      Saved: tsne_term26_y2_katar.png
    Term 26, Year 3: 85 contexts


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

      Saved: tsne_term26_y3_katar.png
    Term 27, Year 1: 1 contexts
      Skipping - insufficient contexts
    Term 27, Year 2: 58 contexts


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

      Saved: tsne_term27_y2_katar.png
    Term 27, Year 3: 188 contexts


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

      Saved: tsne_term27_y3_katar.png
    Term 27, Year 4: 152 contexts


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

      Saved: tsne_term27_y4_katar.png
    Term 27, Year 5: 150 contexts


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

      Saved: tsne_term27_y5_katar.png
    Term 27, Year 6: 166 contexts


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

      Saved: tsne_term27_y6_katar.png
    Term 28, Year 1: 4 contexts
      Skipping - insufficient contexts

  Processing 'salÃ§a'...
    Term 23, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 23, Year 3: 0 contexts
      Skipping - insufficient contexts
    Term 23, Year 4: 0 contexts
      Skipping - insufficient contexts
    Term 23, Year 5: 0 contexts
      Skipping - insufficient contexts
    Term 24, Year 1: 0 contexts
      Skipping - insufficient contexts
    Term 24, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 24, Year 3: 1 contexts
      Skipping - insufficient contexts
    Term 25, Year 1: 0 contexts
      Skipping - insufficient contexts
    Term 25, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 26, Year 1: 1 contexts
      Skipping - insufficient contexts
    Term 26, Year 2: 0 contexts
      Skipping - insufficient contexts
    Term 26, Year 3: 0 contexts
      Skipping - insufficient contexts
    Term 27, 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

      Saved: tsne_term27_y4_salÃ§a.png
    Term 27, Year 5: 1 contexts
      Skipping - insufficient contexts
    Term 27, Year 6: 6 contexts
      Skipping - insufficient contexts
    Term 28, Year 1: 0 contexts
      Skipping - insufficient contexts

âœ“ All t-SNE plots generated!


In [None]:
# Generate heatmaps / similarity matrix

print("\n=== Starting comparative analysis (semantic drift between consecutive years) ===\n")
from collections import defaultdict
terms_data = defaultdict(list)
for term, year in all_combinations:
    terms_data[term].append(year)


for target_word in TARGET_WORDS:
    print(f"\n\n===  Analyzing '{target_word}' ===")

    # Process each term
    for current_term, years in sorted(terms_data.items()):
        # Only process consecutive year pairs
        sorted_years = sorted(years)
        if len(sorted_years) < 2:
            print(f"\nSkipping Term {current_term}: insufficient years (has {len(sorted_years)})")
            continue
        
        # Process all consecutive year pairs
        for i in range(len(sorted_years) - 1):
            year1, year2 = sorted_years[i], sorted_years[i+1]
            
            print(f"\n  Processing Term {current_term}: Year {year1} â†’ Year {year2}")
            
            # Fetch data
            texts_y1 = fetch_speeches(current_term, year1)
            texts_y2 = fetch_speeches(current_term, year2)
            
            # Extract contexts
            contexts_y1 = extract_contexts(texts_y1, target_word)
            contexts_y2 = extract_contexts(texts_y2, target_word)
            print(f"    Contexts Year {year1}: {len(contexts_y1)}, Year {year2}: {len(contexts_y2)}")
            
            if len(contexts_y1) < 30 or len(contexts_y2) < 30:
                print(f"    Not enough contexts, skipping...")
                continue
            
            # Compute embeddings
            emb_y1 = compute_embeddings(model, contexts_y1)
            emb_y2 = compute_embeddings(model, contexts_y2)
            
            # --- Year 1: AP ---
            ap1 = AffinityPropagation(random_state=42)
            ap1.fit(emb_y1)
            labels_y1 = ap1.labels_
            prototypes_y1 = get_cluster_prototypes(emb_y1, labels_y1)
            print(f"    Year {year1} clusters: {len(prototypes_y1)}")
            
            # --- Year 2: APP (Incremental) ---
            combined = np.vstack([emb_y2, prototypes_y1])
            ap2 = AffinityPropagation(random_state=42)
            ap2.fit(combined)
            labels_combined = ap2.labels_
            labels_y2 = labels_combined[:len(emb_y2)]
            prototypes_y2 = get_cluster_prototypes(combined, labels_combined)
            print(f"    Year {year2} clusters: {len(prototypes_y2)}")
            
            # --- Print representative contexts ---
            print(f"\n    === Year {year1} Clusters ===")
            show_top_contexts(contexts_y1, labels_y1)
            print(f"\n    === Year {year2} Clusters ===")
            show_top_contexts(contexts_y2, labels_y2)
            
            # --- Save CSV results ---
            df_y1 = pd.DataFrame({"year": year1, "context": contexts_y1, "cluster": labels_y1})
            df_y2 = pd.DataFrame({"year": year2, "context": contexts_y2, "cluster": labels_y2})
            df_all = pd.concat([df_y1, df_y2], ignore_index=True)
            csv_path = os.path.join(OUTPUT_DIR, f"widid_term{current_term}_y{year1}vs{year2}_{target_word}.csv")
            df_all.to_csv(csv_path, index=False)
            print(f"    Saved clusters to {csv_path}")
            
            # --- Compare prototypes (semantic drift) ---
            sim_matrix = cosine_similarity(prototypes_y1, prototypes_y2)
            df_sim = pd.DataFrame(sim_matrix, 
                                  index=[f"Y{year1}_{i}" for i in range(len(prototypes_y1))],
                                  columns=[f"Y{year2}_{j}" for j in range(len(prototypes_y2))])
            
            plt.figure(figsize=(6,4))
            plt.imshow(sim_matrix, cmap="Blues")
            plt.colorbar(label="Cosine Similarity")
            plt.title(f"Semantic Drift: '{target_word}' (Term {current_term}, Y{year1}â†’Y{year2})")
            plt.xlabel(f"Year {year2} Senses")
            plt.ylabel(f"Year {year1} Senses")
            heatmap_path = os.path.join(OUTPUT_DIR, f"heatmap_term{current_term}_y{year1}vs{year2}_{target_word}.png")
            plt.savefig(heatmap_path, bbox_inches="tight")
            plt.close()  # Close to avoid display
            
            # Note: t-SNE plots for individual years are already generated above
            print(f"    Saved results for '{target_word}' (Term {current_term}, Y{year1}â†’Y{year2})")
            print("    " + "-"*50)


=== Starting comparative analysis (semantic drift between consecutive years) ===



===  Analyzing 'katar' ===

  Processing Term 23: Year 2 â†’ Year 3
    Contexts Year 2: 0, Year 3: 1
    Not enough contexts, skipping...

  Processing Term 23: Year 3 â†’ Year 4
    Contexts Year 3: 1, Year 4: 7
    Not enough contexts, skipping...

  Processing Term 23: Year 4 â†’ Year 5
    Contexts Year 4: 7, Year 5: 4
    Not enough contexts, skipping...

  Processing Term 24: Year 1 â†’ Year 2
    Contexts Year 1: 0, Year 2: 18
    Not enough contexts, skipping...

  Processing Term 24: Year 2 â†’ Year 3
    Contexts Year 2: 18, Year 3: 15
    Not enough contexts, skipping...

  Processing Term 25: Year 1 â†’ Year 2
    Contexts Year 1: 0, Year 2: 0
    Not enough contexts, skipping...

  Processing Term 26: Year 1 â†’ Year 2
    Contexts Year 1: 34, Year 2: 164


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

    Year 1 clusters: 8
    Year 2 clusters: 15

    === Year 1 Clusters ===

ðŸŒ€ Cluster 0 (4 examples):
   â€¢ doÄŸu da arabulucu ve barÄ±ÅŸtan yana tutum sergilendiÄŸi bu kapsamda katar la askerÃ® iÅŸ birliÄŸi anlaÅŸmasÄ±nÄ±n yapÄ±ldÄ±ÄŸÄ± tbmm nin onayladÄ±ÄŸÄ± antlaÅŸmayla
   â€¢ herhangi bir sakÄ±nca sÃ¶z konusu deÄŸildir siz tamamen kiÅŸisel yorumlarÄ±nÄ±zÄ± katarak hdp Ã¼zerinden bir niyet okuma anlayÄ±ÅŸÄ±yla her kÃ¼rdistan kelimesi geÃ§tiÄŸinde
   â€¢ dedi bilmem ne dedi ama en sonunda dÃ¶nÃ¼p dolaÅŸtÄ± suud katar el nusra el kaide ÅŸer odaÄŸÄ±nÄ±n maalesef bir parÃ§asÄ± hÃ¢line

ðŸŒ€ Cluster 1 (1 examples):
   â€¢ 04 2006 5484 gÃ¼ney afrika cumhuriyeti 01 06 2006 5513 katar devleti 01 06 2006 5514 bosna hersek 174 v ergi

ðŸŒ€ Cluster 2 (4 examples):
   â€¢ Ã¼lkesinde baÄ£ka para birimlerine mesela dolara dÃ¶nÃ¼Ä£ebilmesine konvertibilitesine engel deÄŸildir katar riyali Ã¼lkemizde konvertible deÄŸilken yani elimizdeki tl yi merkez bankasÄ±
   â€¢ elimizdeki tl yi merkez ban



    Saved results for 'katar' (Term 26, Y1â†’Y2)
    --------------------------------------------------

  Processing Term 26: Year 2 â†’ Year 3
    Contexts Year 2: 164, Year 3: 85


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

    Year 2 clusters: 23
    Year 3 clusters: 11

    === Year 2 Clusters ===

ðŸŒ€ Cluster 0 (2 examples):
   â€¢ havaalanÄ±nÄ±n proje maliyeti 50 milyon avrodur diÄŸer gelirleri de hesaba katarsak firma en az sekiz dokuz yÄ±lda yatÄ±rÄ±m maliyetini finanse edecektir
   â€¢ bir bÃ¼yÃ¼k firma bir tavukÃ§uluk firmasÄ± brezilya ortaklÄ± firmalara satÄ±ldÄ± katarlÄ±lar da var tabii her zaman sizin ortaklÄ±k yaptÄ±ÄŸÄ±nÄ±z katarlÄ±lar da

ðŸŒ€ Cluster 1 (21 examples):
   â€¢ baÅŸÄ±na dÃ¼ÅŸen millÃ® geliri bizden daha iyi onlardan iyi durumdayÄ±z katar Ä± da sÃ¶yleyeyim herhÃ¢lde 100 bin dolara yakÄ±n kiÅŸi baÅŸÄ±na
   â€¢ tek adam rejimlerine diktatÃ¶rlÃ¼klere karÅŸÄ± bu kadar hassas olan hÃ¼kÃ»metinizin katar daki tek adam rejimine faÅŸist diktatÃ¶rlÃ¼ÄŸe hanedanlÄ±ÄŸa karÅŸÄ± bu sempatisinin
   â€¢ daki bÃ¼tÃ¼n Ã¼lkelere tek adam rejimi diktatÃ¶rlÃ¼k diye tanÄ±mlar yaptÄ±nÄ±z katar da demokrasi mi var bi li m sanayi ve teknoloji

ðŸŒ€ Cluster 2 (5 examples):
   â€¢ stanbul teÅŸekkÃ¼r eder

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

    Year 2 clusters: 10
    Year 3 clusters: 11

    === Year 2 Clusters ===

ðŸŒ€ Cluster 0 (1 examples):
   â€¢ verde 64 kamboÃ§ya 65 kamerun 66 kanada 67 karadaÄŸ 68 katar 69 kazakistan 70 kenya 71 kÄ±rgÄ±zistan 72 kktc 73 kolombiya

ðŸŒ€ Cluster 1 (1 examples):
   â€¢ dÃ¼nkÃ¼ toplantÄ±sÄ±nda cumhurbaÅŸkanÄ± yardÄ±mcÄ±sÄ± sayÄ±n fuat oktay cumhurbaÅŸkanÄ±nÄ±n binmesi iÃ§in katar dan alÄ±nan uÃ§an sarayla ilgili sorulara hem cevap vermiÅŸ hem

ðŸŒ€ Cluster 2 (2 examples):
   â€¢ ithal ediyoruz rusya azerbaycan i ran ve lng olarak cezayir katar nijerya gibi Ã¼lkelerden 8 10 Ã§eÅŸit kaynak Ã¼lkenin Ã¼rÃ¼nlerini burada
   â€¢ amerika gÃ¶rÃ¼ÅŸÃ¼yor rusya gÃ¶rÃ¼ÅŸÃ¼yor i ran gÃ¶rÃ¼ÅŸÃ¼yor suudi arabistan gÃ¶rÃ¼ÅŸÃ¼yor katar gÃ¶rÃ¼ÅŸÃ¼yor ama siz gÃ¶rÃ¼ÅŸmÃ¼yorsunuz neden Ã§Ã¼nkÃ¼ Ã¶yle bir angajmana girdiniz

ðŸŒ€ Cluster 3 (1 examples):
   â€¢ de gÃ¼venlik gÃ¼Ã§lerimizin terÃ¶rle mÃ¼cadeleyi etkin bir ÅŸekilde gecesini gÃ¼ndÃ¼zÃ¼ne katarak sÃ¼rdÃ¼rmesi takdire ÅŸayandÄ±r deÄŸerli milletv

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

    Year 3 clusters: 28
    Year 4 clusters: 17

    === Year 3 Clusters ===

ðŸŒ€ Cluster 0 (2 examples):
   â€¢ noktalarÄ±ndan olan somali yle beraber yine bir baÅŸka denizaÅŸÄ±rÄ± Ã¼lkede katar da bulunan askerÃ® Ã¼slerimizin varlÄ±ÄŸÄ± ismi anÄ±lan bu iki Ã¼lkeyle
   â€¢ de dikkate alÄ±ndÄ±ÄŸÄ±nda kÃ¼resel enerji taÅŸÄ±macÄ±lÄ±ÄŸÄ± anlamÄ±nda stratejik noktalarda bulunan katar ve somali gibi iki Ã¶nemli Ã¼lkede askerÃ® Ã¼slerimiz vasÄ±tasÄ±yla bulunmamÄ±z

ðŸŒ€ Cluster 1 (3 examples):
   â€¢ si katar ordusuna verilen 13 yÃ¶netim kurulu Ã¼yesinin 7 si katar ordusuna verilen yani yetkinin tamamÄ± katar ordusuna verilen bir proje
   â€¢ kurulu Ã¼yesinin 7 si katar ordusuna verilen yani yetkinin tamamÄ± katar ordusuna verilen bir proje millÃ® olamaz bu bir i ki
   â€¢ fabrikayÄ± diyor kendi ordusunun bir tesisini bu kadar kÃ¼Ã§Ã¼ltÃ¼cÃ¼ ÅŸekilde katar ordusununsa bir ÅŸirketini Ã¶vÃ¼ne Ã¶vÃ¼ne anlatan bir siyasetÃ§iyle ilk defa

ðŸŒ€ Cluster 2 (6 examples):
   â€¢ tabii burada sÃ¶ylem 

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

    Year 4 clusters: 25
    Year 5 clusters: 15

    === Year 4 Clusters ===

ðŸŒ€ Cluster 0 (8 examples):
   â€¢ seÃ§meninin yÃ¼zde 55 i bÃ¼tÃ¼n anketlerde diyor ki tank paletin katar a satÄ±ÅŸÄ± yanlÄ±ÅŸtÄ±r mikrofon otomatik cihaz tarafÄ±ndan kapatÄ±ldÄ± baÅŸkan tamamlayÄ±n
   â€¢ taktÄ± bir Ã§arpÄ±tmanÄ±n peÅŸine taktÄ± duyan duymayan herkes rahatsÄ±z oldu katar a tank paletin satÄ±ÅŸÄ±ndan bir daha haberdar oldu bundan ak
   â€¢ verirlerse olur dedim o da yÃ¼zde 50 eksi 1 ini katarlÄ±lara vermek Ã¼zere bu anlaÅŸmayÄ± yaptÄ± diyor aÃ§Ä±kÃ§a cumhurbaÅŸkanÄ±nÄ±zÄ±n tank palet

ðŸŒ€ Cluster 1 (3 examples):
   â€¢ devasa uÃ§ak sabiha gÃ¶kÃ§en e inecek sakarya da ne kadar katarlÄ± asker katarlÄ± mÃ¼hendis katarlÄ± Ã§alÄ±ÅŸan varsa uÃ§aÄŸa binecek mustafa arslan
   â€¢ sabiha gÃ¶kÃ§en e inecek sakarya da ne kadar katarlÄ± asker katarlÄ± mÃ¼hendis katarlÄ± Ã§alÄ±ÅŸan varsa uÃ§aÄŸa binecek mustafa arslan tokat ya
   â€¢ e inecek sakarya da ne kadar katarlÄ± asker katarlÄ± mÃ¼hendis katarlÄ

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

    Year 5 clusters: 25
    Year 6 clusters: 14

    === Year 5 Clusters ===

ðŸŒ€ Cluster 0 (14 examples):
   â€¢ dÄ±ÅŸ aktÃ¶r arÄ±yorsanÄ±z bunlara bakÄ±n bugÃ¼n dÄ±ÅŸiÅŸleri bakanÄ± mevlÃ¼t Ã§avuÅŸoÄŸlu katar da basÄ±na yansÄ±yan baÅŸlÄ±ÄŸa bakÄ±yoruz tÃ¼rkiye nin ekonomik gidiÅŸi nedeniyle
   â€¢ tÃ¼rkiye nin ekonomik gidiÅŸi nedeniyle ortaya Ã§Ä±kacak fÄ±rsatlarÄ± deÄŸerlendiriyoruz bu katar dÄ±ÅŸiÅŸleri bakanÄ±nÄ±n sÃ¶zÃ¼ nedir bu fÄ±rsatlar bu fÄ±rsatlar bu Ã¼lkenin
   â€¢ alkÄ±ÅŸlar ÅŸimdi beyefendi katar a gitmiÅŸ mithat hocam da sÃ¶yledi katar dÄ±ÅŸiÅŸleri bakanÄ± ekonomik gidiÅŸat nedeniyle tÃ¼rkiye de ortaya Ã§Ä±kacak fÄ±rsatlarÄ±

ðŸŒ€ Cluster 1 (6 examples):
   â€¢ yanÄ±ma zengin bir ortak almam lazÄ±m neredeyse kan baÄŸÄ±nÄ±z olduÄŸu katarlÄ±lar aklÄ±na geliyor ve katarlÄ±larla baÄŸlantÄ± kuruyorlar sana katarlÄ± bir ortak
   â€¢ almam lazÄ±m neredeyse kan baÄŸÄ±nÄ±z olduÄŸu katarlÄ±lar aklÄ±na geliyor ve katarlÄ±larla baÄŸlantÄ± kuruyorlar sana katarlÄ± bir ortak vereli