In [57]:
from transformers import BertTokenizer, BertModel
import torch
from sklearn.model_selection import train_test_split
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import random
import folium

In [2]:
data = pd.read_csv('tweets_with_labels.csv')

In [26]:
X = data["content_no_rare_words"]  
y = data["predicted_label"]
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
label_mapping

{'acil': 0, 'bilgilendirme': 1, 'destek': 2}

In [27]:
# Train-test ayırma (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train,y_train

(1378    ['güzel', 'günle', 'göster', 'rabbi', 'aç', 'y...
 1827    ['allah', 'akıl', 'durdur', 'görüntü', 'gerçek...
 678     ['yıl', 'deprem', 'tweet', 'paylaş', 'kahraman...
 1083    ['çocuk', 'haber', 'gel', 'gir', 'durum', 'bul...
 1559    ['deprem', 'bölge', 'yara', 'kullan', 'paylaş'...
                               ...                        
 1130    ['deprem', 'kara', 'ol', 'dünya', 'şiddet', 'd...
 1294    ['ali', 'enkaz', 'alt', 'mahalle', 'cadde', 'a...
 860     ['sondakika', 'deprem', 'geçmiş', 'şubat', 'bü...
 1459    ['sondakika', 'deprem', 'geçmiş', 'şubat', 'ad...
 1126    ['mahalle', 'sokak', 'karşı', 'karşı', 'adıyam...
 Name: content_no_rare_words, Length: 1596, dtype: object,
 array([1, 2, 0, ..., 0, 0, 0]))

In [28]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(set(y)))  # y'nin benzersiz etiket sayısı kadar çıkacak

# Verinizi tokenize etme
def tokenize_data(texts, tokenizer, max_length=128):
    return tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt')

train_encodings = tokenize_data(X_train.tolist(), tokenizer)
test_encodings = tokenize_data(X_test.tolist(), tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = CustomDataset(train_encodings, y_train.tolist())
test_dataset = CustomDataset(test_encodings, y_test.tolist())

In [30]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

optimizer = AdamW(model.parameters(), lr=5e-5)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [31]:
epochs = 3
for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} completed with loss: {loss.item()}")

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch 1 completed with loss: 0.9144275784492493
Epoch 2 completed with loss: 0.36982882022857666
Epoch 3 completed with loss: 0.6141207218170166


In [32]:
model.eval()
correct_preds = 0
total_preds = 0

with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct_preds += (predictions == batch['labels']).sum().item()
        total_preds += batch['labels'].size(0)

accuracy = correct_preds / total_preds
print(f"Test Accuracy: {accuracy:.4f}")

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Test Accuracy: 0.8246


In [47]:
def predict_label(text):
    # Metni tokenization yap
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)

    # Modeli çalıştır
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # Logits'ten tahmin edilen etiketin çıkartılması
    predicted_label = torch.argmax(logits, dim=1).item()

    return predicted_label

# Test etmek için örnek bir metin
input_text = "acil bölge deprem altında kaldım yardım edin"
predicted_class = predict_label(input_text)

predicted_label = {v: k for k, v in label_mapping.items()}[predicted_class]
print(predicted_label)

destek


In [48]:
predicted_labels = []
for text in X_test:  # X_test içerisindeki her metni alıyoruz
    predicted_class = predict_label(text)  # Modelin tahmini
    # Sayısal tahmini etiketle eşleştir
    predicted_label = label_encoder.inverse_transform([predicted_class])[0]  # Etiketi geri alıyoruz
    predicted_labels.append(predicted_label)

# Sonuçları yazdırma
for input_text, predicted_label in zip(X_test, predicted_labels):
    print(f"Girdi: {input_text}\nTahmin Edilen Etiket: {predicted_label}\n")

Girdi: ['“', 'allah', '”', 'deprem']
Tahmin Edilen Etiket: bilgilendirme

Girdi: ['yardım', 'tır', 'gönder', 'dikkat', 'tır', 'iç', 'koy', 'gelen', 'gel', 'hatay', 'deprem']
Tahmin Edilen Etiket: acil

Girdi: ['ülke', '’', '’', 'in', 'oluş', 'deprem', 'bölge', 'gönder', 'dış', 'saat', 'mevcut', 'ara', 'kurtar', 'görev', 'ulaş', 'toplam', 'sayı', 'yetersiz', 'insan', 'soğuk', 'enkaz', 'alt']
Tahmin Edilen Etiket: acil

Girdi: ['sor', 'şeyma', 'deprem', 'şeyma', 'deprem', 'ülke', 'yaşa', 'deprem', 'il', 'kahramanmaraş', 'yaşa', 'gün', 'izmir', '’', 'in', 'kader', 'aile', 'iyi', 'yap', 'hakk', 'ara']
Tahmin Edilen Etiket: acil

Girdi: ['deprem', 'destek']
Tahmin Edilen Etiket: bilgilendirme

Girdi: ['deprem', 'afet', 'afet', 'bul', 'insa', 'hizmet', 'gir']
Tahmin Edilen Etiket: bilgilendirme

Girdi: ['yap', 'insa', 'gerek', 'yap', 'bina', 'kader', 'afad']
Tahmin Edilen Etiket: bilgilendirme

Girdi: ['arkadaş', 'takip', 'sayı', 'alt', 'insan', 'tweet', 'tweet', 'alt', 'rt', 'yap', 'yardım'

In [81]:
label_color_map = {
    "acil": "red",            
    "bilgilendirme": "blue",   
    "destek": "darkgreen",      
}
icons_map = {
    "acil": "exclamation-circle",
    "bilgilendirme": "info-circle",  
    "destek": "flag",         
}

In [82]:
locations = []

def generate_random_location():
    lat = random.uniform(35.52, 37.4) 
    lon = random.uniform(35.40, 36.35)  
    return lat, lon


for label in predicted_labels:
    lat, lon = generate_random_location()  
    locations.append((lat, lon, label))  # Etiket ile birlikte konumu sakla

# Harita başlangıç konumu (İstanbul)
map_center = [35.52, 36.4]  # İstanbul'un ortalama koordinatları
map = folium.Map(location=map_center, zoom_start=10)

# Her konumu ve etiketini haritada işaretleyelim
for lat, lon, label in locations:
    color = label_color_map.get(label, "gray")   
    icon = icons_map.get(label, "info-circle")
    folium.Marker(
        [lat, lon],
        popup=f"{label.upper()}",  # Etiketin büyük harfli versiyonu
        icon=folium.Icon(
            color=color,            
            icon=icon,       # Kullanılacak simge (font awesome)
            prefix="fa",            # Font Awesome simgelerini kullan
        )
    ).add_to(map)

# Haritayı kaydet
map.save("map_with_custom_icons.html")