In [52]:
from transformers import pipeline
import torch
import pandas as pd
import re
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import folium
from folium.plugins import HeatMap

In [2]:
pipe = pipeline("token-classification", model="akdeniz27/bert-base-turkish-cased-ner")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


# Example

In [3]:
text = "arkadaşımıza ulaşamıyoruz kahramanmaraş elbistan pınarbaşı mahallesi pınarbaşı caddesi cemre yapıcı kahramanmaraş deprem"

result = pipe(text)

print("Token ve Etiketler:")

location = ""
address_keywords = ["mahallesi", "caddesi", "sokak", "bulvarı", "köyü", "yolu", "mevkii", "mah.", "cad.", "sok.", "bul.", "köy.", "yol.", "mek.","apartman","apt"]

loc_tokens = []
for entity in result:
    token = entity['word']
    label = entity['entity']
    print(f"Token: {token}, Etiket: {label}")

    if "LOC" in label:  
        if token.startswith("##"):
            loc_tokens[-1] += token.lstrip("##")  
        else:
            loc_tokens.append(token)

words = text.split()  
for i, word in enumerate(words):
    if any(loc in word for loc in loc_tokens):
        location += word + " " 
    if any(keyword in word.lower() for keyword in address_keywords):
            location += word + " " 
loc_tokens
print("\nLocation:")
print(location.strip())

Token ve Etiketler:
Token: kahraman, Etiket: B-LOC
Token: ##maraş, Etiket: B-LOC
Token: elbis, Etiket: B-LOC
Token: ##tan, Etiket: B-LOC
Token: p, Etiket: B-LOC
Token: ##ınar, Etiket: B-LOC
Token: ##başı, Etiket: B-LOC
Token: p, Etiket: B-LOC
Token: ##ınar, Etiket: B-LOC
Token: ##başı, Etiket: B-LOC
Token: cem, Etiket: B-PER
Token: ##re, Etiket: B-PER
Token: kahraman, Etiket: B-LOC
Token: ##maraş, Etiket: B-LOC

Location:
kahramanmaraş elbistan pınarbaşı mahallesi pınarbaşı caddesi kahramanmaraş


In [4]:
text = "çocuk enkazın altında ai̇leşi̇ i̇ki̇ gündür acil müdahale edi̇lmesi̇ni̇ bekliyor adres akasya mahallesi şükrü balcı caddesi sara apartmanı hatay antakya iletişim deprem sondakikadeprem acil acildeprem"

result = pipe(text)

print("Token ve Etiketler:")

location = ""

loc_tokens = []
for entity in result:
    token = entity['word']
    label = entity['entity']
    print(f"Token: {token}, Etiket: {label}")

    # LOC etiketli tokenları topla
    if 'LOC' in label:  
            if not token.startswith('##'):  
                if location:  
                    location += " " 
                location += token
            else:
                location += token.lstrip('##')  


print("\nLocation:")
print(location.strip())


Token ve Etiketler:
Token: ak, Etiket: B-LOC
Token: ##asy, Etiket: B-LOC
Token: ##a, Etiket: B-LOC
Token: bal, Etiket: I-PER
Token: ##cı, Etiket: I-PER
Token: sar, Etiket: B-LOC
Token: ##a, Etiket: B-LOC
Token: hata, Etiket: B-LOC
Token: ##y, Etiket: B-LOC
Token: ant, Etiket: B-LOC
Token: ##ak, Etiket: B-LOC
Token: ##ya, Etiket: B-LOC

Location:
akasya sara hatay antakya


In [5]:
geolocator = Nominatim(user_agent="geoapi")

# Adres olarak tam metni belirtiyoruz
address = "hatay esenlik mahallesi"

# Adresi enlem ve boylama çevir
location = geolocator.geocode(address)

if location:
    print(f"Adres: {address}")
    print(f"Enlem: {location.latitude}, Boylam: {location.longitude}")
else:
    print(f"Adres bulunamadı: {address}")

Adres: hatay esenlik mahallesi
Enlem: 36.2059571, Boylam: 36.1478466


# Get Location 

In [10]:
df = pd.read_csv("tweets_cleaned.csv")

In [11]:
def capitalize(text):
    words = text.split()
    capitalized_words = [word.capitalize() for word in words]
    capitalized_text = ' '.join(capitalized_words)
    return capitalized_text

df['content_wo_tokenize_capitalize'] = df['content_wo_normalize'].apply(capitalize)

In [13]:
def get_location_from_text(text):
    if not text.strip():
        return None 
    result = pipe(text) 
    location = "" 
    
    for entity in result:
        token = entity['word']
        label = entity['entity']

        if 'LOC' in label:  
            if not token.startswith('##'):  
                if location:  
                    location += " " 
                location += token
            else:
                location += token.lstrip('##')  
   

    return location.strip() if location else None

df['location'] = df['content_wo_tokenize_capitalize'].apply(get_location_from_text)

In [16]:
def remove_duplicates(text):
    if not text:
        return text
    words = text.split()
    unique_words = list(set(words))
    unique_words.sort(key=lambda x: words.index(x))  
    return ' '.join(unique_words)

df['location_unique'] = df['location'].apply(remove_duplicates)

## Heatmap Türkiye Map

In [17]:
sehirler = [
    "Adana", "Adıyaman", "Afyon", "Ağrı", "Amasya", "Ankara", "Antalya", "Artvin",
    "Aydın", "Balıkesir", "Bilecik", "Bingöl", "Bitlis", "Bolu", "Burdur", "Bursa", "Çanakkale",
    "Çankırı", "Çorum", "Denizli", "Diyarbakır", "Edirne", "Elazığ", "Erzincan", "Erzurum", 
    "Eskişehir", "Gaziantep", "Giresun", "Gümüşhane", "Hakkari", "Hatay", "Isparta", "Mersin",
    "İstanbul", "İzmir", "Kars", "Kastamonu", "Kayseri", "Kırklareli", "Kırşehir", "Kocaeli",
    "Konya", "Kütahya", "Malatya", "Manisa", "Kahramanmaraş", "Mardin", "Muğla", "Muş", 
    "Nevşehir", "Niğde", "Ordu", "Rize", "Sakarya", "Samsun", "Siirt", "Sinop", "Sivas", 
    "Tekirdağ", "Tokat", "Trabzon", "Tunceli", "Şanlıurfa", "Uşak", "Van", "Yozgat", 
    "Zonguldak", "Aksaray", "Bayburt", "Karaman", "Kırıkkale", "Batman", "Şırnak", 
    "Bartın", "Ardahan", "Iğdır", "Yalova", "Karabük", "Kilis", "Osmaniye", "Düzce"
]


### Tokenize Location

In [20]:
df['tokenized_location'] = df['location_unique'].apply(lambda x: x.split() if pd.notnull(x) else [])

### Find City

In [None]:
def extract_cities(tokens, city_list):
    if not tokens:  
        return []
    matches = []
    for city in city_list:
        for token in tokens:
            if re.search(r'\b' + re.escape(city) + r'\b', token, re.IGNORECASE): 
                matches.append(city)
    return list(set(matches))  


In [43]:
df['city'] = df['tokenized_location'].apply(lambda x: extract_cities(x, sehirler))

In [46]:
city_exploded = df['city'].explode()
city_count = city_exploded.value_counts()
city_count_df = city_count.reset_index()
city_count_df.columns = ['City', 'Count']

In [48]:
city_count_df.head()

Unnamed: 0,City,Count
0,Hatay,423
1,Kahramanmaraş,208
2,Adıyaman,166
3,Gaziantep,87
4,Malatya,64


### Get latitude & longitude

In [49]:
geolocator = Nominatim(user_agent="geoapi")

def get_coordinates(address):
    if not address or pd.isna(address):
        return None, None
    try:
        location = geolocator.geocode(address, timeout=10)
        if location:
            return location.latitude, location.longitude
        else:
            return None, None
    except GeocoderTimedOut:
        return None, None

In [50]:
city_count_df[['Latitude', 'Longitude']] = city_count_df['City'].apply(
    lambda city: pd.Series(get_coordinates(city))
)

# Heatmap

In [None]:
m = folium.Map(location=[41.008238, 28.978359], zoom_start=6)
heat_data = [[row['Latitude'], row['Longitude'], row['Count']] for index, row in city_count_df.iterrows()]
HeatMap(heat_data, 
        min_opacity=0.4,  
        max_opacity=0.9, 
        radius=50,  
        blur=30,  
        gradient={  
            0.1: 'blue',   
            0.3: 'lime',   
            0.5: 'yellow',  
            0.7: 'orange',
            1.0: 'red'      
        }).add_to(m)

for index, row in city_count_df.iterrows():
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=20,  # Marker boyutu
        color='black',
        fill=True,
        fill_opacity=0.7,
        popup=f"{row['City']}: {row['Count']}",
        tooltip=f"{row['City']} - {row['Count']} people"
    ).add_to(m)

m.save('heatmap_with_numbers.html')

In [59]:
city_count_df.head()

Unnamed: 0,City,Count,Latitude,Longitude
0,Hatay,423,36.202547,36.160291
1,Kahramanmaraş,208,37.783034,36.830655
2,Adıyaman,166,37.78936,38.31411
3,Gaziantep,87,37.062832,37.379262
4,Malatya,64,38.348715,38.319067


In [60]:
df.to_csv('datawith_city.csv', index=False)

### Example

In [None]:
geolocator = Nominatim(user_agent="geoapi")

address = "Kahramanmaraş Elbistan Pınarbaşı Mahallesi Caddesi"
location = geolocator.geocode(address)

if location:
    print(f"Adres: {address}")
    print(f"Enlem: {location.latitude}, Boylam: {location.longitude}")
else:
    print(f"Adres bulunamadı: {address}")

Adres: Kahramanmaraş Elbistan Pınarbaşı
Enlem: 38.1914464, Boylam: 37.2055864
