In [1]:
from address_resolver import AddressAPI
import concurrent.futures
from multiprocessing.pool import ThreadPool

In [36]:
import os
from dotenv import load_dotenv

# Read the three service keys from the environment after loading the local .env file.
load_dotenv('.env')
GOOGLE_API_KEY, OPENAI_API_KEY, NER_API_KEY = (
    os.getenv(key_name)
    for key_name in ("GOOGLE_API_KEY", "OPENAI_API_KEY", "NER_API_KEY")
)

address_api = AddressAPI(GOOGLE_API_KEY, OPENAI_API_KEY, NER_API_KEY)
address_text = 'Hatay ANTAKYA cebrail mah cumhuriyet caddesi seçkinler apt a 3'

# Send the same sample address through each backend in turn and render every
# response under its own heading, so the outputs can be compared side by side.
for label, request in (
    ("Regex API", address_api.regex_api_request),
    ("OpenAI API", address_api.openai_api_request),
    ("Google Geocode API", address_api.google_geocode_api_request),
):
    print(label)
    display(request(address_text, entry_id=1))

Regex API


{'city': 'Hatay',
 'distinct': 'ANTAKYA',
 'neighbourhood': 'cebrail mah',
 'excessData': {'street_road': 'cumhuriyet caddesi',
  'complex': '',
  'apartment': 'seçkinler apt',
  'part': '',
  'block': '',
  'floor': '',
  'apartment_no': '',
  'phone': ''},
 'originalText': 'Hatay ANTAKYA cebrail mah cumhuriyet caddesi seçkinler apt a 3',
 'address': 'Hatay ANTAKYA cebrail mah cumhuriyet caddesi seçkinler apt',
 'ws': 0.7142857142857143,
 'entry_id': 1}

OpenAI API


{'city': 'Hatay',
 'distinct': 'Antakya',
 'neighbourhood': 'Cebrail Mahallesi',
 'street': 'Cumhuriyet Caddesi',
 'no': '3',
 'tel': '',
 'name_surname': '',
 'address': 'Antakya, Cebrail Mahallesi, Cumhuriyet Caddesi, Seçkinler Apt. No:3',
 'entry_id': 1}

Google Geocode API


{'address': 'Hatay ANTAKYA cebrail mah cumhuriyet caddesi seçkinler apt a 3',
 'latitude': 36.2033813,
 'longitude': 36.1588396,
 'northeast_lat': 36.20469882989271,
 'northeast_lng': 36.16017122989272,
 'southwest_lat': 36.20199917010727,
 'southwest_lng': 36.15747157010728,
 'formatted_address': 'Cebrail Mah.Cumhuriyet Cad.Seçkinler Apt Altı, No:10/4, 31040 Antakya/Hatay, Türkiye',
 'is_resolved': True,
 'entry_id': 1}

In [6]:
import json

import pandas as pd

sample_df = pd.read_csv('../data_for_regex_v2.csv')

# Pick the sample rows ONCE so that raw_text and metadata always describe the
# same tweet. Taking the first 100 texts and, separately, the first 100
# non-null metadata values silently misaligns the two columns as soon as one
# of the leading rows has no extra_parameters.
sample_rows = sample_df[sample_df.extra_parameters.notna()].head(100)

address_df = pd.DataFrame({
    # Sized from the selection so a file with fewer than 100 usable rows still loads.
    'entry_id': range(len(sample_rows)),
    'raw_text': sample_rows.full_text.values,
    'channel': 'Twitter',
    'metadata': sample_rows.extra_parameters.values,
})

# The metadata column holds JSON text scraped from an external source; parse
# it with json.loads rather than eval(), which would execute arbitrary code
# and also fails on JSON literals such as null/true/false.
address_df['timestamp'] = address_df['metadata'].apply(lambda x: json.loads(x)['created_at'])
address_df

Unnamed: 0,entry_id,raw_text,channel,metadata,timestamp
0,0,"COCUK KAYIP⚠️⚠️⚠️\n \nGOKTURK CADDESI\nNO:46, ...",Twitter,"{""user_id"": ""1507064701351600130"", ""screen_nam...",2023-02-08 02:57:06
1,1,NERDE BU AFAD,Twitter,"{""user_id"": ""1521470073822720002"", ""screen_nam...",2023-02-08 02:57:05
2,2,Adıyaman ve ilçeleri enkaz altında. Ulaşılması...,Twitter,"{""user_id"": ""1492278858380464131"", ""screen_nam...",2023-02-08 03:03:34
3,3,Cebrail Mah. Cumhuriyet Cad. Seçkinler Apt. A ...,Twitter,"{""user_id"": ""1583974494837903361"", ""screen_nam...",2023-02-08 03:03:33
4,4,@bengidemem Betül Ekici ve ailesi\n\nŞehit Abd...,Twitter,"{""user_id"": ""2767389526"", ""screen_name"": ""Kork...",2023-02-08 02:57:05
...,...,...,...,...,...
95,95,Hissedilen herşeye cümle kurulmuyor,Twitter,"{""user_id"": ""1312111525897830401"", ""screen_nam...",2023-02-08 02:56:45
96,96,Turunçlu Bel Yeşilyayla Kent Sitesi Sani Akar ...,Twitter,"{""user_id"": ""4846569496"", ""screen_name"": ""no c...",2023-02-08 02:56:44
97,97,Kahramanmaraş şazibey mahallesi Ebrar sitesi Z...,Twitter,"{""user_id"": ""1482457785719791623"", ""screen_nam...",2023-02-08 02:56:45
98,98,@haluklevent Bahçelievler mahallesi Atatürk bu...,Twitter,"{""user_id"": ""1301628422674886667"", ""screen_nam...",2023-02-08 02:56:45


In [4]:
def _select_candidates(records, threshold):
    """Return the ``address``/``entry_id`` columns of the records whose ``ws`` score is >= threshold."""
    frame = pd.DataFrame(records)
    if frame.empty:
        # No records means there is no ``ws`` column to filter on; return an
        # empty frame with the expected columns instead of raising AttributeError.
        return pd.DataFrame(columns=['address', 'entry_id'])
    return frame.loc[frame.ws >= threshold, ['address', 'entry_id']]


def _run_threaded(func, arg_tuples, n_workers):
    """Call ``func(*args)`` for every argument tuple on a thread pool, keeping input order."""
    arg_tuples = list(arg_tuples)
    if not arg_tuples:
        return []
    # Never start more threads than there are requests to make.
    with ThreadPool(max(1, min(n_workers, len(arg_tuples)))) as pool:
        return pool.starmap(func, arg_tuples)


def pipeline(address_df, api=None, regex_threshold=0.7, ner_threshold=0.5, n_workers=60):
    """Extract addresses from raw texts and geocode the confident ones.

    Stages:
      1. Every row goes through the regex extractor; results with
         ``ws >= regex_threshold`` are accepted.
      2. Rows not accepted by stage 1 go through the NER extractor (threaded);
         results with ``ws >= ner_threshold`` are accepted.
      3. All accepted ``address``/``entry_id`` pairs are geocoded (threaded).

    Parameters
    ----------
    address_df : pandas.DataFrame
        Must have ``raw_text`` and ``entry_id`` columns. It is not modified.
    api : object, optional
        Client exposing ``regex_api_request``, ``ner_api_request`` and
        ``google_geocode_api_request``. Defaults to the notebook-level
        ``address_api``.
    regex_threshold, ner_threshold : float
        Minimum ``ws`` score for a regex / NER result to be geocoded.
    n_workers : int
        Upper bound on the thread-pool size used for the NER and geocode calls.

    Returns
    -------
    pandas.DataFrame
        One row per geocode response, regex-stage rows first; an empty frame
        when no address passed either threshold.
    """
    if api is None:
        api = address_api  # fall back to the client created earlier in the notebook

    # Stage 1: regex extraction, run sequentially.
    regex_records = [
        api.regex_api_request(raw_text, entry_id)
        for raw_text, entry_id in zip(address_df.raw_text.values, address_df.entry_id.values)
    ]
    regex_to_geocode = _select_candidates(regex_records, regex_threshold)

    # Stage 2: NER extraction, only for entries the regex stage did not accept.
    ner_input = address_df[~address_df.entry_id.isin(regex_to_geocode.entry_id.values)]
    ner_records = _run_threaded(
        api.ner_api_request,
        zip(ner_input.raw_text.values, ner_input.entry_id.values),
        n_workers,
    )
    ner_to_geocode = _select_candidates(ner_records, ner_threshold)

    # Stage 3: geocode everything that was accepted. Empty frames are dropped
    # before concatenation so an idle stage cannot break the merge.
    accepted = [frame for frame in (regex_to_geocode, ner_to_geocode) if not frame.empty]
    if not accepted:
        return pd.DataFrame()
    geocode_input = pd.concat(accepted, axis=0)

    geocode_records = _run_threaded(
        api.google_geocode_api_request,
        zip(geocode_input.address.values, geocode_input.entry_id.values),
        n_workers,
    )
    return pd.DataFrame(geocode_records)

In [5]:
# Run the regex -> NER -> geocode pipeline over the sample frame and keep the
# result; the bare name on the last line renders it as this cell's output.
geocode_data = pipeline(address_df)
geocode_data

Unnamed: 0,address,latitude,longitude,northeast_lat,northeast_lng,southwest_lat,southwest_lng,formatted_address,is_resolved,entry_id
0,Hatay ANTAKYA cebrail mah cumhuriyet caddesi s...,36.203044,36.159812,36.204362,36.161141,36.201662,36.158441,"Cebrail, Cumhuriyet Cd. No:2, 31030 Antakya/Ha...",True,3
1,Kahramanmaraş ONİKİŞUBAT şazibey mahallesi 640...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,,False,8
2,Hatay ANTAKYA ürgenpaşa mah ekinci yol buket apt,36.236263,36.142416,36.237503,36.143913,36.234804,36.141213,"Ekinci, Çevre Yolu, 31180 Antakya/Hatay, Türkiye",True,10
3,Hatay İSKENDERUN çay mahallesi atatürk bulvar ...,36.584673,36.175616,36.585950,36.177033,36.583250,36.174333,SAKARYA MAHALLESİ 278.SOKAK BİLGİ APARTMANI NO...,True,12
4,Hatay ANTAKYA odabaşı mahallesi 6 sokak zeytun...,36.223085,36.161904,36.224475,36.163206,36.221775,36.160506,"Odabaşı, 6. Sk., Antakya/Hatay, Türkiye",True,13
...,...,...,...,...,...,...,...,...,...,...
62,Osmaniye Düziçi Esenevler mahallesi Palmiy...,41.009770,29.092240,41.011099,29.093583,41.008400,29.090883,"Esenevler, 34762 Ümraniye/İstanbul, Türkiye",True,86
63,KAHRAMANMARAS oniki subat malik ejder mahall...,37.568597,36.899007,37.573493,36.901854,37.563132,36.893305,"Malik Ejder, 46040 Onikişubat/Kahramanmaraş, T...",True,87
64,YAVUZ SELİM MAHALLESİ 605 SOKAK MERKEZ ADI...,37.764263,38.268468,37.768286,38.272721,37.761858,38.263856,"Yavuz Selim, 02040 Adıyaman Merkez/Adıyaman, T...",True,92
65,Altınova mah. 136046 sok. onikişubat kahra...,37.589509,36.855659,37.590921,36.857081,37.588221,36.854382,Yirmi İki Gün Mahallesi 91039. Sokak Dora Park...,True,94
