In [1]:
import requests
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

pd.set_option('display.max_columns', None)

In [2]:
def get_data(id:str):
    headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
    }
    url = f'https://sozluk.gov.tr/gts_id?id={id}'
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        json_data = response.json()
        if 'error' in json_data and json_data['error'] == "Sonuç bulunamadı":
            return None
        if len(json_data) > 0:
            return json_data[0]
    return None

def create_dataframe_parallel(min_id:int=1, max_id:int=92412) -> pd.DataFrame:
    data = []
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(get_data, id) for id in range(min_id, max_id + 1)]
        progress_bar = tqdm(total=len(futures))
        for idx, future in enumerate(futures):
            result = future.result()
            if result is not None:
                data.append(result)
            progress_bar.update(1)
        progress_bar.close()

    df = pd.DataFrame(data)
    return df

## Count of each Work

In [4]:
92412 / 8000

11.5515

In [6]:
df1 = create_dataframe_parallel(min_id=1, max_id=8000)

100%|██████████| 8000/8000 [03:53<00:00, 34.28it/s]


In [10]:
df2 = create_dataframe_parallel(min_id=8001, max_id=16001)

100%|██████████| 8001/8001 [03:50<00:00, 34.77it/s]


In [14]:
df3 = create_dataframe_parallel(min_id=16002, max_id=24002)

100%|██████████| 8001/8001 [03:57<00:00, 33.67it/s]  


In [17]:
df4 = create_dataframe_parallel(min_id=24003, max_id=32003)

100%|██████████| 8001/8001 [03:53<00:00, 34.22it/s]


In [26]:
df5 = create_dataframe_parallel(min_id=32004, max_id=40004)

100%|██████████| 8001/8001 [03:54<00:00, 34.16it/s]


In [30]:
df6 = create_dataframe_parallel(min_id=40005, max_id=48005)

100%|██████████| 8001/8001 [03:56<00:00, 33.82it/s]


In [5]:
df7 = create_dataframe_parallel(min_id=48006, max_id=56006)

100%|██████████| 8001/8001 [04:10<00:00, 31.88it/s]


In [14]:
df8 = create_dataframe_parallel(min_id=56007, max_id=64007)

100%|██████████| 8001/8001 [04:18<00:00, 30.98it/s]


In [16]:
df9 = create_dataframe_parallel(min_id=64008, max_id=72008)

100%|██████████| 8001/8001 [04:18<00:00, 30.96it/s]


In [19]:
df10 = create_dataframe_parallel(min_id=72009, max_id=80009)

100%|██████████| 8001/8001 [04:22<00:00, 30.44it/s]


In [4]:
df11 = create_dataframe_parallel(min_id=80010, max_id=88010)

100%|██████████| 8001/8001 [04:02<00:00, 32.93it/s]


In [21]:
df12 = create_dataframe_parallel(min_id=88010, max_id=92413)

100%|██████████| 4404/4404 [02:31<00:00, 29.03it/s]
