In [1]:
import pandas as pd
from pathlib import Path

from hotcities.config import read_config, default_config
from hotcities.readers import load
from hotcities.filters import cities_filter, countries_filter, alternatenames_filter
from hotcities.mergers import merge

In [2]:
# Build (or reuse) the merged cities table.
#
# The CSV at `out_file` acts as a cache: the source tables are loaded and
# merged only when that file is missing; otherwise the existing file is
# reused as-is.
out_file = Path('../data/cities.csv')
min_population = 500000  # threshold handed to cities_filter below

if not out_file.exists():
    print('Reading cities data...')
    cities = load(
        'cities',
        filter=cities_filter(min_population=min_population),
        config=default_config,
    )
    print(cities)

    print('Reading countries data...')
    countries = load(
        'countries',
        filter=countries_filter(),
        config=default_config,
    )
    print(countries)

    # Announce the step *before* starting it, like the other steps do.
    print('Reading alternate names data...')
    alternatenames = load(
        'alternatenames',
        filter=alternatenames_filter(),
        config=default_config,
        low_memory=False,
    )
    print(alternatenames)

    print('Merging data...')
    data = merge(cities, countries, alternatenames)
    print(data)

    # Create the target directory first so the write cannot fail on a
    # missing folder after all the loading/merging work has been done.
    out_file.parent.mkdir(parents=True, exist_ok=True)
    data.to_csv(out_file, index=False)
    print(f'Data saved to {out_file}')

# Reload from the CSV in both cases, so `data` always comes from the file
# on disk regardless of whether it was just written or already existed.
data = pd.read_csv(out_file)

Reading cities data...
       geonameid            name countrycode  population       lat       lng  \
6         292223           Dubai          AE     3478300  25.07725  55.30927   
9         292672         Sharjah          AE     1274749  25.33737  55.41206   
12        292913     Al Ain City          AE      846747  24.19167  55.76056   
15        292968       Abu Dhabi          AE     1807000  24.45118  54.39696   
38       1133616  Mazār-e Sharīf          AF      523300  36.70904  67.11087   
...          ...             ...         ...         ...       ...       ...   
26377    1020098          Benoni          ZA      605344 -26.18848  28.32078   
26401    3369157       Cape Town          ZA     4710000 -33.92584  18.42322   
26423     909137          Lusaka          ZM     1267440 -15.40669  28.28713   
26452     890299          Harare          ZW     1542813 -17.82772  31.05337   
26460     894701        Bulawayo          ZW     1200337 -20.15000  28.58333   

                

In [4]:
# Rows whose country name is Saudi Arabia, ordered by population, largest first.
(
    data
    .loc[data['countryname'].eq('Saudi Arabia')]
    .sort_values(by='population', ascending=False)
)

Unnamed: 0,geonameid,name,countrycode,population,lat,lng,timezone,countryname,lang,localname
886,105343,Jeddah,SA,4697000,21.49012,39.18624,Asia/Riyadh,Saudi Arabia,ar,جدة
890,108410,Riyadh,SA,4205961,24.68773,46.72185,Asia/Riyadh,Saudi Arabia,ar,الرياض
885,104515,Mecca,SA,1578722,21.42664,39.82563,Asia/Riyadh,Saudi Arabia,ar,مكة المكرمة
891,109223,Medina,SA,1300000,24.46861,39.61417,Asia/Riyadh,Saudi Arabia,ar,المدينة
892,110336,Dammam,SA,1252523,26.43442,50.10326,Asia/Riyadh,Saudi Arabia,ar,الدمام
883,101760,Sulţānah,SA,946697,24.49258,39.58572,Asia/Riyadh,Saudi Arabia,ar,
888,107304,Buraydah,SA,745353,26.32599,43.97497,Asia/Riyadh,Saudi Arabia,ar,بُرَيدَة
889,107968,Ta’if,SA,688693,21.27028,40.41583,Asia/Riyadh,Saudi Arabia,ar,الطائف
882,101628,Tabuk,SA,667000,28.3998,36.57151,Asia/Riyadh,Saudi Arabia,ar,تبوك
887,106281,Ha'il,SA,605930,27.52188,41.69073,Asia/Riyadh,Saudi Arabia,ar,حائل
