In [1]:
# Data from https://en.wikipedia.org/wiki/List_of_urban_areas_by_population,
# cleaned up a bit in Apple Numbers.

So, the strategy here will be to produce a list of city centers, find the latitude/longitude of each center using Google Maps geocoding queries, and then fit a suitably sized 'square' box around each center: the side length comes from the reported land area, and it is converted to degrees using the number of kilometres per degree (which, for longitude especially, depends on the latitude of the center point). This is not quite a true bounding box, but it should correspond reasonably well to this list, which certainly doesn't have any bounding boxes.

In [2]:
import pandas as pd

In [3]:
# Load the cleaned Wikipedia table of urban areas into the working frame.
df = pd.read_csv(filepath_or_buffer="./world_urban_areas_wiki.csv")

In [4]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 497 entries, 0 to 496
Data columns (total 6 columns):
Center        497 non-null object
Urban_Area    497 non-null object
Country       497 non-null object
Population    497 non-null object
Area          497 non-null object
Density       497 non-null object
dtypes: object(6)
memory usage: 27.2+ KB


In [5]:
# Most frequent center names first, to spot names shared by more than one row.
df["Center"].value_counts().head()

Hyderabad    2
Valencia     2
Fuzhou       2
Warsaw       1
Stockholm    1
dtype: int64

In [6]:
# Show every row whose center is named "Hyderabad".
df.loc[df["Center"] == "Hyderabad"]

Unnamed: 0,Center,Urban_Area,Country,Population,Area,Density
38,Hyderabad,Hyderabad,India,8754000,1230,7100
155,Hyderabad,Hyderabad,Pakistan,2920000,73,40300


In [7]:
# Show every row whose center is named "Valencia".
df.loc[df["Center"] == "Valencia"]

Unnamed: 0,Center,Urban_Area,Country,Population,Area,Density
315,Valencia,Valencia,Spain,1561000,272,5700
331,Valencia,Valencia,Venezuela,1477000,363,4100


In [8]:
# Show every row whose center is named "Fuzhou".
df.loc[df["Center"] == "Fuzhou"]

Unnamed: 0,Center,Urban_Area,Country,Population,Area,Density
98,Fuzhou,Fuzhou,China,3962000,440,9000
464,Fuzhou,Fuzhou,China,1052000,67,15600


In [9]:
# Both "Fuzhou" rows have the same country, so the center name alone cannot tell
# them apart; append a province to each.
# Assign through df.loc[row, column] in a single indexing step: the chained form
# df.Center.loc[...] = ... may write to a temporary copy and leave df unchanged.
df.loc[98, "Center"] = "Fuzhou, Fujian"
df.loc[464, "Center"] = "Fuzhou, Jiangxi"

In [10]:
# Show the row whose center is named "Greater Cairo".
df.loc[df["Center"] == "Greater Cairo"]

Unnamed: 0,Center,Urban_Area,Country,Population,Area,Density
16,Greater Cairo,Greater Cairo,Egypt,15600000,1761,8900


In [11]:
# Replace the center name "Greater Cairo" with the plain city name.
# Single-step df.loc[row, column] assignment avoids chained assignment, which may
# write to a temporary copy and leave df unchanged.
df.loc[16, "Center"] = "Cairo"

In [12]:
import numpy as np

In [13]:
# Drop leading spaces from every country name.
df["Country"] = df["Country"].str.lstrip(" ")

In [14]:
# Check the comma-stripping on the first population value.
# str.replace works on both Python 2 and 3; the two-argument
# str.translate(None, ...) form exists only on Python 2.
df.Population.loc[0].replace(",", "")

'37843000'

In [15]:
# Parse populations such as "37,843,000" into integers.
# str.replace is used instead of the Python-2-only str.translate(None, ",").
df.Population = df.Population.map(lambda x: int(x.replace(",", "")))

In [16]:
# Parse the area strings into integers by removing "," and "." characters.
# str.replace is used instead of the Python-2-only str.translate(None, ",.").
# NOTE(review): removing "." assumes it is only ever a thousands separator, never
# a decimal point; confirm against the CSV.
df.Area = df.Area.map(lambda x: int(x.replace(",", "").replace(".", "")))

In [17]:
# Parse the density strings into integers by removing "," and "." characters.
# str.replace is used instead of the Python-2-only str.translate(None, ",.").
# NOTE(review): removing "." assumes it is only ever a thousands separator, never
# a decimal point; confirm against the CSV.
df.Density = df.Density.map(lambda x: int(x.replace(",", "").replace(".", "")))

In [18]:
# Total population across all listed urban areas.
df["Population"].sum()

1705459474

1.7 billion!

In [19]:
import requests
import shutil

import os

# Endpoint of the Google Maps Geocoding API; query parameters are appended to it.
BASEURL = "https://maps.googleapis.com/maps/api/geocode/json?"

# The API key is a credential, so it is read from the GOOGLE_MAPS_API_KEY
# environment variable rather than stored in the notebook. Any key that was
# previously committed here should be treated as leaked and revoked.
# KEY keeps its "key=<value>&" query-fragment shape so existing URL concatenation
# keeps working. If the variable is unset the key is empty and requests will be
# rejected by the API.
KEY = "key=" + os.environ.get("GOOGLE_MAPS_API_KEY", "") + "&"

In [20]:
# Geocode every "Center,Country" pair, collecting longitudes and latitudes in
# row order.
lngs = []
lats = []

# KEY has the form "key=<value>&"; drop the trailing "&" so the final URL does
# not contain "&&" once requests appends the address parameter.
geocode_url = BASEURL + KEY.rstrip("&")

# Iterate over the values positionally instead of df.<col>.loc[i], which only
# works while the index labels are exactly 0..n-1.
for center, country in zip(df.Center, df.Country):
    query = center + "," + country
    # Passing the address via `params` lets requests percent-encode spaces,
    # commas and non-ASCII characters in place names.
    r = requests.get(geocode_url, params={"address": query}, timeout=30)
    r.raise_for_status()
    payload = r.json()  # parse the response body once
    results = payload.get("results")
    if not results:
        # Fail with the offending place name instead of an opaque IndexError.
        raise RuntimeError(
            "No geocoding result for %r (status: %s)" % (query, payload.get("status"))
        )
    location = results[0]["geometry"]["location"]
    lngs.append(location["lng"])
    lats.append(location["lat"])

In [21]:
# Attach the geocoded coordinates as new columns (lon first, then lat).
df["lon"], df["lat"] = lngs, lats

In [22]:
# Now we need to find out how large a box each urban area (UA) needs.
# These approximate conversions come from the Wikipedia page for latitude:

# Latitude: 1 deg = 110.574 km
# Longitude: 1 deg = 111.320*cos(latitude) km

# np.cos expects radians, so let's convert latitude to radians...

In [23]:
# Latitude of each center in radians, computed on the whole column at once.
df["lat_rad"] = np.radians(df["lat"])

In [24]:
# Side length of a square with the same area as the urban area, rounded to the
# nearest whole number.
df["goal_side"] = np.sqrt(df["Area"]).round().astype("int64")

In [25]:
# Half of the box side expressed in degrees of latitude (110.574 km per degree).
df["lat_margin"] = df["goal_side"].div(110.574).div(2)

In [26]:
# Kilometres per degree of longitude at each center's latitude:
# 111.320 * cos(latitude).
df["lon_denom"] = np.cos(df["lat_rad"]) * 111.320

In [27]:
# Half of the box side expressed in degrees of longitude at that latitude.
df["lon_margin"] = df["goal_side"].div(df["lon_denom"]).div(2)

In [28]:
# Edges of the box centred on each (lon, lat), in degrees: the center shifted by
# the half-side margins. Column arithmetic replaces the row-by-row loop, so this
# no longer depends on the index labels being 0..n-1 and leaves no scratch lists
# behind. Columns are created in the same order as before.
# NOTE(review): edges are not clamped or wrapped, so a box near a pole or the
# antimeridian can fall outside [-90, 90] / [-180, 180].
df['left'] = df['lon'] - df['lon_margin']
df['top'] = df['lat'] + df['lat_margin']
df['right'] = df['lon'] + df['lon_margin']
df['bottom'] = df['lat'] - df['lat_margin']

In [36]:
# Spot-check one city: the first eight columns of the "Athens" row.
df.loc[df["Center"] == "Athens", df.columns[:8]]

Unnamed: 0,Center,Urban_Area,Country,Population,Area,Density,lon,lat
122,Athens,Athens,Greece,3484000,583,6000,23.72936,37.983917


In [30]:
# Write the full frame, including the intermediate helper columns, to CSV
# without the index column.
df.to_csv(path_or_buf="./world_ua_bboxes.csv", index=False)