In [1]:
#### Get Road Distance from BaiduMap API ####
# Programmer : Dan Qin
# Date       : 2018.06.18
# Data source: Ministry of Civil Affairs of the People's Republic of China
# (http://www.mca.gov.cn/article/sj/xzqh/2018/)

# This code uses the BaiduMap API to retrieve coordinates for prefectures in China
# and then calculates the road distances of all prefecture pairs

# import libraries
import requests
import pandas as pd
import numpy as np

In [None]:
# retrieve list of prefectures and store in 'pref_list'(omitted)

In [None]:
### Retrieve coordinates of prefectures ###
# Create a df with one row per prefecture. The lat/lon/coord cells start out
# empty (NaN) and are filled in as the geocoder answers for each row.
pref_co = pd.DataFrame(index = np.arange(len(pref_list)),
                       columns = ["prefecture","lat","lon","coord"])
pref_co["prefecture"] = pd.Series(pref_list)

# The endpoint is the same for every request, so it is defined once.
c_endpoint = "http://api.map.baidu.com/geocoder/v2/?"

# loop through prefecture list
for i in range(len(pref_co.index)):
    # request for the city's coordinates
    prefname = pref_co.at[i, "prefecture"]
    c_params = {"address": prefname,
                "output": "json",
                "ak": "******"}

    try:
        # The timeout keeps one stalled connection from hanging the whole loop.
        get_coord = requests.get(c_endpoint, params=c_params, timeout=10)
        result = get_coord.json()
        lat = result['result']['location']['lat']
        lon = result['result']['location']['lng']
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # A network error, a non-JSON body, or a response without
        # result/location leaves this row empty instead of aborting the
        # remaining prefectures.
        print(str(i) + " " + str(prefname) + " failed: " + repr(err))
        continue

    # "lat,lon" string; each number is cut to its first 9 characters
    coord = str(lat)[:9]+","+str(lon)[:9]

    # Store the coordinates in df with a single indexer on the frame itself.
    # The chained form `pref_co.lat.iat[i] = ...` assigns through an
    # intermediate Series and does not update pref_co under pandas
    # Copy-on-Write.
    pref_co.at[i, "lat"] = lat
    pref_co.at[i, "lon"] = lon
    pref_co.at[i, "coord"] = coord

In [30]:
# check: preview the first rows of the coordinate table built above
pref_co.head()

Unnamed: 0,prefecture,lat,lon
0,重庆市,29.544606,106.530635
1,保定市,38.886565,115.49481
2,石家庄市,38.048958,114.522082
3,成都市,30.679943,104.067923
4,邢台市,37.069531,114.520487


In [4]:
### Calculate distance between prefectures ###
# pair_list holds every 2-element combination of the entries of pref_list,
# as tuples, in the order itertools.combinations yields them
import itertools
pair_list = list(itertools.combinations(pref_list, 2))

In [38]:
# df to store distances: one row per (origin, destination) tuple of pair_list
pref_pairs = pd.DataFrame(pair_list, columns=['OriCity','DesCity'])

# Look up the coordinate string of both cities in pref_co.
# Renaming the lookup columns before each left merge makes the result carry
# the final column names directly (OriCity, DesCity, OriCoord, DesCoord).
coord_lookup = pref_co[["prefecture","coord"]]
for city_col, coord_col in (("OriCity", "OriCoord"), ("DesCity", "DesCoord")):
    pref_pairs = pd.merge(
        pref_pairs,
        coord_lookup.rename(columns={"prefecture": city_col, "coord": coord_col}),
        how="left",
        on=city_col,
    )

pref_pairs.head()

Unnamed: 0,OriCity,DesCity,OriCoord,DesCoord
0,重庆市,保定市,"29.544606,106.53063","38.886564,115.49481"
1,重庆市,石家庄市,"29.544606,106.53063","38.048958,114.52208"
2,重庆市,成都市,"29.544606,106.53063","30.679942,104.06792"
3,重庆市,邢台市,"29.544606,106.53063","37.069531,114.52048"
4,重庆市,赣州市,"29.544606,106.53063","25.845295,114.93590"


In [1]:
# Retrieve distance by BaiduMap RouteMatrix API
# limit of API is 30,000 calls/day

# Make sure the result list exists and has one slot per row of pref_pairs.
# Without this the cell raises NameError on a fresh kernel, or silently
# iterates over a list that was sized for another table. An existing list of
# the right length is kept, so values fetched by an earlier run are not lost.
if "Distance" not in globals() or len(Distance) != len(pref_pairs.index):
    Distance = [np.nan] * len(pref_pairs.index)

# The endpoint is the same for every request, so it is defined once.
d_endpoint = "https://api.map.baidu.com/direction/v1/routematrix?"

for i in range(len(Distance)):
    origin = pref_pairs.OriCoord[i]
    destination = pref_pairs.DesCoord[i]
    d_params ={"origins":origin,
                "destinations":destination,
                "output":"json",
                "ak":"*******"}

    try:
        # The timeout keeps one stalled connection from hanging the whole loop.
        get_dist = requests.get(d_endpoint, params=d_params, timeout=10)

        # retrieve distances from the results
        result = get_dist.json()

        if result['status'] == 0:
            Distance[i] = result['result']['elements'][0]['distance']['value']
            print(str(i) + result['message'])
        else:
            # Non-zero status: keep the previous value and report the failure.
            print(str(i) + " skipped: API status " + str(result['status']))
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # Only request/parsing problems are treated as "skip this pair".
        # A bare `except:` would also swallow KeyboardInterrupt, making this
        # long loop impossible to stop from the notebook.
        print(str(i) + " skipped: " + repr(err))
    
            

In [127]:
# OPTION II: TEST with Direction API(slower than RouteMatrix)
# for i in range(55,100):  
#     lat = pref_pairs.OriCoord[i].split(",")[0][:8]
#     lon = pref_pairs.OriCoord[i].split(",")[1][:9]    
#     origin = str(lat)+","+str(lon)
    
#     dlat = pref_pairs.DesCoord[i].split(",")[0][:8]
#     dlon = pref_pairs.DesCoord[i].split(",")[1][:9] 
#     destination = str(dlat)+","+str(dlon)    
            
#     d_endpoint = "http://api.map.baidu.com/direction/v2/driving?"
#     d_params ={"origin":origin,
#                 "destination":destination,
#                 "output":"json",
#                 "ak":"********"}
#     get_dist = requests.get(d_endpoint, params=d_params)
        
#     # retrieve distances from the results
#     result = get_dist.json()
    
#     print(str(i)+";"+result['message'])
    
#     Distance[i] = result['result']['routes'][0]['distance']

# pref_pairs.head() 

In [11]:
# add the column to dataframe: Distance is the list filled by the RouteMatrix
# loop, one value per row of pref_pairs; then show the last rows as a check
pref_pairs["Distance"] = Distance
pref_pairs.tail()

Unnamed: 0,OriCity,DesCity,OriCoord,DesCoord,Distance
56948,三沙市,嘉峪关市,"12.464712,113.75535","39.802397,98.281634",0.0
56949,三沙市,中山市,"12.464712,113.75535","22.545177,113.42206",0.0
56950,儋州市,嘉峪关市,"19.574787,109.33458","39.802397,98.281634",3353008.0
56951,儋州市,中山市,"19.574787,109.33458","22.545177,113.42206",697260.0
56952,嘉峪关市,中山市,"39.802397,98.281634","22.545177,113.42206",3073535.0


In [44]:
### Calculate Distance Within Prefectures ###
# "pref_pairs_r" stores the within-prefecture distances: every prefecture is
# paired with itself, so origin and destination are the same city
pref_pairs_r = pd.DataFrame({"OriCity": list(pref_list),
                             "DesCity": list(pref_list)})

# Look up the coordinate string for both columns in pref_co. The lookup
# columns are renamed before each left merge so the result already has the
# final column names (OriCity, DesCity, OriCoord, DesCoord).
coord_lookup = pref_co[["prefecture","coord"]]
for city_col, coord_col in (("OriCity", "OriCoord"), ("DesCity", "DesCoord")):
    pref_pairs_r = pd.merge(
        pref_pairs_r,
        coord_lookup.rename(columns={"prefecture": city_col, "coord": coord_col}),
        how="left",
        on=city_col,
    )

pref_pairs_r.head(1)

Unnamed: 0,OriCity,DesCity,OriCoord,DesCoord
0,重庆市,重庆市,"29.544606,106.53063","29.544606,106.53063"


In [12]:
# Build the table of county pairs used to average the distances between
# counties within a prefecture

# keep only rows of the division df that have a county ("县级") value,
# i.e. drop province and prefecture rows
data_r = data.dropna(subset = ["县级"])

# For each prefecture (in pref_list order) take the counties whose "地级"
# equals that prefecture and emit one record per 2-county combination.
county_records = [
    (pref_name, first_county, second_county)
    for pref_name in pref_list
    for first_county, second_county in itertools.combinations(
        data_r.loc[data_r.地级 == pref_name, "县级"].tolist(), 2
    )
]

# create a df of county combinations
c_pairs = pd.DataFrame(county_records, columns=["Pref","County_1","County_2"])

c_pairs.head(1)

Unnamed: 0,Pref,County_1,County_2
0,重庆市,渝中区,万州区


In [2]:
# METHOD II(continue)
# list to store distance values, one slot per county pair; a slot stays NaN
# when the request for that pair fails
Distance = [np.nan]*len(c_pairs.index)

# The endpoint is the same for every request, so it is defined once.
d_endpoint = "https://api.map.baidu.com/direction/v1/routematrix?"

# loop through df
for i in range(0,len(Distance)):
    # NOTE(review): origin/destination are bare county names here, not
    # coordinates; the same county name may exist in several prefectures --
    # confirm how the API resolves them.
    origin = c_pairs.County_1[i]
    destination = c_pairs.County_2[i]
    d_params ={"origins":origin,
                "destinations":destination,
                "output":"json",
                "ak":"*******"}

    # calculate the road distance between counties
    try:
        # The timeout keeps one stalled connection from hanging the whole loop.
        get_dist = requests.get(d_endpoint, params=d_params, timeout=10)

        # retrieve distances from the results
        result = get_dist.json()

        if result['status'] == 0:
            Distance[i] = result['result']['elements'][0]['distance']['value']
            print(str(i) + result['message'])
        else:
            # Non-zero status: leave NaN for this pair and report the failure.
            print(str(i) + " skipped: API status " + str(result['status']))
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # Only request/parsing problems are treated as "skip this pair".
        # A bare `except:` would also swallow KeyboardInterrupt, making this
        # long loop impossible to stop from the notebook.
        print(str(i) + " skipped: " + repr(err))

c_pairs["Distance"] = Distance
c_pairs.head(1)

In [24]:
# METHOD II(continue)
# Within-prefecture distance = mean of the county-pair distances in c_pairs
# whose Pref equals the prefecture. Values are produced in pref_pairs_r row
# order; a prefecture with no county pairs gets NaN from the empty mean.
Distance = [
    c_pairs.loc[c_pairs.Pref == pref_name, "Distance"].mean()
    for pref_name in pref_pairs_r.OriCity
]

pref_pairs_r["Distance"] = Distance
pref_pairs_r.head(1)

Unnamed: 0,OriCity,DesCity,OriCoord,DesCoord,Distance
0,重庆市,重庆市,"29.544606,106.53063","29.544606,106.53063",226018.015038


In [78]:
# concat the dataframes: stack the between-prefecture rows (pref_pairs) on top
# of the within-prefecture rows (pref_pairs_r); no ignore_index is passed, so
# each frame keeps its own row labels
pref_pairs_rr = pd.concat([pref_pairs, pref_pairs_r])

In [7]:
# save the dataframes to files
# Both files are written without the index column, using the utf_8_sig codec
# (UTF-8 with a BOM) -- presumably so spreadsheet tools detect the encoding of
# the Chinese place names.
pref_co.to_csv("data/CH_prefecture_coordinate.csv", index=False, encoding="utf_8_sig")
# The concatenated distance table is named pref_pairs_rr; the previous name
# pref_pairs_rrr is never defined in this notebook and raised NameError.
pref_pairs_rr.to_csv("data/CH_prefecture_distance.csv", index=False, encoding="utf_8_sig")