[TDX Documentation](https://tdx.transportdata.tw/webapi/File/Swagger/V3/2998e851-81d0-40f5-b26d-77e2f5ac4118)

In [1]:
import json
import os
import time
from collections import Counter

import pandas as pd
import requests
from dotenv import load_dotenv

load_dotenv()


True

In [2]:
class TDX():
    """Minimal client for the TDX (tdx.transportdata.tw) web API.

    Authenticates with the OAuth2 client-credentials grant and issues
    bearer-authenticated GET requests. The access token is cached on the
    instance and only re-requested once it is about to expire, instead of
    being fetched again for every API call.
    """

    TOKEN_URL = 'https://tdx.transportdata.tw/auth/realms/TDXConnect/protocol/openid-connect/token'
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30
    # Refresh the token this many seconds before its reported expiry.
    TOKEN_EXPIRY_MARGIN = 60

    def __init__(self, client_id, client_secret):
        """Store the API credentials; no network traffic happens here."""
        self.client_id = client_id
        self.client_secret = client_secret
        self.access_token = None
        # Monotonic-clock deadline after which the cached token is stale.
        self._token_expires_at = 0.0

    def get_token(self):
        """Return a valid access token, requesting a new one only when needed.

        Returns:
            The bearer token string.

        Raises:
            requests.HTTPError: the token endpoint answered with an error status.
            requests.RequestException: the request failed or timed out.
        """
        if self.access_token is not None and time.monotonic() < self._token_expires_at:
            return self.access_token

        headers = {'content-type': 'application/x-www-form-urlencoded'}
        data = {
            'grant_type': 'client_credentials',
            'client_id': self.client_id,
            'client_secret': self.client_secret
        }
        response = requests.post(
            self.TOKEN_URL, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT
        )
        # Fail loudly on bad credentials instead of a confusing KeyError below.
        response.raise_for_status()
        payload = response.json()
        self.access_token = payload['access_token']
        # If the server does not report a lifetime, treat the token as single-use.
        lifetime = payload.get('expires_in', 0)
        self._token_expires_at = time.monotonic() + max(0, lifetime - self.TOKEN_EXPIRY_MARGIN)
        return self.access_token

    def get_response(self, url):
        """GET ``url`` with bearer authentication and return the decoded JSON body.

        Raises:
            requests.HTTPError: the API answered with an error status, so an
                error payload is never returned as if it were data.
            requests.RequestException: the request failed or timed out.
        """
        headers = {'authorization': f'Bearer {self.get_token()}'}
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()



In [3]:
# Build the TDX client from the credentials found in the process environment.
tdx = TDX(
    client_id=os.environ['client_id'],
    client_secret=os.environ['client_secret'],
)

In [4]:
# Download every Taipei bus station from TDX and keep a raw copy on disk.
data = tdx.get_response("https://tdx.transportdata.tw/api/basic/v2/Bus/Station/City/Taipei?%24format=JSON")
# ensure_ascii=False writes the Chinese station names verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
with open('taipei_bus_station_info.json','w', encoding='utf-8') as f:
    json.dump(data,f,ensure_ascii=False)

In [5]:
# Flatten the nested station records into one row per station.
df = pd.json_normalize(data=data)

In [6]:
# Show the column labels of the flattened frame.
df.columns

Index(['StationUID', 'StationID', 'StationAddress', 'Stops',
       'LocationCityCode', 'Bearing', 'UpdateTime', 'VersionID',
       'StationName.Zh_tw', 'StationPosition.PositionLon',
       'StationPosition.PositionLat', 'StationPosition.GeoHash'],
      dtype='object')

In [7]:
# Every row gets the same constant city code.
df = df.assign(city_code='TPE')

In [8]:
# Placeholder column; it is filled in later from the reverse-geocoding results.
df = df.assign(district=None)

In [9]:
# Keep only these columns, in this order.
selected_columns = [
    'StationID',
    'StationName.Zh_tw',
    'StationAddress',
    'city_code',
    'district',
    'StationPosition.PositionLat',
    'StationPosition.PositionLon',
    'Bearing',
]
df = df[selected_columns]

In [10]:
# Rename the TDX field names to snake_case column names.
df = df.rename(
    columns={
        'StationID': 'bus_station_id',
        'StationName.Zh_tw': 'station_name',
        'StationAddress': 'address',
        'StationPosition.PositionLat': 'lat',
        'StationPosition.PositionLon': 'lng',
        'Bearing': 'bearing',
    }
)

In [11]:
# Save the station table (without the index) before geocoding.
df.to_csv(path_or_buf='tpe_bus_station_info.csv', index=False)

In [12]:
# Display the station table.
df

Unnamed: 0,bus_station_id,station_name,address,city_code,district,lat,lng,bearing
0,10,八勢里,中正東路二段107號(向北),TPE,,25.151230,121.459520,N
1,1000,內湖國中,陽光街1號同向(向東),TPE,,25.076960,121.587840,E
2,1000019,健康新城,健康路246號前(向西),TPE,,25.054217,121.562163,W
3,1000020,長壽公園,健康路153號(向西),TPE,,25.054069,121.559764,W
4,1000021,三軍總醫院松山分院,健康路162號前(向西),TPE,,25.053882,121.557874,W
...,...,...,...,...,...,...,...,...
5300,993,內湖派出所,內湖路二段245號~251號對面(向東),TPE,,25.081788,121.589493,E
5301,994,內湖派出所,內湖路二段257號同向西側(向西),TPE,,25.081987,121.590028,W
5302,997,捷運港墘站(內湖高工),內湖路一段635號同向(向東),TPE,,25.080149,121.574618,E
5303,998,內湖高中,文德路235號對向(向東),TPE,,25.078704,121.587566,E


In [21]:
# Reverse-geocode each station's coordinates with the Google Geocoding API and
# write the raw responses to disk in batches.
API_KEY = os.environ['google_map_api_key']
GEOCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
# Rows whose index is <= this value are skipped (resume point of an earlier,
# interrupted run). Set to -1 to geocode every row.
RESUME_AFTER_IDX = 5300
# A batch file is written whenever the row index is a positive multiple of this.
BATCH_SIZE = 100
# Seconds to wait for a geocoding response before treating the row as failed.
GEOCODE_TIMEOUT = 10


def _save_geocode_batch(results, label):
    """Write one batch of raw geocoding responses to ``<label>_result.json``."""
    with open(f'{label}_result.json','w') as f:
        json.dump(results,f,ensure_ascii=False)


api_results = []
for idx,row in df.iterrows():
    if idx<=RESUME_AFTER_IDX:
        continue
    # Let requests build and escape the query string instead of splicing the
    # key into the URL by hand.
    params = {
        'latlng': f"{row['lat']},{row['lng']}",
        'key': API_KEY,
        'language': 'zh-TW',
    }
    try:
        response = requests.get(GEOCODE_URL, params=params, timeout=GEOCODE_TIMEOUT)
        geocode_result = response.json()
    except (requests.RequestException, ValueError) as error:
        # Only network/decoding problems are tolerated; Ctrl-C and programming
        # errors still propagate. The exception text is not printed because it
        # can contain the request URL, which includes the API key.
        print('failed')
        print(type(error).__name__)
        print(row)
    else:
        geocode_result['row'] = idx
        api_results.append(geocode_result)

    if idx%10==0:
        print(f'{idx} success')

    if idx%BATCH_SIZE==0 and idx>0:
        _save_geocode_batch(api_results, idx)
        api_results=[]

# Rows after the last multiple of BATCH_SIZE would otherwise never be written;
# save them under the total row count (e.g. 5304_result.json for 5304 rows).
if api_results:
    _save_geocode_batch(api_results, len(df))
    api_results=[]

In [74]:
# Reload one saved batch of geocoding responses for inspection.
idx = 100
batch_path = f'{idx}_result.json'
with open(batch_path, 'r') as batch_file:
    api_results = json.load(batch_file)

In [89]:

def _most_common_or_none(values):
    """Return the most frequent item of ``values``, or ``None`` if it is empty."""
    if not values:
        return None
    return Counter(values).most_common(n=1)[0][0]


def extract_data(data):
    """Pick the dominant administrative areas from one geocoding response.

    Scans every address component of every entry in ``data['results']`` and
    collects the ``long_name`` of components typed
    ``administrative_area_level_2`` and ``administrative_area_level_3``.

    Args:
        data: Decoded geocoding response (a dict). A missing or empty
            ``'results'`` entry is treated as "no match".

    Returns:
        A ``(level_2, level_3)`` tuple holding the most frequent name for each
        level. Either element is ``None`` when no component of that level was
        found, rather than raising ``IndexError``.
    """
    locations = data.get('results') or []
    area_level_2_options = []
    area_level_3_options = []
    for loc in locations:
        for component in loc.get('address_components', []):
            types = component.get('types', [])
            if 'administrative_area_level_2' in types:
                area_level_2_options.append(component['long_name'])
            elif 'administrative_area_level_3' in types:
                area_level_3_options.append(component['long_name'])
    return (
        _most_common_or_none(area_level_2_options),
        _most_common_or_none(area_level_3_options),
    )

In [90]:
# Walk through every saved batch file and copy the extracted areas onto the
# matching station rows.
files = [*range(100, 5304, 100), 5304]
for file in files:
    with open(f'{file}_result.json','r') as batch_file:
        api_results = json.load(batch_file)
    for data in api_results:
        level_2, level_3 = extract_data(data)
        row_label = data['row']
        df.loc[row_label,'district'] = level_2
        df.loc[row_label,'subarea'] = level_3


In [92]:
# Save the table again (without the index) after the geocoding columns were filled.
df.to_csv(path_or_buf='tpe_bus_station_info_after_api_call.csv', index=False)

In [100]:
# Remove the 'subarea' column from the frame.
df = df.drop(columns=['subarea'])

In [102]:

from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Connection settings are read from the environment instead of being hardcoded
# in the notebook. The password has no fallback on purpose: a missing
# `db_password` raises KeyError rather than silently using a default secret.
username = os.environ.get('db_username', 'root')
password = os.environ['db_password']
server = os.environ.get('db_server', 'localhost:55000')
db_name = os.environ.get('db_name', 'group2_db')

# quote_plus keeps characters such as '@', ':' or '/' in the credentials from
# being parsed as URL delimiters.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}@{server}/{db_name}",
)

In [104]:
# Append the rows to the `bus_station` table; running this cell again appends them again.
df.to_sql(name='bus_station', con=engine, if_exists='append', index=False)

5305