In [3]:
import urllib.request
import json
import pandas as pd
import numpy as np

https://opendata.cwb.gov.tw/dataset/observation/O-A0001-001

In [4]:
url = 'https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/{dataid}?Authorization={apikey}&format={data_type}'
dataid = 'O-A0001-001'
apikey = "CWB-994C83F1-EF6F-4210-A67A-8A688C6219D3"
data_type = 'json'
link = url.format(dataid=dataid, apikey=apikey, data_type=data_type)
#link
link

'https://opendata.cwb.gov.tw/fileapi/v1/opendataapi/O-A0001-001?Authorization=CWB-994C83F1-EF6F-4210-A67A-8A688C6219D3&format=json'

In [5]:
#url path
urllib.request.urlretrieve(link,'{}.{}'.format(dataid, data_type))

('O-A0001-001.json', <http.client.HTTPMessage at 0xd4747437c8>)

In [6]:
with open('{}.{}'.format(dataid, data_type), 'r', encoding='utf-8-sig') as file:
    data = json.load(file)

In [7]:
data.keys()

dict_keys(['cwbopendata'])

In [8]:
weather_tmp = data["cwbopendata"]["location"]
weather_tmp

[{'lat': '25.035950',
  'lon': '121.611456',
  'lat_wgs84': '25.0341638888889',
  'lon_wgs84': '121.619680555556',
  'locationName': '國三南深路交流道',
  'stationId': 'CM0010',
  'time': {'obsTime': '2019-10-19T19:00:00+08:00'},
  'weatherElement': [{'elementName': 'ELEV',
    'elementValue': {'value': '68.0'}},
   {'elementName': 'WDIR', 'elementValue': {'value': '83'}},
   {'elementName': 'WDSD', 'elementValue': {'value': '0.8'}},
   {'elementName': 'TEMP', 'elementValue': {'value': '21.4'}},
   {'elementName': 'HUMD', 'elementValue': {'value': '0.8'}},
   {'elementName': 'PRES', 'elementValue': {'value': '1012.0'}},
   {'elementName': 'H_24R', 'elementValue': {'value': '10.5'}},
   {'elementName': 'H_FX', 'elementValue': {'value': '-99'}},
   {'elementName': 'H_XD', 'elementValue': {'value': '-99'}},
   {'elementName': 'H_FXT', 'elementValue': {'value': '-99'}},
   {'elementName': 'D_TX', 'elementValue': {'value': '22.70'}},
   {'elementName': 'D_TXT',
    'elementValue': {'value': '2019-1

In [9]:
# collect lat, lon, and locationName
lat = []
lon = []
locationName = []
for district in weather_tmp:
    lat.append(district["lat"])
    lon.append(district["lon"])
    locationName.append(district["locationName"])

In [10]:
len(data["cwbopendata"]["location"])

424

In [11]:
# collect city & town
location_lst = []
for i in range(len(data["cwbopendata"]["location"])):
    location = [[element["parameterValue"]] for element in data["cwbopendata"]["location"][i]["parameter"]]
    location_lst.append(location)

In [24]:
# collect column names of city & town
location_col =  [element["parameterName"] for element in data["cwbopendata"]["location"][0]["parameter"]]
location_col

['CITY', 'CITY_SN', 'TOWN', 'TOWN_SN']

In [13]:
# flattening location_lst & location_col
location_list = [item for sublist in location_lst for item in sublist]
location_col = [item for sublist in location_col for item in sublist]
location_list
location_col

['CITY', 'CITY_SN', 'TOWN', 'TOWN_SN']

In [14]:
# change location_list to np.array to reshape it
location_list = np.array(location_list).reshape(-1, 4)
location_list

array([['臺北市', '01', '南港區', '035'],
       ['新竹縣', '10', '竹北市', '064'],
       ['彰化縣', '14', '鹿港鎮', '140'],
       ...,
       ['雲林縣', '16', '水林鄉', '204'],
       ['臺南市', '04', '北門區', '237'],
       ['臺南市', '04', '七股區', '253']], dtype='<U4')

In [15]:
# create location_df
location_df = pd.DataFrame(location_list)
location_df.columns = location_col
location_df.head()

Unnamed: 0,CITY,CITY_SN,TOWN,TOWN_SN
0,臺北市,1,南港區,35
1,新竹縣,10,竹北市,64
2,彰化縣,14,鹿港鎮,140
3,雲林縣,16,臺西鄉,187
4,臺南市,4,七股區,253


In [16]:
# collect weather-related column names
weather_col = [[element["elementName"]] for element in data["cwbopendata"]["location"][0]["weatherElement"]]
weather_col

[['ELEV'],
 ['WDIR'],
 ['WDSD'],
 ['TEMP'],
 ['HUMD'],
 ['PRES'],
 ['H_24R'],
 ['H_FX'],
 ['H_XD'],
 ['H_FXT'],
 ['D_TX'],
 ['D_TXT'],
 ['D_TN'],
 ['D_TNT']]

In [17]:
# collect weather-related data
weatherElement = []
for i in range(len(data["cwbopendata"]["location"])):
    weather = [[element["elementValue"]["value"]] for element in data["cwbopendata"]["location"][i]["weatherElement"]]
    weatherElement.append(weather)

In [18]:
# flattening weather_list & weatherElement
weather_list = [item for sublist in weather_col for item in sublist]
weatherElement = [item for sublist in weatherElement for item in sublist]
weatherElement

[['68.0'],
 ['83'],
 ['0.8'],
 ['21.4'],
 ['0.8'],
 ['1012.0'],
 ['10.5'],
 ['-99'],
 ['-99'],
 ['-99'],
 ['22.70'],
 ['2019-10-19T12:50:00+08:00'],
 ['20.50'],
 ['2019-10-19T06:30:00+08:00'],
 ['25.0'],
 ['27'],
 ['2.6'],
 ['23.2'],
 ['0.68'],
 ['1015.3'],
 ['0.0'],
 ['10.3'],
 ['78'],
 ['2019-10-19T18:34:00+08:00'],
 ['24.90'],
 ['2019-10-19T09:40:00+08:00'],
 ['21.50'],
 ['2019-10-19T04:40:00+08:00'],
 ['7.0'],
 ['47'],
 ['5.3'],
 ['23.6'],
 ['0.69'],
 ['1015.5'],
 ['0.0'],
 ['12.1'],
 ['52'],
 ['2019-10-19T18:21:00+08:00'],
 ['26.40'],
 ['2019-10-19T10:40:00+08:00'],
 ['22.00'],
 ['2019-10-19T02:10:00+08:00'],
 ['4.0'],
 ['43'],
 ['7.9'],
 ['24.1'],
 ['0.69'],
 ['1015.4'],
 ['0.0'],
 ['15.3'],
 ['63'],
 ['2019-10-19T18:56:00+08:00'],
 ['25.40'],
 ['2019-10-19T12:00:00+08:00'],
 ['22.10'],
 ['2019-10-19T03:20:00+08:00'],
 ['11.0'],
 ['3'],
 ['4.7'],
 ['24.5'],
 ['0.72'],
 ['1015.1'],
 ['0.0'],
 ['-99'],
 ['-99'],
 ['-99'],
 ['26.30'],
 ['2019-10-19T12:30:00+08:00'],
 ['21.50'],
 ['2

In [19]:
# change weatherElement to array to reshape it
weatherElement = np.array(weatherElement).reshape(-1, 14)
weatherElement

array([['68.0', '83', '0.8', ..., '2019-10-19T12:50:00+08:00', '20.50',
        '2019-10-19T06:30:00+08:00'],
       ['25.0', '27', '2.6', ..., '2019-10-19T09:40:00+08:00', '21.50',
        '2019-10-19T04:40:00+08:00'],
       ['7.0', '47', '5.3', ..., '2019-10-19T10:40:00+08:00', '22.00',
        '2019-10-19T02:10:00+08:00'],
       ...,
       ['15.0', '6', '3.1', ..., '2019-10-19T12:00:00+08:00', '20.60',
        '2019-10-19T03:30:00+08:00'],
       ['10.0', '25', '6.6', ..., '2019-10-19T12:20:00+08:00', '21.20',
        '2019-10-19T03:50:00+08:00'],
       ['9.0', '11', '7.0', ..., '2019-10-19T12:50:00+08:00', '21.00',
        '2019-10-19T05:30:00+08:00']], dtype='<U25')

In [20]:
# create weather_df
weather_df = pd.DataFrame(weatherElement)
weather_df.columns = weather_list
weather_df

Unnamed: 0,ELEV,WDIR,WDSD,TEMP,HUMD,PRES,H_24R,H_FX,H_XD,H_FXT,D_TX,D_TXT,D_TN,D_TNT
0,68.0,83,0.8,21.4,0.8,1012.0,10.5,-99,-99,-99,22.70,2019-10-19T12:50:00+08:00,20.50,2019-10-19T06:30:00+08:00
1,25.0,27,2.6,23.2,0.68,1015.3,0.0,10.3,78,2019-10-19T18:34:00+08:00,24.90,2019-10-19T09:40:00+08:00,21.50,2019-10-19T04:40:00+08:00
2,7.0,47,5.3,23.6,0.69,1015.5,0.0,12.1,52,2019-10-19T18:21:00+08:00,26.40,2019-10-19T10:40:00+08:00,22.00,2019-10-19T02:10:00+08:00
3,4.0,43,7.9,24.1,0.69,1015.4,0.0,15.3,63,2019-10-19T18:56:00+08:00,25.40,2019-10-19T12:00:00+08:00,22.10,2019-10-19T03:20:00+08:00
4,11.0,3,4.7,24.5,0.72,1015.1,0.0,-99,-99,-99,26.30,2019-10-19T12:30:00+08:00,21.50,2019-10-19T05:20:00+08:00
5,26.0,68,2.8,24.0,0.76,1014.6,2.5,8.1,67,2019-10-19T18:49:00+08:00,27.60,2019-10-19T12:20:00+08:00,23.50,2019-10-19T18:30:00+08:00
6,11.0,7,0.6,24.7,0.7,1015.4,0.0,-99,-99,-99,28.40,2019-10-19T12:20:00+08:00,20.70,2019-10-19T04:20:00+08:00
7,159.0,18,1.3,22.2,0.77,-99,0.0,-99,-99,-99,30.90,2019-10-19T12:40:00+08:00,19.40,2019-10-19T02:00:00+08:00
8,106.0,276,0.3,22.2,0.84,1004.5,0.0,-99,-99,-99,27.70,2019-10-19T14:00:00+08:00,19.80,2019-10-19T01:10:00+08:00
9,313.0,0,0.1,24.4,0.72,980.3,0.0,-99,-99,-99,31.10,2019-10-19T13:40:00+08:00,19.90,2019-10-19T06:00:00+08:00


In [23]:
# combine all dataframes above
pd1 = pd.DataFrame({"locationName" : locationName, "lat": lat, "lon": lon})
final_df = pd.concat([pd1, location_df, weather_df ], axis = 1 )
final_df.head()

Unnamed: 0,locationName,lat,lon,CITY,CITY_SN,TOWN,TOWN_SN,ELEV,WDIR,WDSD,...,HUMD,PRES,H_24R,H_FX,H_XD,H_FXT,D_TX,D_TXT,D_TN,D_TNT
0,國三南深路交流道,25.03595,121.611456,臺北市,1,南港區,35,68.0,83,0.8,...,0.8,1012.0,10.5,-99.0,-99,-99,22.7,2019-10-19T12:50:00+08:00,20.5,2019-10-19T06:30:00+08:00
1,水試所竹北,24.8525,120.998167,新竹縣,10,竹北市,64,25.0,27,2.6,...,0.68,1015.3,0.0,10.3,78,2019-10-19T18:34:00+08:00,24.9,2019-10-19T09:40:00+08:00,21.5,2019-10-19T04:40:00+08:00
2,水試所鹿港,24.091036,120.428836,彰化縣,14,鹿港鎮,140,7.0,47,5.3,...,0.69,1015.5,0.0,12.1,52,2019-10-19T18:21:00+08:00,26.4,2019-10-19T10:40:00+08:00,22.0,2019-10-19T02:10:00+08:00
3,水試所臺西,23.718408,120.183736,雲林縣,16,臺西鄉,187,4.0,43,7.9,...,0.69,1015.4,0.0,15.3,63,2019-10-19T18:56:00+08:00,25.4,2019-10-19T12:00:00+08:00,22.1,2019-10-19T03:20:00+08:00
4,水試所七股,23.12255,120.071589,臺南市,4,七股區,253,11.0,3,4.7,...,0.72,1015.1,0.0,-99.0,-99,-99,26.3,2019-10-19T12:30:00+08:00,21.5,2019-10-19T05:20:00+08:00
