Air temperature readings at weather-station level, upto one-minute interval

In [7]:
import json
import pandas as pd

In [8]:
with open('temp_1203.json', 'r') as f:
    data = json.load(f)

In [9]:
## Extract stations ##
stations = data['metadata']['stations']
# print(len(stations)) #-- 13
sts = []
for st in range(0,len(stations)):
    temp_ = [stations[st]['device_id'],stations[st]['name'], stations[st]['location']['latitude'],stations[st]['location']['longitude']]
    sts.append(temp_)

# Location and camera info for a day
loc_cam = pd.DataFrame(sts, columns=['device_id', 'name','latitude', 'longitude'])
print(loc_cam)
print(loc_cam.shape)

   device_id                     name  latitude  longitude
0       S109      Ang Mo Kio Avenue 5   1.37640  103.84920
1        S50            Clementi Road   1.33370  103.77680
2       S107       East Coast Parkway   1.31350  103.96250
3        S43           Kim Chuan Road   1.33990  103.88780
4       S108     Marina Gardens Drive   1.27990  103.87030
5        S44           Nanyang Avenue   1.34583  103.68166
6       S121   Old Choa Chu Kang Road   1.37288  103.72244
7       S106               Pulau Ubin   1.41680  103.96730
8       S111              Scotts Road   1.31055  103.83650
9       S115      Tuas South Avenue 3   1.29377  103.61843
10       S24  Upper Changi Road North   1.36780  103.98260
11      S116       West Coast Highway   1.28100  103.75400
12      S104       Woodlands Avenue 9   1.44387  103.78538
13      S100           Woodlands Road   1.41720  103.74855
(14, 4)


In [10]:
## Process for 12 hrs a day ##

values = data['items']
readings = []
for rd in range(0,len(values)):
    r_list = values[rd]['readings']
    time = values[rd]['timestamp']
    for r_ in range(len(r_list)):
        readings.append([time,r_list[r_]['station_id'],r_list[r_]['value']])
        
air_temp_unsort = pd.DataFrame(readings, columns=['time_stamp', 'device_id','value'])

air_temp = air_temp_unsort.sort_values(by=['device_id'])

print(air_temp[:6])
print(air_temp.shape)


g = air_temp.groupby('device_id').cumcount()
val_hr = air_temp.groupby(['device_id',g // 60]).mean().reset_index(level=1, drop=True).reset_index()
print(val_hr.shape) #(14*24)

                      time_stamp device_id  value
19928  2022-03-12T23:59:00+08:00      S100   26.9
4761   2022-03-12T05:42:00+08:00      S100   25.5
17808  2022-03-12T21:27:00+08:00      S100   28.0
12860  2022-03-12T15:30:00+08:00      S100   34.3
12846  2022-03-12T15:29:00+08:00      S100   34.6
4775   2022-03-12T05:43:00+08:00      S100   25.5
(19929, 3)
(334, 2)


In [11]:
T1 = pd.merge(loc_cam, val_hr, on='device_id', how='inner')
print(T1)

## 24 hrs aggregated air temp value from 14 cams for a day --> 336 rows x 5 columns ## 

    device_id                 name  latitude  longitude      value
0        S109  Ang Mo Kio Avenue 5    1.3764  103.84920  28.606667
1        S109  Ang Mo Kio Avenue 5    1.3764  103.84920  28.296667
2        S109  Ang Mo Kio Avenue 5    1.3764  103.84920  28.143333
3        S109  Ang Mo Kio Avenue 5    1.3764  103.84920  28.446667
4        S109  Ang Mo Kio Avenue 5    1.3764  103.84920  29.196667
..        ...                  ...       ...        ...        ...
329      S100       Woodlands Road    1.4172  103.74855  28.370000
330      S100       Woodlands Road    1.4172  103.74855  28.325000
331      S100       Woodlands Road    1.4172  103.74855  28.228333
332      S100       Woodlands Road    1.4172  103.74855  28.305000
333      S100       Woodlands Road    1.4172  103.74855  28.075439

[334 rows x 5 columns]


In [12]:
T1.to_csv('temperature.csv', sep='\t')