Precipitation readings at weather-station level, updated every five minutes

In [7]:
import json
import pandas as pd

In [8]:
with open('rain_1203.json', 'r') as f:
    data = json.load(f)

In [9]:
## Extract stations ##
stations = data['metadata']['stations']
# print(len(stations)) #-- 13
sts = []
for st in range(0,len(stations)):
    temp = [stations[st]['device_id'],stations[st]['name'], stations[st]['location']['latitude'],stations[st]['location']['longitude']]
    sts.append(temp)

# Location and camera info for a day
loc_cam = pd.DataFrame(sts, columns=['device_id', 'name','latitude', 'longitude'])
print(loc_cam)
print(loc_cam.shape)

   device_id                    name  latitude  longitude
0        S77          Alexandra Road   1.29370  103.81250
1       S109     Ang Mo Kio Avenue 5   1.37640  103.84920
2        S90        Bukit Timah Road   1.31910  103.81910
3       S114  Choa Chu Kang Avenue 4   1.38000  103.73000
4        S50           Clementi Road   1.33370  103.77680
..       ...                     ...       ...        ...
63       S36    Upper Serangoon Road   1.33820  103.86570
64       S08      Upper Thomson Road   1.37010  103.82710
65      S116      West Coast Highway   1.28100  103.75400
66      S104      Woodlands Avenue 9   1.44387  103.78538
67      S100          Woodlands Road   1.41720  103.74855

[68 rows x 4 columns]
(68, 4)


In [10]:
## Process for 12 hrs a day ##

values = data['items']

readings = []
for rd in range(0,len(values)):
    r_list = values[rd]['readings']
    time = values[rd]['timestamp']
    for r_ in range(len(r_list)):
        readings.append([time,r_list[r_]['station_id'],r_list[r_]['value']])
        
rain_unsort = pd.DataFrame(readings, columns=['time_stamp', 'device_id','value'])

rain = rain_unsort.sort_values(by=['device_id'])

print(rain[:6])
print(rain.shape)

g = rain.groupby('device_id').cumcount()
val_hr = rain.groupby(['device_id',g // 12]).mean().reset_index(level=1, drop=True).reset_index()
print(val_hr.shape) #(14*24)

                      time_stamp device_id  value
12935  2022-03-12T16:05:00+08:00       S08    0.0
12265  2022-03-12T15:15:00+08:00       S08    0.0
8179   2022-03-12T10:10:00+08:00       S08    0.0
3150   2022-03-12T03:55:00+08:00       S08    0.0
12332  2022-03-12T15:20:00+08:00       S08    0.0
8112   2022-03-12T10:05:00+08:00       S08    0.0
(19237, 3)
(1609, 2)


In [11]:
T1 = pd.merge(loc_cam, val_hr, on='device_id', how='inner')
print(T1)

     device_id            name  latitude  longitude  value
0          S77  Alexandra Road    1.2937  103.81250    0.0
1          S77  Alexandra Road    1.2937  103.81250    0.0
2          S77  Alexandra Road    1.2937  103.81250    0.0
3          S77  Alexandra Road    1.2937  103.81250    0.0
4          S77  Alexandra Road    1.2937  103.81250    0.0
...        ...             ...       ...        ...    ...
1604      S100  Woodlands Road    1.4172  103.74855    0.0
1605      S100  Woodlands Road    1.4172  103.74855    0.0
1606      S100  Woodlands Road    1.4172  103.74855    0.0
1607      S100  Woodlands Road    1.4172  103.74855    0.0
1608      S100  Woodlands Road    1.4172  103.74855    0.0

[1609 rows x 5 columns]


In [12]:
T1.to_csv('rain.csv', sep='\t')