# Intro 

This notebook builds a mapping from each camera location to its nearest sensor station.

In [1]:
import pandas as pd 

import json
import requests

from math import sin, cos, sqrt, atan2, radians

In [2]:
# Helper function 
def get_distance(lat1, lat2, lon1, lon2, radius=6373.0):
    """
    Great-circle distance between two points, using the haversine formula.

    Code formula taken from
    https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude

    Note the argument order: both latitudes come first, then both longitudes.

    Parameters
    ----------
    lat1, lat2 : float
        Latitudes of the first and second point, in degrees.
    lon1, lon2 : float
        Longitudes of the first and second point, in degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6373.0 (the approximate Earth radius in km used by the
        referenced formula).

    Returns
    -------
    float
        Distance between the two points along the sphere's surface.
    """
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius * c

In [3]:
# Load the air-temperature readings. Only the station_id / station_loc columns
# are kept further down; the remaining columns are just previewed here.
air_temp = pd.read_csv("air_temp.csv")
air_temp.head()

Unnamed: 0.1,Unnamed: 0,air_temp_realtime,station_id,station_loc,station_name,timestamp
0,0,28.0,S107,"(1.3135, 103.9625)",East Coast Parkway,2022-03-25T220000
1,1,27.7,S108,"(1.2799, 103.8703)",Marina Gardens Drive,2022-03-25T220000
2,2,26.7,S44,"(1.34583, 103.68166)",Nanyang Avenue,2022-03-25T220000
3,3,25.3,S106,"(1.4168, 103.9673)",Pulau Ubin,2022-03-25T220000
4,4,26.8,S24,"(1.3678, 103.9826)",Upper Changi Road North,2022-03-25T220000


In [4]:
# Distinct station ids present in the air-temperature readings.
air_temp.station_id.unique()

array(['S107', 'S108', 'S44', 'S106', 'S24'], dtype=object)

In [5]:
# Reduce the readings to one row per distinct (station id, location) pair.
air_temp = air_temp.loc[:, ["station_id", "station_loc"]].drop_duplicates()

In [6]:
# The other sensor files are only needed for their station lists, so each one
# is read and reduced to its distinct (station id, location) pairs in one step.
station_cols = ["station_id", "station_loc"]

wind_dir = pd.read_csv("wind_dir.csv")[station_cols].drop_duplicates()

wind_spd = pd.read_csv("wind_speed.csv")[station_cols].drop_duplicates()

rh = pd.read_csv("RH%.csv")[station_cols].drop_duplicates()

In [7]:
# Element-wise comparison of each other sensor type's station table against the
# air-temperature one; a True per column means ids and locations all match.
for other_stations in (wind_spd, wind_dir, rh):
    print((air_temp == other_stations).all())

station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool


In [8]:
# station_loc holds strings like "(1.3135, 103.9625)". Strip the parentheses and
# split on the bare comma so spacing differences (e.g. "(1.3135,103.9625)") do
# not break the parse; float() ignores any surrounding whitespace.
air_temp["not_rain_lat"] = air_temp.station_loc.apply(lambda loc: float(loc.strip("() ").split(",")[0]))
air_temp["not_rain_lon"] = air_temp.station_loc.apply(lambda loc: float(loc.strip("() ").split(",")[1]))

In [9]:
# Station table shared by the non-rainfall sensors: the id column is renamed to
# `not_rainfall` and the raw location string is dropped in favour of lat/lon.
not_rainfall = (
    air_temp
    .drop(columns="station_loc")
    .rename(columns={"station_id": "not_rainfall"})
)
not_rainfall

Unnamed: 0,not_rainfall,not_rain_lat,not_rain_lon
0,S107,1.3135,103.9625
1,S108,1.2799,103.8703
2,S44,1.34583,103.68166
3,S106,1.4168,103.9673
4,S24,1.3678,103.9826


In [10]:
# Read the rainfall readings and keep one row per distinct station.
rainfall = (
    pd.read_csv("rainfall.csv")
    .loc[:, ["station_id", "station_loc"]]
    .drop_duplicates()
)

In [11]:
# station_loc holds "(lat, lon)" strings. Strip the parentheses and split on the
# bare comma so spacing differences (e.g. "(1.29,103.81)") do not break the
# parse; float() ignores any surrounding whitespace.
rainfall["rain_lat"] = rainfall.station_loc.apply(lambda loc: float(loc.strip("() ").split(",")[0]))
rainfall["rain_lon"] = rainfall.station_loc.apply(lambda loc: float(loc.strip("() ").split(",")[1]))

In [12]:
# Drop the raw location string and expose the station id under the name `rainfall`.
rainfall = rainfall.drop(columns="station_loc").rename(columns={"station_id": "rainfall"})

In [13]:
# Preview the rainfall station table (station id plus parsed lat/lon).
rainfall.head()

Unnamed: 0,rainfall,rain_lat,rain_lon
0,S77,1.2937,103.8125
1,S109,1.3764,103.8492
2,S90,1.3191,103.8191
3,S114,1.38,103.73
4,S50,1.3337,103.7768


In [14]:
# Fetch the current traffic-image listing; only the camera metadata is used.
URL = "https://api.data.gov.sg/v1/transport/traffic-images"
response = requests.get(URL, timeout=10)
# Fail loudly on an HTTP error instead of trying to index into an error payload.
response.raise_for_status()
resp_content = response.json()
cameras = resp_content['items'][0]['cameras']

In [15]:
# One [camera id, latitude, longitude] row per camera entry in the API response.
df_rows = [
    [cam["camera_id"], cam["location"]["latitude"], cam["location"]["longitude"]]
    for cam in cameras
]

In [16]:
# Turn the collected rows into a frame with named id / latitude / longitude columns.
camera_df = pd.DataFrame(df_rows, columns = ["cam_id", "cam_lat", "cam_lon"])

In [17]:
# Display the camera table built from the API response.
camera_df

Unnamed: 0,cam_id,cam_lat,cam_lon
0,1001,1.295313,103.871146
1,1002,1.319541,103.878563
2,1003,1.323957,103.872858
3,1004,1.319536,103.875067
4,1005,1.363520,103.905394
...,...,...,...
82,9702,1.394741,103.817971
83,9703,1.422857,103.773005
84,9704,1.422143,103.795421
85,9705,1.426277,103.787166


# Merge tables and filter 

In [39]:
# Pair every camera with every non-rainfall station (Cartesian product).
df = pd.merge(camera_df, not_rainfall, how="cross")

In [40]:
# Distance for every (camera, non-rainfall station) pair. get_distance takes
# both latitudes first, then both longitudes.
df["not_rain_dist"] = df.apply(lambda row: get_distance(row.cam_lat, row.not_rain_lat, row.cam_lon, row.not_rain_lon), axis = 1)
# Keep exactly one row per camera: the pair with the smallest distance.
# idxmin yields a single row label per camera, so equidistant stations cannot
# leave duplicate cameras behind; sort=False keeps the cameras in their original order.
idx = df.groupby("cam_id", sort=False)["not_rain_dist"].idxmin()
df = df.loc[idx]

In [41]:
# Cameras paired with their closest non-rainfall station and the distance to it.
df

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist
1,1001,1.295313,103.871146,S108,1.27990,103.87030,1.717000
6,1002,1.319541,103.878563,S108,1.27990,103.87030,4.503986
11,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748
16,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426
20,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707
...,...,...,...,...,...,...,...
411,9702,1.394741,103.817971,S108,1.27990,103.87030,14.036684
417,9703,1.422857,103.773005,S44,1.34583,103.68166,13.288216
422,9704,1.422143,103.795421,S44,1.34583,103.68166,15.233866
427,9705,1.426277,103.787166,S44,1.34583,103.68166,14.754971


In [42]:
# Pair every camera row with every rainfall station and compute the distance.
# get_distance takes both latitudes first, then both longitudes.
df2 = df.merge(rainfall, how = "cross")
df2["rain_dist"] = df2.apply(lambda row: get_distance(row.cam_lat, row.rain_lat, row.cam_lon, row.rain_lon), axis = 1)
# Keep exactly one row per camera: the pair with the smallest distance.
# idxmin yields a single row label per camera, so equidistant stations cannot
# leave duplicate cameras behind; sort=False keeps the cameras in their original order.
idx2 = df2.groupby("cam_id", sort=False)["rain_dist"].idxmin()
df2 = df2.loc[idx2]

In [43]:
# Camera table extended with each camera's closest rainfall station.
df2

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist,rainfall,rain_lat,rain_lon,rain_dist
18,1001,1.295313,103.871146,S108,1.27990,103.87030,1.717000,S119,1.30105,103.86660,0.814070
74,1002,1.319541,103.878563,S108,1.27990,103.87030,4.503986,S215,1.32785,103.88899,1.482777
193,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748,S123,1.32140,103.85770,1.709364
210,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426,S215,1.32785,103.88899,1.803431
315,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707,S221,1.35691,103.89088,1.773508
...,...,...,...,...,...,...,...,...,...,...,...
5640,9702,1.394741,103.817971,S108,1.27990,103.87030,14.036684,S08,1.37010,103.82710,2.922747
5678,9703,1.422857,103.773005,S44,1.34583,103.68166,13.288216,S211,1.42918,103.75711,1.902241
5726,9704,1.422143,103.795421,S44,1.34583,103.68166,15.233866,S40,1.40440,103.78962,2.076291
5846,9705,1.426277,103.787166,S44,1.34583,103.68166,14.754971,S104,1.44387,103.78538,1.966909


# Updated section 

Because the forecast does not have a station attached, a manual mapping is needed for the forecast areas.  
I looked up each area on Google Maps, found its approximate centre, and took that point's latitude and longitude.


In [32]:
# Load the manually compiled area table; its `coord` column is parsed in the next cell.
area = pd.read_csv("area_lat_lon.csv")

In [35]:
# `coord` holds "lat, lon" strings. Split on the bare comma so spacing
# differences (e.g. "1.37,103.84") do not break the parse; float() ignores any
# surrounding whitespace.
# NOTE: this cell consumes the `coord` column, so it cannot be re-run on its own
# without first re-running the cell that reads area_lat_lon.csv.
area["area_lat"] = area.coord.apply(lambda pair: float(pair.split(",")[0]))
area["area_lon"] = area.coord.apply(lambda pair: float(pair.split(",")[1]))
area = area.drop(columns="coord")

In [36]:
# Preview the area table after `coord` was split into numeric lat/lon columns.
area.head()

Unnamed: 0,region,area_lat,area_lon
0,Ang Mo Kio,1.377841,103.844704
1,Bedok,1.323922,103.927299
2,Bishan,1.356407,103.84055
3,Boon Lay,1.314076,103.702239
4,Bukit Batok,1.356077,103.757304


In [37]:
# Pair every camera row with every area centre and compute the distance.
# get_distance takes both latitudes first, then both longitudes.
df3 = df2.merge(area, how = "cross")
df3["area_dist"] = df3.apply(lambda row: get_distance(row.cam_lat, row.area_lat, row.cam_lon, row.area_lon), axis = 1)
# Keep exactly one row per camera: the pair with the smallest distance.
# idxmin yields a single row label per camera, so equidistant centres cannot
# leave duplicate cameras behind; sort=False keeps the cameras in their original order.
idx3 = df3.groupby("cam_id", sort=False)["area_dist"].idxmin()
df3 = df3.loc[idx3]

In [38]:
# Cameras with their closest non-rainfall station, rainfall station and area.
df3

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist,rainfall,rain_lat,rain_lon,rain_dist,region,area_lat,area_lon,area_dist
19,1001,1.295313,103.871146,S108,1.27990,103.87030,1.717000,S119,1.30105,103.86660,0.814070,Kallang,1.310651,103.866324,1.788340
60,1002,1.319541,103.878563,S108,1.27990,103.87030,4.503986,S215,1.32785,103.88899,1.482777,Geylang,1.322886,103.890540,1.382904
113,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748,S123,1.32140,103.85770,1.709364,Kallang,1.310651,103.866324,1.648728
160,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426,S215,1.32785,103.88899,1.803431,Kallang,1.310651,103.866324,1.386249
213,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707,S221,1.35691,103.89088,1.773508,Paya Lebar,1.358887,103.915261,1.212131
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3862,9702,1.394741,103.817971,S108,1.27990,103.87030,14.036684,S08,1.37010,103.82710,2.922747,Central Water Catchment,1.378353,103.800802,2.639560
3938,9703,1.422857,103.773005,S44,1.34583,103.68166,13.288216,S211,1.42918,103.75711,1.902241,Sungei Kadut,1.414737,103.755842,2.111402
3969,9704,1.422143,103.795421,S44,1.34583,103.68166,15.233866,S40,1.40440,103.78962,2.076291,Mandai,1.421789,103.796008,0.076262
4016,9705,1.426277,103.787166,S44,1.34583,103.68166,14.754971,S104,1.44387,103.78538,1.966909,Mandai,1.421789,103.796008,1.102623


# Writing Final 

In [45]:
# Final mapping: only the identifier columns, ordered by camera id, with a fresh
# 0..n-1 index.
df_final = (
    df3[["cam_id", "not_rainfall", "rainfall", "region"]]
    .sort_values("cam_id")
    .reset_index(drop=True)
)

In [46]:
# Display the final camera -> station / region mapping.
df_final

Unnamed: 0,cam_id,not_rainfall,rainfall,region
0,1001,S108,S119,Kallang
1,1002,S108,S215,Geylang
2,1003,S108,S123,Kallang
3,1004,S108,S215,Kallang
4,1005,S107,S221,Paya Lebar
...,...,...,...,...
82,9702,S108,S08,Central Water Catchment
83,9703,S44,S211,Sungei Kadut
84,9704,S44,S40,Mandai
85,9705,S44,S104,Mandai


In [47]:
# Write the mapping to disk; index = False keeps the row index out of the file.
df_final.to_csv("camera_station_mapping.csv", index = False)