# Intro 

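This notebook builds a mapping from each camera location to its nearest sensor station: one station for the non-rainfall readings (air temperature, wind direction, wind speed, relative humidity) and one for rainfall.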

In [129]:
import pandas as pd 

import json
import requests

from math import sin, cos, sqrt, atan2, radians

In [130]:
# Helper function 
def get_distance(lat1, lat2, lon1, lon2):
    """
    Great-circle distance between two points, computed with the haversine formula.

    Note the argument order: both latitudes come first, then both longitudes.

    Code formula taken from
    https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude

    Parameters
    ----------
    lat1, lat2 : float
        Latitude of the first and second point, in degrees.
    lon1, lon2 : float
        Longitude of the first and second point, in degrees.

    Returns
    -------
    float
        Distance along the sphere, in the unit of the radius constant ``R``
        (approximate Earth radius in kilometres).
    """

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    # Approximate Earth radius in km, as used by the referenced formula.
    R = 6373.0

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise "math domain error". Cap it at 1.
    # A plain comparison is used (not min/max) so a NaN input still yields NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

In [131]:
# Load the air-temperature readings from CSV and preview the first rows.
air_temp = pd.read_csv("air_temp.csv")
air_temp.head()

Unnamed: 0.1,Unnamed: 0,air_temp_realtime,station_id,station_loc,station_name,timestamp
0,0,28.0,S107,"(1.3135, 103.9625)",East Coast Parkway,2022-03-25T220000
1,1,27.7,S108,"(1.2799, 103.8703)",Marina Gardens Drive,2022-03-25T220000
2,2,26.7,S44,"(1.34583, 103.68166)",Nanyang Avenue,2022-03-25T220000
3,3,25.3,S106,"(1.4168, 103.9673)",Pulau Ubin,2022-03-25T220000
4,4,26.8,S24,"(1.3678, 103.9826)",Upper Changi Road North,2022-03-25T220000


In [132]:
# List the distinct station IDs that appear in the air-temperature readings.
air_temp.station_id.unique()

array(['S107', 'S108', 'S44', 'S106', 'S24'], dtype=object)

In [133]:
# Reduce the readings to one row per distinct (station_id, station_loc) pair.
air_temp = air_temp.loc[:, ["station_id", "station_loc"]].drop_duplicates()

In [134]:
# Load the wind-direction, wind-speed and relative-humidity readings, keeping
# one row per distinct (station_id, station_loc) pair from each file.
wind_dir, wind_spd, rh = (
    pd.read_csv(csv_path)[["station_id", "station_loc"]].drop_duplicates()
    for csv_path in ("wind_dir.csv", "wind_speed.csv", "RH%.csv")
)

In [135]:
# Sanity check: each of the other station tables should match the
# air-temperature one column by column, so a single nearest-station lookup
# can serve all four non-rainfall readings.
for other_stations in (wind_spd, wind_dir, rh):
    print((air_temp == other_stations).all())

station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool


In [137]:
# station_loc holds strings such as "(1.3135, 103.9625)". Split on ", ", then
# strip the leading "(" from the latitude piece and the trailing ")" from the
# longitude piece before converting to float.
air_temp["not_rain_lat"] = air_temp["station_loc"].map(
    lambda loc: float(loc.split(", ")[0][1:])
)
air_temp["not_rain_lon"] = air_temp["station_loc"].map(
    lambda loc: float(loc.split(", ")[1][:-1])
)

In [138]:
# Station table for the non-rainfall readings: drop the raw location string
# (already parsed into lat/lon columns) and rename the ID column so it does
# not clash with the rainfall station ID after merging.
not_rainfall = (
    air_temp
    .drop(columns="station_loc")
    .rename(columns={"station_id": "not_rainfall"})
)
not_rainfall

Unnamed: 0,not_rainfall,not_rain_lat,not_rain_lon
0,S107,1.3135,103.9625
1,S108,1.2799,103.8703
2,S44,1.34583,103.68166
3,S106,1.4168,103.9673
4,S24,1.3678,103.9826


In [139]:
# Read the rainfall readings and keep one row per distinct
# (station_id, station_loc) pair.
rainfall = (
    pd.read_csv("rainfall.csv")
    .loc[:, ["station_id", "station_loc"]]
    .drop_duplicates()
)

In [140]:
# station_loc holds strings such as "(1.2937, 103.8125)". Split on ", ", then
# strip the leading "(" from the latitude piece and the trailing ")" from the
# longitude piece before converting to float.
rainfall["rain_lat"] = rainfall["station_loc"].map(
    lambda loc: float(loc.split(", ")[0][1:])
)
rainfall["rain_lon"] = rainfall["station_loc"].map(
    lambda loc: float(loc.split(", ")[1][:-1])
)

In [141]:
# Drop the raw location string and rename the ID column to "rainfall" so it
# is distinguishable from the non-rainfall station ID after merging.
rainfall = (
    rainfall
    .drop(columns="station_loc")
    .rename(columns={"station_id": "rainfall"})
)

In [142]:
# Preview the first rows of the rainfall station table.
rainfall.head()

Unnamed: 0,rainfall,rain_lat,rain_lon
0,S77,1.2937,103.8125
1,S109,1.3764,103.8492
2,S90,1.3191,103.8191
3,S114,1.38,103.73
4,S50,1.3337,103.7768


In [143]:
# Fetch the current traffic-image metadata; only the camera list of the first
# item in the response is used.
URL = "https://api.data.gov.sg/v1/transport/traffic-images"
response = requests.get(URL, timeout=10)
# Fail fast on an HTTP error instead of trying to parse an error body and
# hitting a confusing KeyError or decode error further down.
response.raise_for_status()
resp_content = response.json()
cameras = resp_content['items'][0]['cameras']

In [144]:
# One [camera_id, latitude, longitude] row per camera in the API response.
df_rows = [
    [cam["camera_id"], cam["location"]["latitude"], cam["location"]["longitude"]]
    for cam in cameras
]

In [147]:
# Tabulate the collected camera rows under explicit column names.
camera_df = pd.DataFrame(
    data=df_rows,
    columns=["cam_id", "cam_lat", "cam_lon"],
)

In [148]:
# Display the camera table (ID, latitude, longitude).
camera_df

Unnamed: 0,cam_id,cam_lat,cam_lon
0,1003,1.323957,103.872858
1,1004,1.319536,103.875067
2,1005,1.363520,103.905394
3,1006,1.357099,103.902042
4,1501,1.274144,103.851317
...,...,...,...
82,9704,1.422143,103.795421
83,9705,1.426277,103.787166
84,9706,1.412701,103.806427
85,1001,1.295313,103.871146


# Merge tables and filter 

In [177]:
# Pair every camera with every non-rainfall station (cartesian product) so a
# distance can be computed for each combination.
df = pd.merge(camera_df, not_rainfall, how="cross")

In [178]:
# Distance from each camera to each non-rainfall station. Keyword arguments
# make get_distance's (lat1, lat2, lon1, lon2) ordering explicit.
df["not_rain_dist"] = df.apply(
    lambda row: get_distance(
        lat1=row["cam_lat"],
        lat2=row["not_rain_lat"],
        lon1=row["cam_lon"],
        lon2=row["not_rain_lon"],
    ),
    axis=1,
)

In [179]:
# Boolean mask marking, for each camera, the row(s) whose distance equals that
# camera's minimum distance. The string "min" is passed instead of the Python
# builtin so pandas uses its own group-wise minimum without the FutureWarning
# that newer versions raise for builtin callables.
idx = df.groupby(['cam_id'])['not_rain_dist'].transform("min") == df['not_rain_dist']

In [180]:
# Keep only the camera/station rows flagged as nearest by the boolean mask.
df = df.loc[idx]

In [181]:
# Display each camera with its nearest non-rainfall station and the distance to it.
df

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist
1,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748
6,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426
10,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707
15,1006,1.357099,103.902042,S107,1.31350,103.96250,8.289444
21,1501,1.274144,103.851317,S108,1.27990,103.87030,2.205929
...,...,...,...,...,...,...,...
412,9704,1.422143,103.795421,S44,1.34583,103.68166,15.233866
417,9705,1.426277,103.787166,S44,1.34583,103.68166,14.754971
422,9706,1.412701,103.806427,S44,1.34583,103.68166,15.741865
426,1001,1.295313,103.871146,S108,1.27990,103.87030,1.717000


In [182]:
# Pair every camera row (already matched to its nearest non-rainfall station)
# with every rainfall station.
df2 = df.merge(rainfall, how="cross")
# Distance from each camera to each rainfall station; get_distance takes both
# latitudes first, then both longitudes.
df2["rain_dist"] = df2.apply(
    lambda row: get_distance(row.cam_lat, row.rain_lat, row.cam_lon, row.rain_lon),
    axis=1,
)
# Keep, per camera, the row(s) at minimum rainfall-station distance (exact ties
# would keep more than one row). The string "min" replaces the Python builtin
# to avoid the FutureWarning newer pandas raises for builtin callables.
idx2 = df2.groupby(['cam_id'])['rain_dist'].transform("min") == df2['rain_dist']
df2 = df2[idx2]

In [191]:
# Final mapping: camera ID with its nearest non-rainfall and rainfall station
# IDs, ordered by camera ID with a fresh 0..n-1 index.
df_final = (
    df2[["cam_id", "not_rainfall", "rainfall"]]
    .sort_values("cam_id")
    .reset_index(drop=True)
)

In [192]:
# Display the final camera -> station mapping.
df_final

Unnamed: 0,cam_id,not_rainfall,rainfall
0,1001,S108,S119
1,1002,S108,S215
2,1003,S108,S123
3,1004,S108,S215
4,1005,S107,S221
...,...,...,...
82,9702,S108,S08
83,9703,S44,S211
84,9704,S44,S40
85,9705,S44,S104


In [193]:
# Write the camera -> station mapping to CSV, omitting the DataFrame index.
df_final.to_csv("camera_station_mapping.csv", index = False)