# Intro 

This notebook builds a mapping from each camera location to the nearest sensor station.

In [1]:
import pandas as pd 

import json
import requests

from math import sin, cos, sqrt, atan2, radians

In [2]:
# Helper function 
def get_distance(lat1, lat2, lon1, lon2, radius_km=6373.0):
    """
    Great-circle (haversine) distance between two latitude/longitude points.

    Code formula taken from 
    https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude

    Note the argument order: both latitudes come first, then both longitudes.

    Parameters
    ----------
    lat1, lat2 : float
        Latitude of the first and of the second point, in decimal degrees.
    lon1, lon2 : float
        Longitude of the first and of the second point, in decimal degrees.
    radius_km : float, optional
        Radius of the sphere the distance is measured on. Defaults to 6373.0,
        the approximate Earth radius in kilometres used by the referenced answer.

    Returns
    -------
    float
        Distance between the two points, in the unit of ``radius_km``.
    """
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius_km * c

In [3]:
# Load the air-temperature readings from the local CSV export.
air_temp = pd.read_csv("air_temp.csv")
# Preview the first rows to see which columns are available.
air_temp.head()

Unnamed: 0.1,Unnamed: 0,air_temp_realtime,station_id,station_loc,station_name,timestamp
0,0,28.0,S107,"(1.3135, 103.9625)",East Coast Parkway,2022-03-25T220000
1,1,27.7,S108,"(1.2799, 103.8703)",Marina Gardens Drive,2022-03-25T220000
2,2,26.7,S44,"(1.34583, 103.68166)",Nanyang Avenue,2022-03-25T220000
3,3,25.3,S106,"(1.4168, 103.9673)",Pulau Ubin,2022-03-25T220000
4,4,26.8,S24,"(1.3678, 103.9826)",Upper Changi Road North,2022-03-25T220000


In [4]:
# List the distinct station ids present in the air-temperature readings.
air_temp["station_id"].unique()

array(['S107', 'S108', 'S44', 'S106', 'S24'], dtype=object)

In [5]:
# Reduce the readings to one row per (station_id, station_loc) pair.
station_columns = ["station_id", "station_loc"]
air_temp = air_temp.loc[:, station_columns].drop_duplicates()

In [6]:
def _station_locations(csv_path):
    """Return the distinct (station_id, station_loc) rows found in ``csv_path``."""
    readings = pd.read_csv(csv_path)
    return readings[["station_id", "station_loc"]].drop_duplicates()


# Same reduction for the wind-direction, wind-speed and relative-humidity files.
wind_dir = _station_locations("wind_dir.csv")
wind_spd = _station_locations("wind_speed.csv")
rh = _station_locations("RH%.csv")

In [7]:
# Check column by column that the other sensor files list the same stations
# and locations as the air-temperature file.
for other_stations in (wind_spd, wind_dir, rh):
    print((air_temp == other_stations).all())

station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool
station_id     True
station_loc    True
dtype: bool


In [8]:
def _station_lat(loc):
    """Latitude from a "(lat, lon)" string: first field without its leading character."""
    first_field = loc.split(", ")[0]
    return float(first_field[1:])


def _station_lon(loc):
    """Longitude from a "(lat, lon)" string: second field without its trailing character."""
    second_field = loc.split(", ")[1]
    return float(second_field[:-1])


air_temp["not_rain_lat"] = air_temp["station_loc"].apply(_station_lat)
air_temp["not_rain_lon"] = air_temp["station_loc"].apply(_station_lon)

In [9]:
# Station table for the non-rainfall sensors: the id column is renamed to
# "not_rainfall" and the raw location string is dropped now that it is parsed.
not_rainfall = (
    air_temp
    .drop(columns="station_loc")
    .rename(columns={"station_id": "not_rainfall"})
)
not_rainfall

Unnamed: 0,not_rainfall,not_rain_lat,not_rain_lon
0,S107,1.3135,103.9625
1,S108,1.2799,103.8703
2,S44,1.34583,103.68166
3,S106,1.4168,103.9673
4,S24,1.3678,103.9826


In [10]:
# Load the rainfall readings and keep one row per (station_id, station_loc) pair.
rainfall = (
    pd.read_csv("rainfall.csv")
    .loc[:, ["station_id", "station_loc"]]
    .drop_duplicates()
)

In [11]:
def _rain_coord(loc, position):
    """Parse one coordinate out of a "(lat, lon)" string.

    ``position`` 0 yields the first field without its leading character,
    ``position`` 1 the second field without its trailing character.
    """
    field = loc.split(", ")[position]
    trimmed = field[1:] if position == 0 else field[:-1]
    return float(trimmed)


rainfall["rain_lat"] = rainfall["station_loc"].apply(_rain_coord, args=(0,))
rainfall["rain_lon"] = rainfall["station_loc"].apply(_rain_coord, args=(1,))

In [12]:
# Drop the raw location string and rename the id column to "rainfall".
rainfall = (
    rainfall
    .drop(columns="station_loc")
    .rename(columns={"station_id": "rainfall"})
)

In [13]:
# Preview the rainfall station table (id plus parsed latitude/longitude).
rainfall.head()

Unnamed: 0,rainfall,rain_lat,rain_lon
0,S77,1.2937,103.8125
1,S109,1.3764,103.8492
2,S90,1.3191,103.8191
3,S114,1.38,103.73
4,S50,1.3337,103.7768


In [14]:
# Fetch the traffic-image listing; the camera ids and locations come from it.
URL = "https://api.data.gov.sg/v1/transport/traffic-images"
response = requests.get(URL, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error payload and
# hitting a confusing KeyError on 'items' below.
response.raise_for_status()
resp_content = response.json()
# Only the first entry of 'items' is used.
cameras = resp_content['items'][0]['cameras']

In [15]:
# One [camera_id, latitude, longitude] row per camera in the API response.
df_rows = [
    [
        camera["camera_id"],
        camera["location"]["latitude"],
        camera["location"]["longitude"],
    ]
    for camera in cameras
]

In [16]:
# Tabulate the camera rows under explicit column names.
camera_columns = ["cam_id", "cam_lat", "cam_lon"]
camera_df = pd.DataFrame(data=df_rows, columns=camera_columns)

In [17]:
# Display the camera table (id, latitude, longitude).
camera_df

Unnamed: 0,cam_id,cam_lat,cam_lon
0,4702,1.272370,103.832400
1,1005,1.363520,103.905394
2,1006,1.357099,103.902042
3,1501,1.274144,103.851317
4,1502,1.271351,103.861828
...,...,...,...
82,1003,1.323957,103.872858
83,1004,1.319536,103.875067
84,8701,1.386470,103.741430
85,2707,1.398300,103.774247


# Merge tables and filter 

In [18]:
# Pair every camera with every non-rainfall station (cartesian product).
df = pd.merge(camera_df, not_rainfall, how="cross")

In [19]:
# Distance from each camera to each non-rainfall station (one row per pair).
df["not_rain_dist"] = df.apply(
    lambda row: get_distance(row.cam_lat, row.not_rain_lat, row.cam_lon, row.not_rain_lon),
    axis=1,
)
# Keep the closest station per camera. The string "min" replaces the builtin
# `min`, which recent pandas versions warn about when passed to transform.
idx = df.groupby("cam_id")["not_rain_dist"].transform("min") == df["not_rain_dist"]
# Equidistant stations all pass the filter above; keep only the first so each
# camera has exactly one row going into the next cross merge.
df = df[idx].drop_duplicates(subset="cam_id")

In [20]:
# Display each camera with its nearest non-rainfall station and the distance.
df

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist
1,4702,1.272370,103.832400,S108,1.27990,103.87030,4.296983
5,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707
10,1006,1.357099,103.902042,S107,1.31350,103.96250,8.289444
16,1501,1.274144,103.851317,S108,1.27990,103.87030,2.205929
21,1502,1.271351,103.861828,S108,1.27990,103.87030,1.338547
...,...,...,...,...,...,...,...
411,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748
416,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426
422,8701,1.386470,103.741430,S44,1.34583,103.68166,8.037871
427,2707,1.398300,103.774247,S44,1.34583,103.68166,11.834634


In [21]:
# Pair every camera row with every rainfall station, then measure the distance.
df2 = df.merge(rainfall, how="cross")
df2["rain_dist"] = df2.apply(
    lambda row: get_distance(row.cam_lat, row.rain_lat, row.cam_lon, row.rain_lon),
    axis=1,
)
# Keep the closest rainfall station per camera. The string "min" replaces the
# builtin `min`, which recent pandas versions warn about when passed to transform.
idx2 = df2.groupby("cam_id")["rain_dist"].transform("min") == df2["rain_dist"]
# Equidistant stations all pass the filter above; keep only the first so each
# camera has exactly one row.
df2 = df2[idx2].drop_duplicates(subset="cam_id")

In [22]:
# Display each camera with its nearest non-rainfall and rainfall stations.
df2

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist,rainfall,rain_lat,rain_lon,rain_dist
44,4702,1.272370,103.832400,S108,1.27990,103.87030,4.296983,S222,1.28987,103.82364,2.176666
111,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707,S221,1.35691,103.89088,1.773508
179,1006,1.357099,103.902042,S107,1.31350,103.96250,8.289444,S221,1.35691,103.89088,1.241377
219,1501,1.274144,103.851317,S108,1.27990,103.87030,2.205929,S108,1.27990,103.87030,2.205929
287,1502,1.271351,103.861828,S108,1.27990,103.87030,1.338547,S108,1.27990,103.87030,1.338547
...,...,...,...,...,...,...,...,...,...,...,...
5633,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748,S123,1.32140,103.85770,1.709364
5650,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426,S215,1.32785,103.88899,1.803431
5715,8701,1.386470,103.741430,S44,1.34583,103.68166,8.037871,S114,1.38000,103.73000,1.460587
5794,2707,1.398300,103.774247,S44,1.34583,103.68166,11.834634,S40,1.40440,103.78962,1.839157


# Updated section 

Because the forecast data has no station attached, these areas need a manual mapping.  
For each area, I looked it up on Google Maps, found the approximate centre, and took its latitude/longitude.


In [23]:
# Load the manually collected area coordinates.
area = pd.read_csv("area_lat_lon.csv")

In [24]:
def _coord_field(coord, position):
    """Return field ``position`` of a "lat, lon" string as a float."""
    return float(coord.split(", ")[position])


area["area_lat"] = area["coord"].apply(_coord_field, args=(0,))
area["area_lon"] = area["coord"].apply(_coord_field, args=(1,))
# The raw coordinate string is no longer needed once both parts are parsed.
area = area.drop(columns="coord")

In [25]:
# Preview the area table after parsing the coordinates.
area.head()

Unnamed: 0,region,area_lat,area_lon
0,Ang Mo Kio,1.377841,103.844704
1,Bedok,1.323922,103.927299
2,Bishan,1.356407,103.84055
3,Boon Lay,1.314076,103.702239
4,Bukit Batok,1.356077,103.757304


In [26]:
# Pair every camera row with every area, then measure the distance to the
# area's reference point.
df3 = df2.merge(area, how="cross")
df3["area_dist"] = df3.apply(
    lambda row: get_distance(row.cam_lat, row.area_lat, row.cam_lon, row.area_lon),
    axis=1,
)
# Keep the closest area per camera. The string "min" replaces the builtin
# `min`, which recent pandas versions warn about when passed to transform.
idx3 = df3.groupby("cam_id")["area_dist"].transform("min") == df3["area_dist"]
# Areas sharing the same coordinates (or otherwise equidistant) all pass the
# filter above; keep only the first so each camera has exactly one row.
df3 = df3[idx3].drop_duplicates(subset="cam_id")

In [27]:
# Display each camera with its nearest stations and nearest area.
df3

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist,rainfall,rain_lat,rain_lon,rain_dist,region,area_lat,area_lon,area_dist
5,4702,1.272370,103.832400,S108,1.27990,103.87030,4.296983,S222,1.28987,103.82364,2.176666,Bukit Merah,1.279513,103.823170,1.297930
72,1005,1.363520,103.905394,S107,1.31350,103.96250,8.442707,S221,1.35691,103.89088,1.773508,Paya Lebar,1.358887,103.915261,1.212131
119,1006,1.357099,103.902042,S107,1.31350,103.96250,8.289444,S221,1.35691,103.89088,1.241377,Paya Lebar,1.358887,103.915261,1.483300
153,1501,1.274144,103.851317,S108,1.27990,103.87030,2.205929,S108,1.27990,103.87030,2.205929,City,1.286372,103.842138,1.700544
200,1502,1.271351,103.861828,S108,1.27990,103.87030,1.338547,S108,1.27990,103.87030,1.338547,City,1.286372,103.842138,2.754294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3873,1003,1.323957,103.872858,S108,1.27990,103.87030,4.908748,S123,1.32140,103.85770,1.709364,Kallang,1.310651,103.866324,1.648728
3920,1004,1.319536,103.875067,S108,1.27990,103.87030,4.440426,S215,1.32785,103.88899,1.803431,Kallang,1.310651,103.866324,1.386249
3958,8701,1.386470,103.741430,S44,1.34583,103.68166,8.037871,S114,1.38000,103.73000,1.460587,Choa Chu Kang,1.388173,103.747974,0.751959
4032,2707,1.398300,103.774247,S44,1.34583,103.68166,11.834634,S40,1.40440,103.78962,1.839157,Sungei Kadut,1.414737,103.755842,2.744296


# Add north south east west 

In [28]:
# Reference point for each compass sector; the three lists are aligned by position.
compass = ["north", "south", "east", "west", "central"]
compass_lat = [1.4171600757873817,
               1.2689570746680328,
               1.3474858448902267,
               1.3529774835842006,
               1.3569871019585316]
compass_lon = [103.80906027423956,
               103.82763148734753,
               103.94089620103182,
               103.70537681098106,
               103.82099141380705]
# Build the frame column-wise so latitude/longitude are stored as float64;
# transposing a list of mixed lists would leave every column as object dtype.
compass_df = pd.DataFrame({
    "compass": compass,
    "compass_lat": compass_lat,
    "compass_lon": compass_lon,
})
compass_df

Unnamed: 0,compass,compass_lat,compass_lon
0,north,1.41716,103.80906
1,south,1.268957,103.827631
2,east,1.347486,103.940896
3,west,1.352977,103.705377
4,central,1.356987,103.820991


In [29]:
# Pair every camera row with every compass sector, then measure the distance
# to the sector's reference point.
df4 = df3.merge(compass_df, how="cross")
df4["compass_dist"] = df4.apply(
    lambda row: get_distance(row.cam_lat, row.compass_lat, row.cam_lon, row.compass_lon),
    axis=1,
)
# Keep the closest sector per camera. The string "min" replaces the builtin
# `min`, which recent pandas versions warn about when passed to transform.
idx4 = df4.groupby("cam_id")["compass_dist"].transform("min") == df4["compass_dist"]
# Equidistant sectors all pass the filter above; keep only the first so each
# camera has exactly one row in the final mapping.
df4 = df4[idx4].drop_duplicates(subset="cam_id")
df4.head()

Unnamed: 0,cam_id,cam_lat,cam_lon,not_rainfall,not_rain_lat,not_rain_lon,not_rain_dist,rainfall,rain_lat,rain_lon,rain_dist,region,area_lat,area_lon,area_dist,compass,compass_lat,compass_lon,compass_dist
1,4702,1.27237,103.8324,S108,1.2799,103.8703,4.296983,S222,1.28987,103.82364,2.176666,Bukit Merah,1.279513,103.82317,1.29793,south,1.268957,103.827631,0.652148
7,1005,1.36352,103.905394,S107,1.3135,103.9625,8.442707,S221,1.35691,103.89088,1.773508,Paya Lebar,1.358887,103.915261,1.212131,east,1.347486,103.940896,4.331958
12,1006,1.357099,103.902042,S107,1.3135,103.9625,8.289444,S221,1.35691,103.89088,1.241377,Paya Lebar,1.358887,103.915261,1.4833,east,1.347486,103.940896,4.450882
16,1501,1.274144,103.851317,S108,1.2799,103.8703,2.205929,S108,1.2799,103.8703,2.205929,City,1.286372,103.842138,1.700544,south,1.268957,103.827631,2.696312
21,1502,1.271351,103.861828,S108,1.2799,103.8703,1.338547,S108,1.2799,103.8703,1.338547,City,1.286372,103.842138,2.754294,south,1.268957,103.827631,3.812097


# Writing Final 

In [30]:
# Keep only the mapping columns, order by camera id and renumber the rows from 0.
final_columns = ["cam_id", "not_rainfall", "rainfall", "region", "compass"]
df_final = df4[final_columns].sort_values("cam_id").reset_index(drop=True)

In [31]:
# Count how many mapping rows fall in each compass sector.
df_final.groupby("compass").count()

Unnamed: 0_level_0,cam_id,not_rainfall,rainfall,region
compass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
central,19,19,19,19
east,18,18,18,18
north,15,15,15,15
south,19,19,19,19
west,16,16,16,16


In [32]:
# Write the final mapping to CSV without the row index.
df_final.to_csv("camera_station_mapping.csv", index = False)