In [1]:
from helper import get_req_handler, initBoto3Session, get_s3_objs
from dotenv import load_dotenv
import boto3
import os
from io import StringIO

import pandas as pd
import numpy as np

# Camera to Location Map 

In [42]:
cam_to_loc = pd.read_csv("camera_station_mapping.csv")
cam_to_loc = cam_to_loc.rename(columns = {"rainfall" : "rainfall_station_id", 
                                         "not_rainfall" : "non_rainfall_station_id", 
                                         "region" : "2hr_forecast_area"})
cam_to_loc.head()

Unnamed: 0,cam_id,non_rainfall_station_id,rainfall_station_id,2hr_forecast_area,compass
0,1001,S108,S119,Kallang,south
1,1002,S108,S215,Geylang,east
2,1003,S108,S123,Kallang,central
3,1004,S108,S215,Kallang,central
4,1005,S107,S221,Paya Lebar,east


# Set Up 

In [3]:
load_dotenv()

aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"]
aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"]

client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

bucket_name = os.environ["BUCKET_NAME"]
data_dir = os.environ["DATA_DIR"]

In [4]:
def get_csv(file_name):
    csv_obj = client.get_object(Bucket=bucket_name, Key= data_dir + file_name)
    body = csv_obj['Body']
    csv_string = body.read().decode('utf-8')
    df = pd.read_csv(StringIO(csv_string))
    df = df.drop("Unnamed: 0", axis = 1)
    return df

# Read Files 

IMPORTANT: Note all data will be consistent (sometime the stations wont appear)  
Time stamps are also different - shimar looking into it 

In [5]:
rainfall_realtime = get_csv("rainfall-realtime.csv" )
rainfall_realtime.head()

Unnamed: 0,timestamp,station_id,station_loc,station_name,rainfall_realtime,call_timestamp
0,2022-03-31T222500,S77,"(1.2937, 103.8125)",Alexandra Road,0.0,2022-03-31T223549
1,2022-03-31T222500,S109,"(1.3764, 103.8492)",Ang Mo Kio Avenue 5,0.0,2022-03-31T223549
2,2022-03-31T222500,S90,"(1.3191, 103.8191)",Bukit Timah Road,0.0,2022-03-31T223549
3,2022-03-31T222500,S114,"(1.38, 103.73)",Choa Chu Kang Avenue 4,0.0,2022-03-31T223549
4,2022-03-31T222500,S50,"(1.3337, 103.7768)",Clementi Road,0.0,2022-03-31T223549


In [6]:
non_rainfall_realtime = get_csv("non-rainfall-realtime.csv" )
non_rainfall_realtime.head()

Unnamed: 0,timestamp,station_id,station_loc,station_name,wind_speed_realtime,wind_dir_realtime,RH%_realtime,air_temp_realtime,call_timestamp
0,2022-03-31T223000,S107,"(1.3135, 103.9625)",East Coast Parkway,2.7,251,78.2,28.6,2022-03-31T223549
1,2022-03-31T223000,S108,"(1.2799, 103.8703)",Marina Gardens Drive,1.6,251,93.9,27.6,2022-03-31T223549
2,2022-03-31T223000,S44,"(1.34583, 103.68166)",Nanyang Avenue,1.1,230,82.7,27.4,2022-03-31T223549
3,2022-03-31T223000,S106,"(1.4168, 103.9673)",Pulau Ubin,0.9,266,96.5,25.7,2022-03-31T223549
4,2022-03-31T223000,S24,"(1.3678, 103.9826)",Upper Changi Road North,1.8,329,84.7,27.5,2022-03-31T223549


In [7]:
forecast_4d = get_csv("forecast-4DAY.csv")
forecast_4d.head()

Unnamed: 0,4day_date,4day_forecast,4day_temperature_low,4day_temperature_high,4day_relative_humidity_low,4day_relative_humidity_high,4day_wind_speed_low,4day_wind_speed_high,4day_wind_direction,4day_update_timestamp,call_timestamp
0,2022-04-01T000000,Afternoon thundery showers,24,34,65,95,5,15,NW,2022-03-31T173213,2022-03-31T223549
1,2022-04-02T000000,Afternoon thundery showers,24,34,65,95,5,15,NW,2022-03-31T173213,2022-03-31T223549
2,2022-04-03T000000,Afternoon thundery showers,24,34,65,90,5,15,W,2022-03-31T173213,2022-03-31T223549
3,2022-04-04T000000,Afternoon thundery showers,24,34,65,90,5,15,W,2022-03-31T173213,2022-03-31T223549
4,2022-04-01T000000,Afternoon thundery showers,24,34,65,95,5,15,NW,2022-03-31T173213,2022-03-31T223627


In [8]:
forecast_24h = get_csv("forecast-24HR.csv")
forecast_24h.head()

Unnamed: 0,24hr_start,24hr_end,24hr_general_forecast,24hr_general_relative_humidity_low,24hr_general_relative_humidity_high,24hr_general_temperature_low,24hr_general_temperature_high,24hr_general_wind_speed_low,24hr_general_wind_speed_high,24hr_general_wind_direction,...,24hr_period_3_forecast_south,24hr_period_3_forecast_north,call_timestamp,24hr_period_4_start,24hr_period_4_end,24hr_period_4_forecast_west,24hr_period_4_forecast_east,24hr_period_4_forecast_central,24hr_period_4_forecast_south,24hr_period_4_forecast_north
0,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T223549,,,,,,,
1,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T223627,,,,,,,
2,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T225031,,,,,,,
3,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T225533,,,,,,,
4,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T230035,,,,,,,


In [9]:
forecast_2h = get_csv("forecast-2HR.csv")
forecast_2h.head()

Unnamed: 0,2hr_forecast_area,2hr_forecast_area_loc,2hr_forecast_value,2hr_start,2hr_end,call_timestamp
0,Ang Mo Kio,"(1.375, 103.839)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
1,Bedok,"(1.321, 103.924)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
2,Bishan,"(1.350772, 103.839)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
3,Boon Lay,"(1.304, 103.701)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
4,Bukit Batok,"(1.353, 103.754)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549


In [10]:
bing_data = pd.read_csv("sample_bing.csv").drop("Unnamed: 0", axis = 1)
bing_data.head()

Unnamed: 0,camera_id,direction,trafficCongestion,travelDistance,travelDuration,travelDurationTraffic,distanceUnit,durationUnit,call_timestamp,cam_loc,Lopsided,dir_start,dir_finish
0,1704,1,Heavy,0.105,7,13,Kilometer,Second,2022-04-03T105710,"(1.28569398886979, 103.837524510188)",False,"(1.2853061926158522, 103.83723720205947)","(1.2859727166962915, 103.83792540017214)"
1,1704,2,Mild,0.127,7,8,Kilometer,Second,2022-04-03T105710,"(1.28569398886979, 103.837524510188)",False,"(1.2852273564227954, 103.83728977274863)","(1.2859942174701802, 103.83811178716098)"
2,1704,3,Heavy,0.117,5,8,Kilometer,Second,2022-04-03T105710,"(1.28569398886979, 103.837524510188)",False,"(1.2851962997400104, 103.8373901349734)","(1.2859416600225777, 103.83813329335197)"
3,1704,4,Heavy,0.114,5,8,Kilometer,Second,2022-04-03T105710,"(1.28569398886979, 103.837524510188)",False,"(1.285877157699029, 103.83818347446437)","(1.2851174635435438, 103.83748332846783)"
4,1705,1,,0.214,9,9,Kilometer,Second,2022-04-03T105710,"(1.375925022, 103.8587986)",False,"(1.3752939035761242, 103.85871517155806)","(1.377181002101763, 103.85832766854271)"


# Processing 

So the objective here is to make the dataset into a useable one - ie the whole long row as we would input into a model   
Chances are going to have to process individually.  
The superkey to join all tables will be the timestamp except forecast idk how that will work  

## Realtime Rainfall 

In [11]:
# original view
rainfall_realtime.head()

Unnamed: 0,timestamp,station_id,station_loc,station_name,rainfall_realtime,call_timestamp
0,2022-03-31T222500,S77,"(1.2937, 103.8125)",Alexandra Road,0.0,2022-03-31T223549
1,2022-03-31T222500,S109,"(1.3764, 103.8492)",Ang Mo Kio Avenue 5,0.0,2022-03-31T223549
2,2022-03-31T222500,S90,"(1.3191, 103.8191)",Bukit Timah Road,0.0,2022-03-31T223549
3,2022-03-31T222500,S114,"(1.38, 103.73)",Choa Chu Kang Avenue 4,0.0,2022-03-31T223549
4,2022-03-31T222500,S50,"(1.3337, 103.7768)",Clementi Road,0.0,2022-03-31T223549


In [12]:
rainfall_realtime = rainfall_realtime[["call_timestamp", "station_id", "rainfall_realtime"]]
rainfall_realtime = rainfall_realtime.rename(columns = {"station_id" : "rainfall_station_id"})
rainfall_realtime.head()

Unnamed: 0,call_timestamp,rainfall_station_id,rainfall_realtime
0,2022-03-31T223549,S77,0.0
1,2022-03-31T223549,S109,0.0
2,2022-03-31T223549,S90,0.0
3,2022-03-31T223549,S114,0.0
4,2022-03-31T223549,S50,0.0


## Realtime non rainfall

In [13]:
non_rainfall_realtime.head()

Unnamed: 0,timestamp,station_id,station_loc,station_name,wind_speed_realtime,wind_dir_realtime,RH%_realtime,air_temp_realtime,call_timestamp
0,2022-03-31T223000,S107,"(1.3135, 103.9625)",East Coast Parkway,2.7,251,78.2,28.6,2022-03-31T223549
1,2022-03-31T223000,S108,"(1.2799, 103.8703)",Marina Gardens Drive,1.6,251,93.9,27.6,2022-03-31T223549
2,2022-03-31T223000,S44,"(1.34583, 103.68166)",Nanyang Avenue,1.1,230,82.7,27.4,2022-03-31T223549
3,2022-03-31T223000,S106,"(1.4168, 103.9673)",Pulau Ubin,0.9,266,96.5,25.7,2022-03-31T223549
4,2022-03-31T223000,S24,"(1.3678, 103.9826)",Upper Changi Road North,1.8,329,84.7,27.5,2022-03-31T223549


In [14]:
non_rainfall_realtime = non_rainfall_realtime.rename(columns = {"RH%_realtime" : "humidity_realtime", 
                                                               "station_id" : "non_rainfall_station_id"})

In [15]:
non_rainfall_realtime = non_rainfall_realtime[["call_timestamp", "non_rainfall_station_id", "wind_speed_realtime", 
                                               "wind_dir_realtime", "humidity_realtime", "air_temp_realtime"]]
non_rainfall_realtime.head()

Unnamed: 0,call_timestamp,non_rainfall_station_id,wind_speed_realtime,wind_dir_realtime,humidity_realtime,air_temp_realtime
0,2022-03-31T223549,S107,2.7,251,78.2,28.6
1,2022-03-31T223549,S108,1.6,251,93.9,27.6
2,2022-03-31T223549,S44,1.1,230,82.7,27.4
3,2022-03-31T223549,S106,0.9,266,96.5,25.7
4,2022-03-31T223549,S24,1.8,329,84.7,27.5


## 4 Day Forecast

In [16]:
forecast_4d["4day_date"] = pd.to_datetime(forecast_4d["4day_date"])
forecast_4d["grp_row_num"] = forecast_4d.sort_values("4day_date").groupby(["4day_update_timestamp", "call_timestamp"])["4day_date"].cumcount()+1
forecast_4d["grp_row_num"] = forecast_4d.grp_row_num.apply(lambda x : str(x))
forecast_4d = forecast_4d.drop("4day_date", axis = 1)
forecast_4d = forecast_4d.pivot(index = "call_timestamp", columns = "grp_row_num")
forecast_4d.columns = ['_'.join(col) for col in forecast_4d.columns]
forecast_4d = forecast_4d.drop(["4day_update_timestamp_1", "4day_update_timestamp_2", 
                  "4day_update_timestamp_3", "4day_update_timestamp_4"], axis = 1)
forecast_4d = forecast_4d.reset_index()
forecast_4d.head()

Unnamed: 0,call_timestamp,4day_forecast_1,4day_forecast_2,4day_forecast_3,4day_forecast_4,4day_temperature_low_1,4day_temperature_low_2,4day_temperature_low_3,4day_temperature_low_4,4day_temperature_high_1,...,4day_wind_speed_low_3,4day_wind_speed_low_4,4day_wind_speed_high_1,4day_wind_speed_high_2,4day_wind_speed_high_3,4day_wind_speed_high_4,4day_wind_direction_1,4day_wind_direction_2,4day_wind_direction_3,4day_wind_direction_4
0,2022-03-31T182424,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,24,24,24,24,34,...,5,5,15,15,15,15,NW,NW,W,W
1,2022-03-31T182939,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,24,24,24,24,34,...,5,5,15,15,15,15,NW,NW,W,W
2,2022-03-31T183526,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,24,24,24,24,34,...,5,5,15,15,15,15,NW,NW,W,W
3,2022-03-31T184049,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,24,24,24,24,34,...,5,5,15,15,15,15,NW,NW,W,W
4,2022-03-31T184625,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,Afternoon thundery showers,24,24,24,24,34,...,5,5,15,15,15,15,NW,NW,W,W


## 24 Hour Forecast

No processing needed. But got NA to drop 

In [17]:
forecast_24h.head()

Unnamed: 0,24hr_start,24hr_end,24hr_general_forecast,24hr_general_relative_humidity_low,24hr_general_relative_humidity_high,24hr_general_temperature_low,24hr_general_temperature_high,24hr_general_wind_speed_low,24hr_general_wind_speed_high,24hr_general_wind_direction,...,24hr_period_3_forecast_south,24hr_period_3_forecast_north,call_timestamp,24hr_period_4_start,24hr_period_4_end,24hr_period_4_forecast_west,24hr_period_4_forecast_east,24hr_period_4_forecast_central,24hr_period_4_forecast_south,24hr_period_4_forecast_north
0,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T223549,,,,,,,
1,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T223627,,,,,,,
2,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T225031,,,,,,,
3,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T225533,,,,,,,
4,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Thundery Showers,Thundery Showers,2022-03-31T230035,,,,,,,


In [18]:
forecast_24h.isnull().any()

24hr_start                             False
24hr_end                               False
24hr_general_forecast                  False
24hr_general_relative_humidity_low     False
24hr_general_relative_humidity_high    False
24hr_general_temperature_low           False
24hr_general_temperature_high          False
24hr_general_wind_speed_low            False
24hr_general_wind_speed_high           False
24hr_general_wind_direction            False
24hr_period_1_start                    False
24hr_period_1_end                      False
24hr_period_1_forecast_west            False
24hr_period_1_forecast_east            False
24hr_period_1_forecast_central         False
24hr_period_1_forecast_south           False
24hr_period_1_forecast_north           False
24hr_period_2_start                    False
24hr_period_2_end                      False
24hr_period_2_forecast_west            False
24hr_period_2_forecast_east            False
24hr_period_2_forecast_central         False
24hr_perio

In [19]:
forecast_24h = forecast_24h.drop(["24hr_period_4_start", "24hr_period_4_end", "24hr_period_4_forecast_west", 
                                 "24hr_period_4_forecast_east", "24hr_period_4_forecast_central", 
                                 "24hr_period_4_forecast_south", "24hr_period_4_forecast_north"], axis = 1)

In [20]:
forecast_24h

Unnamed: 0,24hr_start,24hr_end,24hr_general_forecast,24hr_general_relative_humidity_low,24hr_general_relative_humidity_high,24hr_general_temperature_low,24hr_general_temperature_high,24hr_general_wind_speed_low,24hr_general_wind_speed_high,24hr_general_wind_direction,...,24hr_period_2_forecast_south,24hr_period_2_forecast_north,24hr_period_3_start,24hr_period_3_end,24hr_period_3_forecast_west,24hr_period_3_forecast_east,24hr_period_3_forecast_central,24hr_period_3_forecast_south,24hr_period_3_forecast_north,call_timestamp
0,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Partly Cloudy (Day),Partly Cloudy (Day),2022-04-01T120000,2022-04-01T180000,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,2022-03-31T223549
1,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Partly Cloudy (Day),Partly Cloudy (Day),2022-04-01T120000,2022-04-01T180000,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,2022-03-31T223627
2,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Partly Cloudy (Day),Partly Cloudy (Day),2022-04-01T120000,2022-04-01T180000,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,2022-03-31T225031
3,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Partly Cloudy (Day),Partly Cloudy (Day),2022-04-01T120000,2022-04-01T180000,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,2022-03-31T225533
4,2022-03-31T180000,2022-04-01T180000,Thundery Showers,60,95,24,34,10,20,NW,...,Partly Cloudy (Day),Partly Cloudy (Day),2022-04-01T120000,2022-04-01T180000,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,Thundery Showers,2022-03-31T230035
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730,2022-04-03T060000,2022-04-04T060000,Thundery Showers,60,95,24,34,5,15,NW,...,Thundery Showers,Thundery Showers,2022-04-03T180000,2022-04-04T060000,Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),2022-04-03T100302
731,2022-04-03T060000,2022-04-04T060000,Thundery Showers,60,95,24,34,5,15,NW,...,Thundery Showers,Thundery Showers,2022-04-03T180000,2022-04-04T060000,Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),2022-04-03T100814
732,2022-04-03T060000,2022-04-04T060000,Thundery Showers,60,95,24,34,5,15,NW,...,Thundery Showers,Thundery Showers,2022-04-03T180000,2022-04-04T060000,Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),2022-04-03T101329
733,2022-04-03T060000,2022-04-04T060000,Thundery Showers,60,95,24,34,5,15,NW,...,Thundery Showers,Thundery Showers,2022-04-03T180000,2022-04-04T060000,Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),Partly Cloudy (Night),2022-04-03T101843


## 2 Hour Forecast

In [21]:
forecast_2h.head()

Unnamed: 0,2hr_forecast_area,2hr_forecast_area_loc,2hr_forecast_value,2hr_start,2hr_end,call_timestamp
0,Ang Mo Kio,"(1.375, 103.839)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
1,Bedok,"(1.321, 103.924)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
2,Bishan,"(1.350772, 103.839)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
3,Boon Lay,"(1.304, 103.701)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549
4,Bukit Batok,"(1.353, 103.754)",Cloudy,2022-03-31T220000,2022-04-01T000000,2022-03-31T223549


In [22]:
forecast_2h = forecast_2h.drop(["2hr_forecast_area_loc", "2hr_start", "2hr_end"], axis = 1)
forecast_2h.head()

Unnamed: 0,2hr_forecast_area,2hr_forecast_value,call_timestamp
0,Ang Mo Kio,Cloudy,2022-03-31T223549
1,Bedok,Cloudy,2022-03-31T223549
2,Bishan,Cloudy,2022-03-31T223549
3,Boon Lay,Cloudy,2022-03-31T223549
4,Bukit Batok,Cloudy,2022-03-31T223549


## Bing Data 

In [23]:
unique_directions = bing_data[["camera_id", "direction"]].rename(columns = {"camera_id" : "cam_id"}).drop_duplicates()

In [24]:
bing_data = bing_data[["camera_id", "direction", "trafficCongestion", "travelDistance", 
                       "travelDuration", "travelDurationTraffic", "call_timestamp"]]\
.rename(columns = {"camera_id" : "cam_id"})
bing_data.head()

Unnamed: 0,cam_id,direction,trafficCongestion,travelDistance,travelDuration,travelDurationTraffic,call_timestamp
0,1704,1,Heavy,0.105,7,13,2022-04-03T105710
1,1704,2,Mild,0.127,7,8,2022-04-03T105710
2,1704,3,Heavy,0.117,5,8,2022-04-03T105710
3,1704,4,Heavy,0.114,5,8,2022-04-03T105710
4,1705,1,,0.214,9,9,2022-04-03T105710


In [25]:
bing_data["ratio"] = bing_data.travelDurationTraffic / bing_data.travelDuration

In [26]:
bing_data[["trafficCongestion", "ratio"]].groupby("trafficCongestion").agg([min, max, np.mean])

Unnamed: 0_level_0,ratio,ratio,ratio
Unnamed: 0_level_1,min,max,mean
trafficCongestion,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Heavy,1.5,4.9375,2.370663
Medium,1.25,1.4,1.308333
Mild,1.142857,1.2,1.178144
,0.228814,1.090909,0.825089


So basically clear case of correlated variables 

# Combining Tables 

Logic would be to first gather all the time that EVERY dataset has.  
This is to allow for it to be able to join all table. 
Also need to expand the camera mapping 

In [27]:
cam_all = cam_to_loc.merge(unique_directions, on = "cam_id")

In [28]:
# Based on shimar, the time should all be consistent 
# But this is done just for precaution

all_time = set(rainfall_realtime.call_timestamp)\
.intersection(set(non_rainfall_realtime.call_timestamp))\
.intersection(set(forecast_4d.call_timestamp))\
.intersection(set(forecast_24h.call_timestamp))\
.intersection(set(forecast_2h.call_timestamp))#\
#.intersection(set(bing_data.call_timestamp))

In [29]:
all_time = pd.DataFrame(all_time, columns = ["call_timestamp"])

Next step is to cross join this with the camera.  
All cameras should have entries for all time. 

In [30]:
base_df = all_time.merge(cam_to_loc, how = "cross")

In [31]:
base_df

Unnamed: 0,call_timestamp,cam_id,non_rainfall_station_id,rainfall_station_id,2hr_forecast_area
0,2022-04-01T070448,1001,S108,S119,Kallang
1,2022-04-01T070448,1002,S108,S215,Geylang
2,2022-04-01T070448,1003,S108,S123,Kallang
3,2022-04-01T070448,1004,S108,S215,Kallang
4,2022-04-01T070448,1005,S107,S221,Paya Lebar
...,...,...,...,...,...
63853,2022-03-31T200952,9702,S108,S08,Central Water Catchment
63854,2022-03-31T200952,9703,S44,S211,Sungei Kadut
63855,2022-03-31T200952,9704,S44,S40,Mandai
63856,2022-03-31T200952,9705,S44,S104,Mandai


In [32]:
df = base_df.merge(rainfall_realtime, on = ["call_timestamp", "rainfall_station_id"], how = "left")

In [33]:
len(df) == len(base_df)

True

In [34]:
df2 = df.merge(non_rainfall_realtime, on = ["call_timestamp", "non_rainfall_station_id"], how = "left")

In [35]:
len(df2) == len(base_df)

True

In [36]:
df3 = df2.merge(forecast_2h, on = ["call_timestamp", "2hr_forecast_area"], how = "left")

In [37]:
len(df3) == len(base_df)

True

In [40]:
df4 = df3.merge(forecast_4d, on = ["call_timestamp"])

In [41]:
len(df4) == len(base_df)

True

Left to merge the 24h forecast  
bing data needs the correct time input 