In [123]:
import requests
import pandas as pd
import datetime
"""
For testing on local machine

The function works by requesting a json of the latest road incidents from the LTA Dynamic Data API. This lambda is queried hourly. After quering the data, 
it checks if the api returns any data. If there is, it converts it into a pandas table, and creates the Date and Time column. It then filters out to keep the data
from the past hourly, and checks again if there is any data. If there is, it converts it into a json table, and is inserted into the DynamoDB table.
"""

# URL and Pulling Data
traffic_url = "http://datamall2.mytransport.sg/ltaodataservice/TrafficIncidents"
api_key = open("api_key.txt").read()
api_headers = {"AccountKey": api_key, "accept": "application/json"}

request = requests.get(url=traffic_url, headers=api_headers)
print(request)
data = request.json()
dt = datetime.datetime.now()    # + datetime.timedelta(hours = 8)
                                # Only needed in AWS due to time difference

df = pd.DataFrame(data["value"])

<Response [200]>


In [124]:
df = pd.DataFrame(data["value"])
df

Unnamed: 0,Type,Latitude,Longitude,Message
0,Vehicle breakdown,1.379377,103.776146,(8/1)22:05 Vehicle breakdown on BKE (towards P...
1,Vehicle breakdown,1.330023,103.82095,(8/1)22:03 Vehicle breakdown on PIE (towards T...
2,Vehicle breakdown,1.364223,103.956758,(8/1)21:59 Vehicle breakdown on TPE (towards P...
3,Roadwork,1.347536,103.966132,(8/1)21:45 Roadworks on PIE (towards Tuas) at ...
4,Roadwork,1.27864,103.824916,(8/1)21:37 Roadworks on AYE (towards MCE) at L...
5,Roadwork,1.311912,103.803815,(8/1)15:09 Roadworks on Farrer Road (towards L...


In [113]:
len(df) # Add line to break

if (len(df) > 0):
    print()
    # Continue with code




In [114]:
# Extract Date and Time out of the Message to keep the main message only

date_regex = "([0-9]{1,2}/[0-9]{1,2})"
df["Date"] = df["Message"].str.extract(pat = date_regex) + "/" + str(dt.year)

time_regex = "([0-9]{1,2}:[0-9]{1,2})"
df["Time"] = df["Message"].str.extract(pat = time_regex)

df["Date_Time"] = pd.to_datetime(df["Date"] + df["Time"], format = "%d/%m/%Y%H:%S")

message_regex = "\d\s(.*$)"
df["Message"] = df["Message"].str.extract(pat = message_regex)

df = df[["Type", "Date_Time", "Message", "Latitude", "Longitude"]]

In [115]:
df

Unnamed: 0,Type,Date_Time,Message,Latitude,Longitude
0,Vehicle breakdown,2023-01-08 19:00:02,Vehicle breakdown on SLE (towards CTE) after B...,1.426443,103.781225
1,Roadwork,2023-01-08 15:00:09,Roadworks on Farrer Road (towards Lornie) at F...,1.311912,103.803815
2,Roadwork,2023-01-08 14:00:50,Roadworks on BKE (towards Woodlands) before KJ...,1.388921,103.774583
3,Road Block,2023-01-08 12:00:00,Road Closure on Temple Street (towards Trengga...,1.283531,103.843068
4,Heavy Traffic,2023-01-08 11:00:26,Heavy Traffic on Orchard Turn (towards Orchard...,1.301712,103.834921


In [116]:
# Filtering to keep only previous hour of data (Removed)
if dt.hour >= 1:
    prev_hour = str(dt.hour - 1)
    df_boolean = df["Time"].str.match(pat = (prev_hour + ":"))
else:
    prev_hour = "23"
    df_boolean = df["Time"].str.match(pat = "23:")

KeyError: 'Time'

In [117]:
# Filtering to keep only past 15 minutes of data
dt_last15 = dt-datetime.timedelta(minutes=15)
print(dt_last15)

2023-01-08 19:00:41.755815


In [118]:
df_tocloud = df[df['Date_Time'] > dt_last15].reset_index(drop = True)
df_tocloud

Unnamed: 0,Type,Date_Time,Message,Latitude,Longitude


In [119]:
df_tocloud_key = dt.strftime(format = "%Y-%m-%d %H:%m")
df_tocloud_key

'2023-01-08 19:01'

In [128]:
dt = datetime.datetime.now()
dt.strftime(format = "%Y-%m-%d %H:%M")

'2023-01-09 22:44'

## For Testing and Subsequent Wring

In [120]:
# Converting to JSON for Writing to Cloud
df_tocloud = df_tocloud.to_json()


# Reading from Cloud
import json
df_raw = json.loads(df_tocloud)
df_local = pd.DataFrame(df_raw)
df_local

Unnamed: 0,Type,Date_Time,Message,Latitude,Longitude
