
# Latest API data
This notebook consists of 3 parts:
1. Get new data from the API
2. Data preprocessing and feature engineering
3. Creating or backfilling the feature group

In [1]:
# Standard library imports for utilities and data manipulation
import uuid
import os
from datetime import datetime, timedelta
import io
import base64

# External libraries for data handling, networking, and time zones
import pandas as pd
import requests
import json
import pytz

import openmeteo_requests
import requests_cache
from retry_requests import retry

# Environment variable management
from dotenv import load_dotenv
load_dotenv()

# Hopsworks
import hopsworks                

## 1. Get new data from the API

### Sensor Data Access

Here is the information provided by the company so that we can access their data.

- GET request to `data.sensade.com` 
- Authentication: `Basic Auth` (user: miknie20@student.aau.dk, password: GitHub Secret)

### Sensors

Two sensors installed:

- `0080E115003BEA91` (Hw2.0 Fw2.0) Installed towards building

- `0080E115003E3597` (Hw2.0 Fw2.0) Installed towards bike lane

In [2]:
# Time zone used for every "now"-relative timestamp in this notebook
timezone = pytz.timezone('Europe/Copenhagen')

# Current timezone-aware time; `today` is simply another name for the same instant
now = datetime.now(tz=timezone)
today = now

# Reference points relative to the current time
one_day = timedelta(days=1)
yesterday = today - one_day
tomorrow = today + one_day
two_hours_ago = now - timedelta(hours=2)

In [3]:
# Format the reference timestamps as "YYYY-MM-DD HH:MM:SS" strings for the API request
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
formatted_today = today.strftime(TIMESTAMP_FORMAT)
formatted_tomorrow = tomorrow.strftime(TIMESTAMP_FORMAT)
formatted_yesterday = yesterday.strftime(TIMESTAMP_FORMAT)
formatted_two_hours_ago = two_hours_ago.strftime(TIMESTAMP_FORMAT)

# Device EUIs of the two installed sensors
dev_eui_building = "0080E115003BEA91"
dev_eui_bikelane = "0080E115003E3597"

# API endpoint and the account used for HTTP Basic authentication
url = "https://data.sensade.com"
username = "ajakup20@student.aau.dk"

# The password is read from the environment. Without this check a missing
# variable would be encoded as the literal text "None" and only show up later
# as a failed request.
api_password = os.getenv('API_PASSWORD')
if api_password is None:
    raise RuntimeError(
        "Environment variable API_PASSWORD is not set; it is required to authenticate against the sensor API"
    )

# Basic auth header value: base64 of "username:password"
basic_auth = base64.b64encode(f"{username}:{api_password}".encode())
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Basic {basic_auth.decode("utf-8")}'
}

In [4]:
# API call that fetches the data of a given sensor for a given time window
def API_call(dev_eui, from_date, to_date, timeout=60):
    """Request one sensor's readings for a time window and return them as a DataFrame.

    Parameters
    ----------
    dev_eui : str
        Device EUI of the sensor to query.
    from_date, to_date : str
        Start and end of the window; this notebook passes
        "YYYY-MM-DD HH:MM:SS" strings.
    timeout : float, optional
        Seconds `requests` waits for the server before giving up (default 60),
        so a stalled connection cannot block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The response body parsed as CSV.

    Raises
    ------
    RuntimeError
        If the server answers with a status code other than 200.
    """
    payload = json.dumps({
        "dev_eui": dev_eui,
        "from": from_date,
        "to": to_date,
    })

    # The query is sent as a JSON body on a GET request; `url` and `headers`
    # are the notebook-level values defined in an earlier cell.
    response = requests.request("GET", url, headers=headers, data=payload, timeout=timeout)

    if response.status_code != 200:
        # Raise instead of calling exit(): exit() is an interactive-shell helper,
        # not a reliable way to abort a notebook cell, and an exception shows the
        # failure (status and body) directly in the cell output.
        raise RuntimeError(
            f"Failed to fetch data: Status code {response.status_code}\n"
            f"Response: {response.text}"
        )

    csv_data = response.text
    return pd.read_csv(io.StringIO(csv_data))

In [5]:
# Fetch the building sensor's readings for the window from yesterday to tomorrow
df_building_from_api = API_call(
    dev_eui=dev_eui_building,
    from_date=formatted_yesterday,
    to_date=formatted_tomorrow,
)

In [6]:
# Fetch the bike-lane sensor's readings for the window from yesterday to tomorrow
df_bikelane_from_api = API_call(
    dev_eui=dev_eui_bikelane,
    from_date=formatted_yesterday,
    to_date=formatted_tomorrow,
)

In [7]:
# Keep only the last row returned for each sensor (as a one-row DataFrame).
# NOTE(review): this is the newest reading only if the API returns rows in
# chronological order — confirm.
df_building_newest = df_building_from_api.iloc[-1:]
df_bikelane_newest = df_bikelane_from_api.iloc[-1:]

In [8]:
# Display the row kept for the building sensor
df_building_newest

Unnamed: 0,time,battery,temperature,x,y,z,0_radar,1_radar,2_radar,3_radar,4_radar,5_radar,6_radar,7_radar,package_type,f_cnt,dr,snr,rssi,hw_fw_version
125,2024-05-21 07:36:33.696000,3.24,20.375,318,-737,-604,72.0,13.0,25.0,20.0,7.0,6.0,6.0,6.0,PackageType.HEART_BEAT,5711,5,0.2,-86,DataVersion.HW_2FW2_X_X


In [9]:
# Display the row kept for the bike-lane sensor
df_bikelane_newest

Unnamed: 0,time,battery,temperature,x,y,z,0_radar,1_radar,2_radar,3_radar,4_radar,5_radar,6_radar,7_radar,package_type,f_cnt,dr,snr,rssi,hw_fw_version
105,2024-05-21 07:44:29.077000,3.08,18.0,-233,-249,-371,93.0,59.0,85.0,56.0,23.0,29.0,22.0,17.0,PackageType.HEART_BEAT,5580,5,-5.0,-88,DataVersion.HW_2FW2_X_X


## 2. Preprocessing and feature engineering

We apply the same methods as in notebook 1: creating unique IDs, converting the time column, converting radar names, changing data types to floats, and finally creating an empty column for the mag_cluster, as we haven't applied our models yet.

In [10]:
# Work on independent copies so the rows fetched from the API stay untouched
df_building, df_bikelane = (
    newest.copy() for newest in (df_building_newest, df_bikelane_newest)
)

In [11]:
# Parse the 'time' column of both sensor frames into pandas datetimes
for sensor_df in (df_bikelane, df_building):
    sensor_df['time'] = pd.to_datetime(sensor_df['time'])

In [12]:
# Hour-resolution timestamp used as the join key for the hourly weather data:
# format 'time' as "YYYY-MM-DD HH" text, then parse that text back into a datetime
for sensor_df in (df_bikelane, df_building):
    hour_text = sensor_df['time'].dt.strftime('%Y-%m-%d %H')
    sensor_df['time_hour'] = pd.to_datetime(hour_text)

### Weather data column

In [14]:
# Setup the Open-Meteo API client with cache and retry on error
# - cache_session: HTTP session backed by a cache named '.cache' whose entries expire after 3600 s
# - retry_session: wraps the cached session with up to 5 retries and a backoff factor of 0.2
# - openmeteo: the client used below to query the weather API through that session
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [15]:

# Open-Meteo forecast request: hourly 2 m temperature and precipitation for
# one forecast day at the coordinates below
weather_url = "https://api.open-meteo.com/v1/forecast"
hourly_variables = ["temperature_2m", "precipitation"]
weather_params = dict(
    latitude=57.01,
    longitude=9.99,
    hourly=hourly_variables,
    timezone="Europe/Berlin",
    forecast_days=1,
)
responses = openmeteo.weather_api(weather_url, params=weather_params)

In [16]:
# Only the first entry of the response list is processed (loop over `responses`
# to handle several locations or weather models)
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Hourly variables come back in the same order as they were requested:
# index 0 is temperature_2m, index 1 is precipitation
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()

# UTC timestamps from the response's start (inclusive) to its end (exclusive),
# spaced by the reported interval in seconds
hourly_index = pd.date_range(
    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
    freq=pd.Timedelta(seconds=hourly.Interval()),
    inclusive="left",
)

hourly_data = {
    "date": hourly_index,
    "temperature_2m": hourly_temperature_2m,
    "precipitation": hourly_precipitation,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)
print(hourly_dataframe)

Coordinates 57.01040267944336°N 9.992218017578125°E
Elevation 23.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
                        date  temperature_2m  precipitation
0  2024-05-20 22:00:00+00:00       17.804501            0.0
1  2024-05-20 23:00:00+00:00       16.304501            0.0
2  2024-05-21 00:00:00+00:00       16.054501            0.0
3  2024-05-21 01:00:00+00:00       15.504500            0.0
4  2024-05-21 02:00:00+00:00       14.354500            0.0
5  2024-05-21 03:00:00+00:00       13.804501            0.0
6  2024-05-21 04:00:00+00:00       14.354500            0.0
7  2024-05-21 05:00:00+00:00       15.704500            0.0
8  2024-05-21 06:00:00+00:00       16.604500            0.0
9  2024-05-21 07:00:00+00:00       17.704500            0.0
10 2024-05-21 08:00:00+00:00       18.304501            0.0
11 2024-05-21 09:00:00+00:00       19.204500            0.0
12 2024-05-21 10:00:00+00:00       19.954500            0.0
13 2024-05-21 11:

In [17]:
# Drop the UTC marker so 'date' becomes timezone-naive (the clock values stay
# in UTC), then pass the result through to_datetime so the column is a datetime
naive_dates = hourly_dataframe['date'].dt.tz_localize(None)
hourly_dataframe['date'] = pd.to_datetime(naive_dates)

In [18]:
# Left-join the hourly weather onto each sensor frame: the sensor's 'time_hour'
# is matched against the weather 'date'; sensor rows without a match are kept
df_building = df_building.merge(
    hourly_dataframe, how='left', left_on='time_hour', right_on='date'
)
df_bikelane = df_bikelane.merge(
    hourly_dataframe, how='left', left_on='time_hour', right_on='date'
)

In [19]:
# The weather 'date' column was only needed as the join key; remove it again
df_building = df_building.drop('date', axis=1)
df_bikelane = df_bikelane.drop('date', axis=1)

In [20]:
# Shift the 'time' and 'time_hour' columns of both frames forward by a fixed two hours.
# NOTE(review): presumably this converts UTC sensor timestamps to Danish summer
# time (UTC+2); a fixed offset would be one hour off during winter time — confirm.
two_hour_shift = timedelta(hours=2)
for sensor_df in (df_bikelane, df_building):
    for time_column in ('time', 'time_hour'):
        sensor_df[time_column] = sensor_df[time_column] + two_hour_shift

In [21]:
# Build a per-row identifier from the timestamp and the sensor name
def create_id(df, dataset_name):
    """Add the 'psensor' and 'id' columns to `df` and return it.

    `dataset_name` selects the sensor label: 'df_building' gives "BUILDING"
    and 'df_bikelane' gives "BIKELANE"; any other value raises ValueError.
    The 'id' column is the 'time' column rendered as text, an underscore,
    and the sensor label. `df` is modified in place and also returned.
    """
    known_sensors = (('df_building', "BUILDING"), ('df_bikelane', "BIKELANE"))
    sensor_label = next(
        (label for name, label in known_sensors if dataset_name == name),
        None,
    )
    if sensor_label is None:
        raise ValueError("Unknown dataset name provided")

    df['psensor'] = sensor_label

    # Identifier = "<time as text>_<sensor label>"
    time_text = df['time'].astype(str)
    df['id'] = time_text + '_' + df['psensor']
    return df

In [22]:
# Tag each frame with its sensor label and build its 'id' column
df_bikelane = create_id(df=df_bikelane, dataset_name='df_bikelane')
df_building = create_id(df=df_building, dataset_name='df_building')

In [23]:
# Move the "radar" label to the front of the column names:
# '<n>_radar' becomes 'radar_<n>' for n = 0..7
radar_renames = {f'{channel}_radar': f'radar_{channel}' for channel in range(8)}
df_bikelane = df_bikelane.rename(columns=radar_renames)
df_building = df_building.rename(columns=radar_renames)

In [24]:
# Cast the magnetometer axes, the eight radar channels and the radio/frame
# counters to float in both frames
float_columns = [
    'x', 'y', 'z',
    'radar_0', 'radar_1', 'radar_2', 'radar_3',
    'radar_4', 'radar_5', 'radar_6', 'radar_7',
    'f_cnt', 'dr', 'rssi',
]
df_bikelane[float_columns] = df_bikelane[float_columns].astype(float)
df_building[float_columns] = df_building[float_columns].astype(float)


In [25]:
# Placeholder label column: every row gets the literal string "null"
# because no model has assigned a cluster yet
for sensor_df in (df_bikelane, df_building):
    sensor_df['mag_cluster'] = "null"

## 3. Create or backfill the feature group

In [27]:
# Connecting to the Hopsworks project

# Log in to the "annikaij" project
project = hopsworks.login(project="annikaij")

# Handle to the project's feature store, used below to create/fetch feature groups
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549019
Connected. Call `.close()` to terminate connection gracefully.


In [28]:
# Fetch version 1 of the "api_bikelane_newest" feature group (creating it if it
# does not exist yet), keyed on 'id' with 'time' as event time, then insert the
# prepared bike-lane row
bikelane_fg = fs.get_or_create_feature_group(
    name="api_bikelane_newest",
    version=1,
    description="New bike lane data",
    primary_key=["id"],
    event_time='time',
    online_enabled=True,
)
bikelane_fg.insert(df_bikelane)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/549019/fs/544841/fg/834920


Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: api_bikelane_newest_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/549019/jobs/named/api_bikelane_newest_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7b76d8377400>, None)

In [29]:
# Fetch version 1 of the "api_building_newest" feature group (creating it if it
# does not exist yet), keyed on 'id' with 'time' as event time, then insert the
# prepared building row
building_fg = fs.get_or_create_feature_group(
    name="api_building_newest",
    version=1,
    description="New building data",
    primary_key=["id"],
    event_time='time',
    online_enabled=True,
)
building_fg.insert(df_building)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/549019/fs/544841/fg/834921


Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: api_building_newest_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/549019/jobs/named/api_building_newest_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7b76940bd390>, None)

## **Next up:** 3: Feature view creation
Go to the 3_featureview_creation.ipynb notebook