In [15]:
from deltalake import DeltaTable
from deltalake.writer import write_deltalake
import os
import json
import pandas as pd
from dotenv import load_dotenv

load_dotenv()

STORAGE_ACCOUNT_NAME = os.getenv("AZURE_STORAGE_ACCOUNT_NAME")
AZURE_STORAGE_ACCESS_KEY = os.getenv("AZURE_STORAGE_ACCESS_KEY")
CONTAINER_NAME = "test-container"
delta_table_path = f"abfss://{CONTAINER_NAME}@{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net/data"

storage_options = {"azure_storage_account_name": STORAGE_ACCOUNT_NAME, "azure_storage_access_key": AZURE_STORAGE_ACCESS_KEY} 

dt = DeltaTable(delta_table_path, storage_options=storage_options) 

df = dt.to_pandas()


In [16]:
df

Unnamed: 0,turbineId,timestamp,wind_speed,wind_direction,rotor_rpm,active_power,generator_temp,gearbox_temp,pitch_angle
0,T004,2025-06-02T17:03:45.711Z,8.53 knots,81 degrees,14.65 RPM,1498 kW,66 °C,69 Kelvin,16 degrees
1,T006,2025-06-02T17:03:55.707Z,8.41 km/h,143 degrees,14.99 RPM,1541 MW,67 °F,72 °F,13 degrees
2,T003,2025-06-02T17:03:55.709Z,11.46 ft/s,101 degrees,16.54 rps,1556 Watts,66 °C,74 °C,12 degrees
3,T001,2025-06-02T17:03:55.719Z,9.52 m/s,108 degrees,15.47 RPM,1559 kW,65 °C,73 °C,16 degrees
4,T002,2025-06-02T17:03:55.726Z,8.64 km/h,2 rad,14.72 RPM,1517 MW,65 Kelvin,74 Kelvin,16 radians
...,...,...,...,...,...,...,...,...,...
3787,T003,2025-06-02T16:11:15.135Z,10.32 ft/s,356 degrees,15.46 rps,1539 Watts,64 °C,71 °C,16 degrees
3788,T001,2025-06-02T16:11:15.143Z,10.85 m/s,325 degrees,15.85 RPM,1538 kW,63 °C,71 °C,13 degrees
3789,T002,2025-06-02T16:11:15.146Z,11.49 km/h,153 rad,16.68 RPM,1542 MW,65 Kelvin,76 Kelvin,11 radians
3790,T005,2025-06-02T16:11:15.141Z,8.95 m/s,180 mil,14.83 RPM,1497 kWh,66 °C,71 °C,17 mil


In [5]:
print(dt.version())

print(dt.files())

99
['part-00001-106cb19f-7ef0-4f8e-984b-2af56daf4cc4-c000.snappy.parquet', 'part-00001-7dc9baa7-9257-4d40-95b2-c85b09525a72-c000.snappy.parquet', 'part-00001-61c5fa12-1616-4eb0-9d2b-9b41cb4a9ef4-c000.snappy.parquet', 'part-00001-18be6ad2-daff-40a6-9faa-0b1e0c1f44d0-c000.snappy.parquet', 'part-00001-2afb4acf-4efa-40ba-a17f-93638174c04c-c000.snappy.parquet', 'part-00001-8a3afe72-60ea-4c14-9ae2-1902d66b4c52-c000.snappy.parquet', 'part-00001-3dfb762d-8d30-498f-861f-7e1432cf576c-c000.snappy.parquet', 'part-00001-0c6b061c-aa18-4019-899b-135747757b2d-c000.snappy.parquet', 'part-00001-cb369fa6-18b8-4f18-9f31-acec6895b538-c000.snappy.parquet', 'part-00001-2601c2dc-b4d3-4563-be49-ce134c0d27a5-c000.snappy.parquet', 'part-00001-91e2c286-a0aa-4c2c-aebd-4818f50934c6-c000.snappy.parquet', 'part-00001-f1c7e0e2-0626-4e53-bf40-3decb568541e-c000.snappy.parquet', 'part-00001-57bd280c-1333-4f51-b8ee-09a26154e448-c000.snappy.parquet', 'part-00001-c42a2fe9-5caf-42cb-a92d-ebd8e04b8307-c000.snappy.parquet', 'p

In [None]:
import numpy as np

df['timestamp'] = pd.to_datetime(df['timestamp'])

def convert_wind_speed(val):
    if pd.isna(val):
        return np.nan
    if "knots" in val:
        return float(val.replace(" knots", "")) * 0.514444
    elif "km/h" in val:
        return float(val.replace(" km/h", "")) / 3.6
    else:
        try:
            return float(val)
        except:
            return np.nan

def convert_active_power(val):
    if pd.isna(val):
        return np.nan
    if "MW" in val:
        return float(val.replace(" MW", "")) * 1000
    elif "kW" in val:
        return float(val.replace(" kW", ""))
    else:
        try:
            return float(val)
        except:
            return np.nan

def convert_temperature(val):
    if pd.isna(val):
        return np.nan
    if "°F" in val:
        f = float(val.replace(" °F", ""))
        return (f - 32) * 5/9
    elif "°C" in val:
        return float(val.replace(" °C", ""))
    elif "Kelvin" in val:
        k = float(val.replace(" Kelvin", ""))
        return k - 273.15
    else:
        try:
            return float(val)
        except:
            return np.nan

df['wind_speed_mps'] = df['wind_speed'].apply(convert_wind_speed)

df.head()


Unnamed: 0,turbineId,timestamp,wind_speed,wind_direction,rotor_rpm,active_power,generator_temp,gearbox_temp,pitch_angle,wind_speed_mps
0,T004,2025-06-02 17:03:45.711000+00:00,8.53 knots,81 degrees,14.65 RPM,1498 kW,66 °C,69 Kelvin,16 degrees,4.388207
1,T006,2025-06-02 17:03:55.707000+00:00,8.41 km/h,143 degrees,14.99 RPM,1541 MW,67 °F,72 °F,13 degrees,2.336111
2,T003,2025-06-02 17:03:55.709000+00:00,11.46 ft/s,101 degrees,16.54 rps,1556 Watts,66 °C,74 °C,12 degrees,
3,T001,2025-06-02 17:03:55.719000+00:00,9.52 m/s,108 degrees,15.47 RPM,1559 kW,65 °C,73 °C,16 degrees,
4,T002,2025-06-02 17:03:55.726000+00:00,8.64 km/h,2 rad,14.72 RPM,1517 MW,65 Kelvin,74 Kelvin,16 radians,2.4
