In [1]:
import influxdb_client, os, time
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
import pandas as pd
import time

# Prerequisites

In [2]:
# Connection settings for the InfluxDB instance used throughout the notebook.
url = "http://localhost:8086"
org = "th-koeln"
bucket = "iot_telemetry_data"

# The API token is read from the environment so it never appears in the
# notebook; this evaluates to None when INFLUXDB_TOKEN is not set.
token = os.getenv("INFLUXDB_TOKEN")

# Transforming CSV for InfluxDB

In [3]:
# Load the telemetry CSV, discard rows with missing values, and convert the
# epoch-second 'ts' column to pandas datetimes in a single chain.
iot_tele_df = (
    pd.read_csv("data/iot_telemetry_data.csv")
    .dropna()
    .assign(ts=lambda frame: pd.to_datetime(frame['ts'], unit='s'))
)

# Show the last rows to check the converted timestamps.
iot_tele_df.tail()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp
405179,2020-07-20 00:03:33.162014961,00:0f:00:70:91:0a,0.003745,75.300003,False,0.006247,False,0.016437,19.200001
405180,2020-07-20 00:03:33.576560736,b8:27:eb:bf:9d:51,0.005882,48.5,False,0.00866,False,0.023301,22.2
405181,2020-07-20 00:03:36.167959213,1c:bf:ce:15:ec:4d,0.00454,75.699997,True,0.007181,False,0.019076,26.6
405182,2020-07-20 00:03:36.979521513,00:0f:00:70:91:0a,0.003745,75.300003,False,0.006247,False,0.016437,19.200001
405183,2020-07-20 00:03:37.264312506,b8:27:eb:bf:9d:51,0.005914,48.4,False,0.008695,False,0.0234,22.2


In [4]:
# Report the earliest and latest timestamp contained in the dataset.
ts_column = iot_tele_df['ts']
earliest, latest = ts_column.min(), ts_column.max()
print("Date range: ", earliest, " - ", latest)

Date range:  2020-07-12 00:01:34.385974646  -  2020-07-20 00:03:37.264312506


**Important concepts**

1. *Measurement*: The named container that groups related points from the dataset, e.g. `iot_telemetry`.
2. *Tag*: A key-value pair used to group and filter points, e.g. `device` → select only device A or device B.
3. *Field* & *Value*: A concrete recorded variable and its value, e.g. `temp` = 22.7.

In [4]:
# Measurement name: used when building the points and again when filtering the read query.
measurement_name = "iot_telemetry"

In [None]:
# Convert every DataFrame row into an InfluxDB Point:
#   - the device id becomes a tag,
#   - each sensor reading becomes an explicitly typed field (float / bool),
#   - 'ts' becomes the point's timestamp, written with nanosecond precision.
# itertuples() replaces iterrows(): it avoids constructing a Series object for
# every row, which is noticeably faster on a frame with several hundred
# thousand rows, and the resulting points are the same.
points = [
    Point(measurement_name)
    .tag("device", row.device)
    .field("carbon_monoxide", float(row.co))
    .field("humidity", float(row.humidity))
    .field("light", bool(row.light))
    .field("liquid_petroleum_gas", float(row.lpg))
    .field("motion", bool(row.motion))
    .field("smoke", float(row.smoke))
    .field("temp", float(row.temp))
    # 'ts' was already converted to a pandas Timestamp when the CSV was
    # loaded, so it is passed through without re-parsing.
    .time(row.ts, WritePrecision.NS)
    for row in iot_tele_df.itertuples(index=False)
]

In [6]:
# Print at most the first three points as a quick sanity check.
for sample_point in points[:3]:
    print(sample_point)

iot_telemetry,device=b8:27:eb:bf:9d:51 carbon_monoxide=0.0049559386483912,humidity=51,light=false,liquid_petroleum_gas=0.0076508222705571,motion=false,smoke=0.0204112701224129,temp=22.7 1594512094385974000
iot_telemetry,device=00:0f:00:70:91:0a carbon_monoxide=0.0028400886071015,humidity=76,light=false,liquid_petroleum_gas=0.005114383400977,motion=false,smoke=0.0132748367048515,temp=19.700000762939453 1594512094735567000
iot_telemetry,device=b8:27:eb:bf:9d:51 carbon_monoxide=0.0049760123404216,humidity=50.9,light=false,liquid_petroleum_gas=0.007673227406398,motion=false,smoke=0.0204751255761782,temp=22.6 1594512098073572000


In [7]:
# Write all prepared points to InfluxDB with exception handling.
# The client is initialised to None before the try block so that `finally`
# only closes a client created by this run. A `locals()` lookup would also
# match a stale `write_client` left behind by an earlier execution of the cell.
write_client = None
try:
    write_client = influxdb_client.InfluxDBClient(url=url, token=token, org=org)
    write_api = write_client.write_api(write_options=SYNCHRONOUS)
    # NOTE(review): all points are handed over in a single write() call;
    # consider writing in batches if the dataset grows.
    write_api.write(bucket=bucket, org=org, record=points)
    print("Data written to InfluxDB successfully!")
except Exception as e:
    # Report the failure instead of raising so the notebook keeps running.
    print(f"Failed to write data to InfluxDB: {e}")
finally:
    if write_client is not None:
        write_client.close()

Data written to InfluxDB successfully!


# Reading the bucket

In [5]:
# Query that reads all data and pivots it into a structured DataFrame
# Flux query returning the whole measurement as a wide table:
#   range(start: 0) -> no lower time bound (starts at the Unix epoch)
#   filter          -> keeps only points of the configured measurement
#   pivot           -> one row per (_time, device), one column per field
query_read_all = f'''from(bucket: "{bucket}")
    |> range(start: 0)
    |> filter(fn: (r) => r._measurement == "{measurement_name}")
    |> pivot(rowKey:["_time", "device"], columnKey: ["_field"], valueColumn: "_value")'''

In [7]:
# Read data from InfluxDB to verify the write and load it as a DataFrame.
read_client = None
# Reset the result up front so a failed query cannot leave a DataFrame from an
# earlier run of this cell behind for the cells that follow.
df_result = None
try:
    read_client = influxdb_client.InfluxDBClient(url=url, token=token, org=org)
    query_api = read_client.query_api()
    # perf_counter() is a monotonic clock intended for measuring elapsed time;
    # time.time() follows the wall clock and can jump when it is adjusted.
    start = time.perf_counter()
    print("Reading data from InfluxDB...")
    df_result = query_api.query_data_frame(query=query_read_all, org=org)
    end = time.perf_counter()
    print(f"Data read successfully in {end - start:.2f} seconds!")
except Exception as e:
    # Report the failure instead of raising so the notebook keeps running.
    print(f"Failed to read data from InfluxDB: {e}")
finally:
    if read_client is not None:
        read_client.close()
        print('InfluxDB read client closed.')

Reading data from InfluxDB...
Data read successfully in 22.22 seconds!
InfluxDB read client closed.


In [8]:
# Preview the first rows of the pivoted query result.
df_result.head()

Unnamed: 0,result,table,_start,_stop,_time,_measurement,device,carbon_monoxide,humidity,light,liquid_petroleum_gas,motion,smoke,temp
0,_result,0,1970-01-01 00:00:00+00:00,2025-05-15 10:10:01.581731+00:00,2020-07-12 00:01:34.735567+00:00,iot_telemetry,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001
1,_result,0,1970-01-01 00:00:00+00:00,2025-05-15 10:10:01.581731+00:00,2020-07-12 00:01:46.869076+00:00,iot_telemetry,00:0f:00:70:91:0a,0.002938,76.0,False,0.005241,False,0.013628,19.700001
2,_result,0,1970-01-01 00:00:00+00:00,2025-05-15 10:10:01.581731+00:00,2020-07-12 00:02:02.785731+00:00,iot_telemetry,00:0f:00:70:91:0a,0.002905,75.800003,False,0.005199,False,0.013509,19.700001
3,_result,0,1970-01-01 00:00:00+00:00,2025-05-15 10:10:01.581731+00:00,2020-07-12 00:02:11.476376+00:00,iot_telemetry,00:0f:00:70:91:0a,0.002938,75.800003,False,0.005241,False,0.013628,19.700001
4,_result,0,1970-01-01 00:00:00+00:00,2025-05-15 10:10:01.581731+00:00,2020-07-12 00:02:15.289086+00:00,iot_telemetry,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001


In [13]:
# Number of rows returned by the read query.
# NOTE(review): the source frame's index reaches 405183 while 405171 rows come
# back here; presumably rows were removed by dropna() and/or rows sharing the
# same device and timestamp ended up as a single point — confirm.
len(df_result)

405171