In [2]:
# 📦 Libraries
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression

# 🗂️ Read the logged sensor readings from CSV into a DataFrame
df = pd.read_csv("dht22_cloud_log.csv")

# 👀 Sanity check: column names, (rows, columns), then the first five rows
#    as the cell's rich output (last expression is what gets displayed)
print("Columns:", list(df.columns))
print("Shape:", df.shape)
df.head(5)

Columns: ['timestamp', 'temperature', 'humidity']
Shape: (170, 3)


Unnamed: 0,timestamp,temperature,humidity
0,2025-08-11 16:56:47,23.700001,58.5
1,2025-08-11 16:56:48,23.700001,58.0
2,2025-08-11 16:56:58,23.700001,57.700001
3,2025-08-11 16:57:03,23.700001,60.099998
4,2025-08-11 16:57:08,23.700001,58.099998


In [3]:
# 🎯 Response vector (y, 1-D) and predictor matrix (X, 2-D with one column)
#    The double brackets keep X two-dimensional.
y = df["humidity"].to_numpy()        # dependent variable
X = df[["temperature"]].to_numpy()   # independent variable

# 🔨 Linear regression of humidity on temperature
model = LinearRegression()
model.fit(X, y)

# 📊 Fitted parameters and R^2 evaluated on the same data used for fitting
intercept = model.intercept_
slope = model.coef_[0]               # single predictor -> single coefficient
r2 = model.score(X, y)

print(f"Equation: humidity = {slope:.4f} * temperature + {intercept:.4f}")
print(f"R^2: {r2:.4f}")

Equation: humidity = -2.6074 * temperature + 119.3904
R^2: 0.4201


In [4]:
# 🌡️ Observed temperature range; the prediction grid below stays inside it
t_min = df["temperature"].min()
t_max = df["temperature"].max()
print(f"Min temp: {t_min:.2f}, Max temp: {t_max:.2f}")

# 🔮 100 evenly spaced temperatures from t_min to t_max, shaped as a
#    (100, 1) column so it matches the 2-D layout the model was fitted on
temp_test = np.linspace(t_min, t_max, num=100)[:, np.newaxis]

# 🤖 Model's humidity estimate at each grid temperature
humidity_pred = model.predict(temp_test)

Min temp: 22.80, Max temp: 23.70


In [5]:
# Scatter of every reading, with readable axis labels
fig = px.scatter(
    df,
    x="temperature",
    y="humidity",
    title="Temperature vs Humidity (DHT22 data)",
    labels={"temperature":"Temperature (°C)", "humidity":"Humidity (%)"},
)

# Overlay the fitted line; temp_test is a (n, 1) column, so take its only
# column to get the 1-D x-values
fig.add_scatter(
    x=temp_test[:, 0],
    y=humidity_pred,
    mode="lines",
    name="Regression line",
)

fig.show()

In [6]:
# 🧹 Trim the temperature range: keep rows with 22.85 < temperature < 23.65
#    (both bounds exclusive; thresholds are hard-coded)
df_filtered = df.loc[(df["temperature"] < 23.65) & (df["temperature"] > 22.85)]
print("Before:", df.shape, "After filtering:", df_filtered.shape)

# Refit the same humidity ~ temperature regression on the trimmed rows
yf = df_filtered["humidity"].to_numpy()
Xf = df_filtered[["temperature"]].to_numpy()
model_f = LinearRegression()
model_f.fit(Xf, yf)

# Fitted parameters and R^2 on the trimmed data itself
intercept_f = model_f.intercept_
slope_f = model_f.coef_[0]
r2_f = model_f.score(Xf, yf)

print(f"Filtered Equation: humidity = {slope_f:.4f}*temperature + {intercept_f:.4f}")
print(f"Filtered R^2: {r2_f:.4f}")

# Trend line: 100 evenly spaced temperatures across the trimmed range,
# as a (100, 1) column for predict()
temp_test_f = np.linspace(
    df_filtered["temperature"].min(),
    df_filtered["temperature"].max(),
    num=100,
)[:, np.newaxis]
humidity_pred_f = model_f.predict(temp_test_f)

# Scatter of the trimmed readings with the refitted line on top
fig = px.scatter(
    df_filtered,
    x="temperature",
    y="humidity",
    title="Filtered Temperature vs Humidity",
    labels={"temperature":"Temperature (°C)", "humidity":"Humidity (%)"},
)
fig.add_scatter(
    x=temp_test_f[:, 0],
    y=humidity_pred_f,
    mode="lines",
    name="Regression line (filtered)",
)
fig.show()

Before: (170, 3) After filtering: (145, 3)
Filtered Equation: humidity = -2.9894*temperature + 128.1277
Filtered R^2: 0.3830


In [7]:
# 🧹 Humidity band filter: keep rows with 58 <= humidity <= 60 (inclusive).
#    Applied to the original df, not to df_filtered.
# NOTE(review): this selects rows on the regression target itself, which may
#    distort the fitted slope and R^2 — confirm this is intended.
df_filtered2 = df[df["humidity"].between(58, 60)]
print("Before:", df.shape, "After filtering:", df_filtered2.shape)

# Refit humidity ~ temperature on the band-limited rows
y2 = df_filtered2["humidity"].to_numpy()
X2 = df_filtered2[["temperature"]].to_numpy()
model2 = LinearRegression()
model2.fit(X2, y2)

# Fitted parameters and R^2 on the band-limited data itself
intercept2 = model2.intercept_
slope2 = model2.coef_[0]
r2_2 = model2.score(X2, y2)

print(f"Tighter Filter Equation: humidity = {slope2:.4f}*temperature + {intercept2:.4f}")
print(f"Tighter Filter R^2: {r2_2:.4f}")

# Trend line: 100 evenly spaced temperatures across the remaining range,
# as a (100, 1) column for predict()
temp_test2 = np.linspace(
    df_filtered2["temperature"].min(),
    df_filtered2["temperature"].max(),
    num=100,
)[:, np.newaxis]
humidity_pred2 = model2.predict(temp_test2)

# Scatter of the band-limited readings with the refitted line on top
fig = px.scatter(
    df_filtered2,
    x="temperature",
    y="humidity",
    title="Temperature vs Humidity (tighter filtered)",
    labels={"temperature":"Temperature (°C)", "humidity":"Humidity (%)"},
)
fig.add_scatter(
    x=temp_test2[:, 0],
    y=humidity_pred2,
    mode="lines",
    name="Regression line (tighter filter)",
)
fig.show()

Before: (170, 3) After filtering: (101, 3)
Tighter Filter Equation: humidity = -1.2659*temperature + 88.2723
Tighter Filter R^2: 0.1847
