In [None]:
# Cell 1: Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pymongo
import os
from dotenv import load_dotenv

# IPython/Jupyter magic: render matplotlib figures inline in the notebook
# output. This line is only valid when run by IPython, not plain Python.
%matplotlib inline
# Use seaborn's 'whitegrid' style for the plots created below.
sns.set_style('whitegrid')

# Cell 2: Connect to MongoDB and load data

# Load environment variables (e.g. from a local .env file) into the process
# environment so the os.getenv() calls below can see them.
load_dotenv()


MONGO_URI = os.getenv("MONGODB_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Fail fast on missing configuration. Without this check an unset MONGODB_URI
# makes MongoClient silently fall back to its default host, and an unset
# DB_NAME / COLLECTION_NAME only fails later with an unrelated-looking error.
_missing = [
    name
    for name, value in (
        ("MONGODB_URI", MONGO_URI),
        ("DB_NAME", DB_NAME),
        ("COLLECTION_NAME", COLLECTION_NAME),
    )
    if not value
]
if _missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing)}"
    )

client = pymongo.MongoClient(MONGO_URI)
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

# Fetch the 5000 most recent documents (newest first, by 'timestamp').
data = list(
    collection.find({}).sort('timestamp', pymongo.DESCENDING).limit(5000)
)
if not data:
    # An empty result would otherwise surface as a bare KeyError: 'timestamp'.
    raise ValueError(
        f"No documents found in collection '{COLLECTION_NAME}' "
        f"of database '{DB_NAME}'"
    )

df = pd.DataFrame(data)
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Re-order chronologically (oldest first). Note that the original row labels
# are kept, so the index still reflects the newest-first fetch order.
df = df.sort_values('timestamp')

print(f"Dataset size: {len(df)} records")
print(f"Devices: {df['device_id'].unique()}")
df.head()

# Cell 3: Data Visualization
# 2x2 grid: the top row shows each metric over time (one line per device),
# the bottom row shows the distribution of each metric over all records.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# (DataFrame column, display name, axis label including unit)
metrics = [
    ('temperature', 'Temperature', 'Temperature (°C)'),
    ('humidity', 'Humidity', 'Humidity (%)'),
]

for col_idx, (column, display_name, unit_label) in enumerate(metrics):
    # Time series. Plot against the 'timestamp' column explicitly: plotting
    # the Series directly would use the DataFrame index as the x-axis, which
    # is the fetch order rather than time.
    ts_ax = axes[0, col_idx]
    for device_id, device_df in df.groupby('device_id'):
        device_df = device_df.sort_values('timestamp')
        ts_ax.plot(
            device_df['timestamp'],
            device_df[column],
            label=str(device_id),
        )
    ts_ax.set_title(f'{display_name} Over Time')
    ts_ax.set_xlabel('Time')
    ts_ax.set_ylabel(unit_label)
    ts_ax.tick_params(axis='x', rotation=45)  # keep date labels readable
    ts_ax.legend()

    # Distribution of the metric across all devices.
    hist_ax = axes[1, col_idx]
    df[column].hist(bins=30, ax=hist_ax, edgecolor='black')
    hist_ax.set_title(f'{display_name} Distribution')
    hist_ax.set_xlabel(unit_label)
    hist_ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

# Cell 4: Feature Engineering & Model Training
# (Copy relevant sections from train_models.py)

# Cell 5: Model Evaluation & Visualization
# Add your evaluation code and visualizations here
