# In [None]:
import pandas as pd
import numpy as np
import GPy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Impute gaps in the sensor readings with a Gaussian-process regression (GPR)
# model: record which cells are missing, mean-fill them provisionally so every
# row has finite model inputs, fit and evaluate the GPR on fully observed rows,
# then replace only the originally-missing cells with the GPR predictions.

# Columns whose gaps are imputed, and the columns used as model inputs.
# NOTE(review): inputs and targets are the same columns (as in the original
# script), so the model can do little more than reproduce its inputs. If the
# CSV has independent predictors (e.g. a timestamp or sensor id -- not visible
# from this script, please confirm), list them in FEATURE_COLUMNS instead and
# make sure they contain no missing values.
TARGET_COLUMNS = ['temperature', 'humidity']
FEATURE_COLUMNS = ['temperature', 'humidity']

# Load the data
data = pd.read_csv('sensor_data.csv')

# Record the gaps BEFORE filling anything; once the columns are mean-filled
# there is no way to tell which rows still need the GPR imputation.
missing_mask = data[TARGET_COLUMNS].isnull()
rows_with_gaps = missing_mask.any(axis=1)

# Provisional mean imputation. Assign the result back instead of calling
# fillna(inplace=True) on a column selection: that form is chained assignment
# and does not modify `data` under pandas Copy-on-Write.
data[TARGET_COLUMNS] = data[TARGET_COLUMNS].fillna(data[TARGET_COLUMNS].mean())

# Split the fully observed rows into training and testing sets, so that the
# model is fitted and scored against real readings only, never against
# mean-filled placeholders.
observed_data = data.loc[~rows_with_gaps]
train_data, test_data = train_test_split(observed_data, test_size=0.2, random_state=42)

# Scale the features (the scaler is fitted on the training rows only)
scaler = StandardScaler()
train_features = scaler.fit_transform(train_data[FEATURE_COLUMNS])
train_labels = train_data[TARGET_COLUMNS].to_numpy(dtype=float)

test_features = scaler.transform(test_data[FEATURE_COLUMNS])
test_labels = test_data[TARGET_COLUMNS].to_numpy(dtype=float)

# Define the GPR model
kernel = GPy.kern.RBF(input_dim=len(FEATURE_COLUMNS), variance=1., lengthscale=1.)
gpr_model = GPy.models.GPRegression(train_features, train_labels, kernel)

# Train the GPR model
gpr_model.optimize()

# Evaluate the GPR model; predict() returns (mean, variance), only the mean
# is needed for the error.
test_predictions = gpr_model.predict(test_features)[0]
rmse = np.sqrt(np.mean((test_labels - test_predictions) ** 2))
print("RMSE:", rmse)

# Impute the missing values using the GPR model. Work on an explicit copy so
# the edits below are not applied to a view/slice of `data`.
missing_data = data.loc[rows_with_gaps].copy()
if not missing_data.empty:
    # Model inputs for these rows are the provisional mean-filled values.
    missing_features = scaler.transform(missing_data[FEATURE_COLUMNS])
    imputed_labels = gpr_model.predict(missing_features)[0]
    imputed_frame = pd.DataFrame(
        imputed_labels, index=missing_data.index, columns=TARGET_COLUMNS
    )
    # Replace only the cells that were originally missing; readings that were
    # actually observed in these rows are kept as they are.
    missing_data[TARGET_COLUMNS] = missing_data[TARGET_COLUMNS].mask(
        missing_mask.loc[rows_with_gaps], imputed_frame
    )

    # Merge the imputed data back into the original data frame
    data.loc[rows_with_gaps, TARGET_COLUMNS] = missing_data[TARGET_COLUMNS]

# Show the first few rows of the imputed data
print(data.head())
