# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import joblib

# ---------------------------------------------------------------------------
# End-to-end pipeline: read the CSV, fit a gradient-boosted regressor on an
# 80/20 hold-out split, report the test MSE, plot predictions against the
# ground truth, and persist the fitted model with joblib.
# ---------------------------------------------------------------------------

# Input data: path is relative to the current working directory.
dataset_path = 'wsn_loc_dataset.csv'  # Replace with your actual file path
data = pd.read_csv(dataset_path)

# Quick visual sanity check of what was loaded.
print(data.head())

# Every column but the last is used as a feature; the last column is the
# regression target.
# NOTE(review): presumably the target is a single location value -- confirm
# against the CSV schema, since only one column is predicted here.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Gradient-boosted decision trees: 100 depth-3 trees, learning rate 0.1.
gbdt = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)

# fit() returns the estimator itself, so training and test-set prediction
# can be chained in one expression.
y_pred = gbdt.fit(X_train, y_train).predict(X_test)

# Report the mean squared error on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Scatter of ground truth (x-axis) against model output (y-axis).
plt.scatter(y_test, y_pred)
plt.title('Actual vs Predicted')
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.show()

# Persist the fitted estimator next to the script for later reuse.
model_path = 'gbdt_wsn_loc_model.pkl'
joblib.dump(gbdt, model_path)