<a href="https://colab.research.google.com/github/anushka827/model-1/blob/main/Untitled29.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Load the datasets
# Load training data from 'train.csv'
train_df = pd.read_csv('train.csv')
# Load test data from 'test.csv'
test_df = pd.read_csv('test.csv')

# Step 2: Define features and target variable
# These are the columns used as input for the model
features = ['clonesize', 'honeybee', 'bumbles', 'andrena', 'osmia',
            'MaxOfUpperTRange', 'MinOfUpperTRange', 'AverageOfUpperTRange',
            'MaxOfLowerTRange', 'MinOfLowerTRange', 'AverageOfLowerTRange',
            'RainingDays', 'AverageRainingDays', 'fruitset', 'fruitmass', 'seeds']
# This is the column the model will try to predict
target = 'output'

# Separate features (X) and target (y) for the training set
X_train = train_df[features]
y_train = train_df[target]

# Prepare the features for the test set (no 'output' column in test data)
X_test = test_df[features]

# Step 3: Initialize and train the GradientBoostingRegressor model
# n_estimators: The number of boosting stages to perform.
# learning_rate: Controls the contribution of each tree.
# random_state: A seed for reproducibility.
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Train the model using the training data
gb_model.fit(X_train, y_train)

# Step 4: Make predictions on the training data
# These predictions are used to evaluate how well the model learned from the data it was trained on.
y_train_pred_gb = gb_model.predict(X_train)

# Step 5: Evaluate the model on the training data
# Calculate common regression metrics:
# MAE (Mean Absolute Error)
# MSE (Mean Squared Error)
# RMSE (Root Mean Squared Error)
# R2 (R-squared)
mae_train_gb = mean_absolute_error(y_train, y_train_pred_gb)
mse_train_gb = mean_squared_error(y_train, y_train_pred_gb)
rmse_train_gb = mse_train_gb**0.5
r2_train_gb = r2_score(y_train, y_train_pred_gb)

# Print the evaluation metrics for the training set
print("Gradient Boosting Regressor Training Metrics:")
print(f"Mean Absolute Error (MAE): {mae_train_gb:.2f}")
print(f"Mean Squared Error (MSE): {mse_train_gb:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_train_gb:.2f}")
print(f"R-squared (R2): {r2_train_gb:.2f}")

# Step 6: Make predictions on the test data
# These are predictions on unseen data.
y_test_pred_gb = gb_model.predict(X_test)

# Create a DataFrame to store the test predictions
test_predictions_gb_df = pd.DataFrame({
    'id': test_df['id'],
    'predicted_output_gb': y_test_pred_gb
})

# Display the first few rows of the generated test predictions
print("\nGradient Boosting Test Predictions Head:")
print(test_predictions_gb_df.head())

# Save the test predictions to a new CSV file
test_predictions_gb_df.to_csv('test_predictions_gradient_boosting.csv', index=False)

Gradient Boosting Regressor Training Metrics:
Mean Absolute Error (MAE): 236.90
Mean Squared Error (MSE): 125680.93
Root Mean Squared Error (RMSE): 354.52
R-squared (R2): 0.93

Gradient Boosting Test Predictions Head:
   id  predicted_output_gb
0   1          4230.733904
1   2          3453.448411
2   3          6379.989448
3   4          4163.950768
4   5          7423.623281
