In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Step 1: Read the cleaned dataset from disk
data_cleaned = pd.read_csv('cleaned_dataset.csv')

# Step 2: Name the predictor columns; every other column in the frame is a target
feature_columns = ['REP', 'VARIETY', 'GID']
target_columns = [
    name
    for name in data_cleaned.columns
    if name not in feature_columns
]

# Step 3: Integer-encode each predictor column with its own LabelEncoder.
# One encoder per column is created up front, then each is fitted on (and
# used to overwrite) its column in `data_cleaned`.
label_encoders_features = {col: LabelEncoder() for col in feature_columns}
for col, le in label_encoders_features.items():
    data_cleaned[col] = le.fit_transform(data_cleaned[col])

# Persist the fitted encoders as a {column name: LabelEncoder} dict
encoders_file_path = 'label_encoders.pkl'
with open(encoders_file_path, 'wb') as encoders_file:
    pickle.dump(label_encoders_features, encoders_file)

print(f"Encoders saved to: {encoders_file_path}")

# Predictor matrix and multi-column target frame
X = data_cleaned.loc[:, feature_columns]
y = data_cleaned.loc[:, target_columns]

# Step 4: Hold out 20% of the rows for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Wrap a seeded GradientBoostingRegressor in MultiOutputRegressor so the
# multi-column target frame can be fitted in a single call
base_estimator = GradientBoostingRegressor(random_state=42)
regressor = MultiOutputRegressor(base_estimator)
regressor.fit(X_train, y_train)

# Step 6: Pickle the fitted model to disk
model_file_path = 'gradient_boosting_model.pkl'
with open(model_file_path, 'wb') as model_file:
    pickle.dump(regressor, model_file)

print(f"Model saved to: {model_file_path}")

# Step 7: Predict on the held-out rows
y_pred = regressor.predict(X_test)

# Gather one (column name, MSE, R2) tuple per target; column `idx` of the
# prediction array is scored against column `idx` of `y_test`
metrics = [
    (
        col,
        mean_squared_error(y_test.iloc[:, idx], y_pred[:, idx]),
        r2_score(y_test.iloc[:, idx], y_pred[:, idx]),
    )
    for idx, col in enumerate(target_columns)
]

# Report the scores in target-column order
for col, mse, r2 in metrics:
    print(f"Metrics for {col}:")
    print(f"  Mean Squared Error: {mse:.4f}")
    print(f"  R2 Score: {r2:.4f}")


Encoders saved to: label_encoders.pkl
Model saved to: gradient_boosting_model.pkl
Metrics for SEEDS:
  Mean Squared Error: 32864.9676
  R2 Score: 0.2863
Metrics for SEEDKGHA:
  Mean Squared Error: 73942.9926
  R2 Score: 0.1098
Metrics for DFF:
  Mean Squared Error: 33.9587
  R2 Score: 0.4437
Metrics for MATURE:
  Mean Squared Error: 92.6684
  R2 Score: 0.3942


In [13]:
# `sklearn` is only the import name; the installed distribution is called
# `scikit-learn`, so that is the name pip must be asked about. The `%pip` magic
# is spelled out explicitly instead of relying on IPython's automagic.
%pip show scikit-learn

Note: you may need to restart the kernel to use updated packages.


Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\__main__.py", line 22, in <module>
    from pip._internal.cli.main import main as _main
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\_internal\cli\main.py", line 10, in <module>
    from pip._internal.cli.autocompletion import autocomplete
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\_internal\cli\autocompletion.py", line 10, in <module>
    from pip._internal.cli.main_parser import create_main_parser
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\_internal\cli\main_parser.py", line 9, in <module>
    from pip._internal.build_env import get_runnable_pip
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\_internal\build_env.py", line 19, in <module>
    from pip._internal.cli.spinners import open_spinner
  File "C:\ProgramData\anaconda3\Lib\site-packages\pip\_inte

In [17]:
# Print pip's metadata for the scikit-learn distribution (name, version, install
# location, dependencies) by shelling out to the `pip` found on PATH.
# NOTE(review): that `pip` is presumably the same environment the kernel uses - confirm.
!pip show scikit-learn


Name: scikit-learn
Version: 1.4.2
Summary: A set of python modules for machine learning and data mining
Home-page: https://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: C:\ProgramData\anaconda3\Lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: imbalanced-learn, librosa
