In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump

In [4]:
# --- Load dataset ---
# The path is relative to the notebook's working directory. A forward slash is
# used as the separator because it is accepted on Windows as well as on
# Linux/macOS, whereas a backslash is only a separator on Windows.
data = pd.read_csv("Datasets/food-contamination-data-cleaned-2.csv")

In [5]:
# --- Separate the regression target (y) from the predictor columns (X) ---
y = data['ResultValue']
X = data.drop(columns=['ResultValue'])

In [6]:
# --- Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# --- Apply log transformation to target variable ---
# The log is computed from the untouched `y`, selecting rows by each split's
# index labels, instead of from y_train / y_test themselves. Re-running this
# cell therefore yields the same values rather than stacking a second log1p
# on top of the first. (Relies on `y` having unique index labels, which holds
# for the default index produced by pd.read_csv.)
# NOTE: the model is fitted and scored on these log1p values, so its
# predictions are in log1p space; use np.expm1 to map them back to the
# original units.
y_train = np.log1p(y.loc[y_train.index])
y_test = np.log1p(y.loc[y_test.index])

In [8]:
# --- Identify categorical and numerical columns ---
# Text columns are selected as both "object" and "string": older pandas reads
# text as object dtype, while newer pandas can infer a dedicated string dtype
# that include=['object'] alone would miss. Missing them would send raw text
# down the numeric passthrough branch.
categorical_cols = X.select_dtypes(include=['object', 'string']).columns.tolist()
# Every remaining column is treated as numerical, in its original column order.
numerical_cols = [col for col in X.columns if col not in categorical_cols]

In [9]:
# --- Preprocessing: numeric columns pass through unchanged, categoricals are one-hot encoded ---
# handle_unknown='ignore' keeps transform from raising on categories that were
# not present when the encoder was fitted.
categorical_transformer = Pipeline(
    steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)
numerical_transformer = 'passthrough'
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols),
    ]
)

In [10]:
# --- Decision Tree model: shared preprocessing followed by a seeded tree regressor ---
decision_tree = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', DecisionTreeRegressor(random_state=42)),
    ]
)

In [11]:
# --- Fit the Decision Tree on the training split, then score it on the held-out split ---
decision_tree.fit(X_train, y_train)
y_pred_dt = decision_tree.predict(X_test)

# Both metrics compare predictions against y_test exactly as it stands at this
# point in the notebook (i.e. after the target transformation cell above).
dt_mse = mean_squared_error(y_test, y_pred_dt)
dt_r2 = r2_score(y_test, y_pred_dt)
print(f"Decision Tree MSE: {dt_mse}")
print(f"Decision Tree R2 Score: {dt_r2}")

Decision Tree MSE: 0.3871044290112168
Decision Tree R2 Score: 0.8228322288638905


In [13]:
# Persist the fitted pipeline (preprocessing + regressor) to disk with joblib.
# The call is left as the cell's last expression so its return value is displayed.
dump(value=decision_tree, filename="contamination-prediction-model.joblib")

['contamination-prediction-model.joblib']