In [16]:
import pandas as pd
# Read the pesticides CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='crop_yield_dataset/pesticides.csv')

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [18]:
# Column roles consumed by the encoding / scaling cells further down.
numerical_features = ['Year']
categorical_features = ['Domain', 'Area', 'Element', 'Item']

# 'Value' is the regression target; every other column stays in the feature frame.
y = df['Value']
X = df.drop(columns=['Value'])

In [19]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [20]:
# One-hot encode the categorical columns. The encoder learns its categories
# from the training rows only; handle_unknown='ignore' means a category that
# appears only in the test rows is encoded as all zeros instead of raising.
encoder = OneHotEncoder(handle_unknown='ignore')
encoder.fit(X_train[categorical_features])

X_train_encoded, X_test_encoded = (
    encoder.transform(split[categorical_features])
    for split in (X_train, X_test)
)

In [21]:
# Standardise the numeric column(s). Mean and variance are estimated on the
# training rows only and then reused unchanged for the test rows.
scaler = StandardScaler()
scaler.fit(X_train[numerical_features])

X_train_scaled = scaler.transform(X_train[numerical_features])
X_test_scaled = scaler.transform(X_test[numerical_features])

In [22]:
import scipy.sparse as sp
import numpy as np
# Assemble the final design matrices: scaled numeric column(s) first, then the
# one-hot block, so train and test share the same column layout.
# format='csr' is requested explicitly: with mixed dense/sparse inputs hstack
# otherwise typically returns COO, which cannot be row-indexed and which
# scikit-learn estimators would have to convert internally on each call.
# The stored values are unchanged; only the container format differs.
X_train_preprocessed = sp.hstack((X_train_scaled, X_train_encoded), format='csr')
X_test_preprocessed = sp.hstack((X_test_scaled, X_test_encoded), format='csr')

In [24]:
from sklearn.tree import DecisionTreeRegressor
# Decision-tree regressor with default hyperparameters; random_state is fixed
# so repeated fits on the same data give the same tree.
model = DecisionTreeRegressor(random_state=101)

# Train on the preprocessed (scaled + one-hot) training matrix.
model.fit(X=X_train_preprocessed, y=y_train)

In [28]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Score the fitted tree on the held-out rows.
y_pred = model.predict(X_test_preprocessed)

# Regression metrics comparing held-out targets against the predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report: one line per metric; the final line ends with an extra newline so a
# blank line follows the block.
print(
    'Decision Tree:',
    f'Mean Squared Error: {mse}',
    f'Mean Absolute Error: {mae}',
    f'R^2 Score: {r2}\n',
    sep='\n',
)

Decision Tree:
Mean Squared Error: 58057818.622516096
Mean Absolute Error: 1513.5954712643677
R^2 Score: 0.9963516309894206

