In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [2]:
df = pd.read_csv("../../../../data/yield_prediction_model_ready_data.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [3]:
ct = ColumnTransformer([("encoder", OneHotEncoder(), [0])], remainder="passthrough")
X = ct.fit_transform(X)
X = np.array(X)

In [4]:
X_train ,X_test, y_train, y_test = train_test_split(X, y, train_size=0.25, random_state=0)

In [5]:
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X_train, y_train)

0,1,2
,n_estimators,10
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [6]:
test_input = np.array(ct.transform([[
  "Carrots and turnips", 19.685, 29.06245902, 24.37372951, 9.377459016,
  2424.595, 1.417377049, 9.387868852]]))
regressor.predict(test_input)

array([0.906598])

In [7]:
y_pred = np.array(regressor.predict(X_test))

np.concatenate([y_pred.reshape(-1,1), y_test.reshape(-1,1)], 1)

array([[1.436943, 1.36646 ],
       [0.721239, 1.18439 ],
       [0.479597, 0.58339 ],
       [0.777104, 0.62002 ],
       [0.02056 , 0.02023 ],
       [1.559358, 1.12487 ],
       [1.152618, 1.15963 ],
       [0.307443, 0.08372 ],
       [0.692653, 0.50328 ],
       [1.091535, 1.66494 ],
       [0.021347, 0.02338 ],
       [0.1798  , 0.11492 ],
       [0.482176, 0.9028  ],
       [1.559962, 1.94064 ],
       [0.376711, 0.09735 ],
       [0.356872, 0.1183  ],
       [1.160579, 1.31355 ],
       [0.712561, 0.59872 ],
       [0.021261, 0.01558 ],
       [0.047026, 0.09391 ],
       [0.252729, 1.09243 ],
       [1.245863, 1.73104 ],
       [1.2031  , 1.52657 ],
       [0.053902, 0.09029 ],
       [0.0524  , 0.08091 ],
       [0.055935, 0.09096 ],
       [0.070396, 0.12559 ],
       [0.908132, 1.01281 ],
       [0.55615 , 0.86179 ],
       [1.070479, 0.92472 ],
       [0.755194, 0.52499 ],
       [0.924142, 1.36738 ],
       [0.850929, 0.69938 ],
       [0.15546 , 0.21036 ],
       [1.5068

In [8]:
print("r2 " + str(r2_score(y_test, y_pred)))
print("mae " + str(mean_absolute_error(y_test, y_pred)))

r2 0.7395496954671285
mae 0.1978612
