# In [1]:
import pandas as pd  
import xgboost as xgb  
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
import numpy as np  
from xgboost import XGBRegressor , plot_importance
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_log_error, mean_absolute_percentage_error


# Train and evaluate an XGBoost regressor that predicts 'hg/ha_yield' from the
# remaining columns of yield_df.csv, tuning hyper-parameters with a grid search.

df = pd.read_csv('yield_df.csv')
df.head()

# dropna() returns a new DataFrame; assign it back so that rows with missing
# values are actually removed (the bare call had no effect).
df = df.dropna()

# "Area" and "Item" are excluded from the feature set.
df = df.drop(["Area", "Item"], axis=1)

# Features: every remaining column except the target; target: 'hg/ha_yield'.
x = df.drop('hg/ha_yield', axis=1)
y = df['hg/ha_yield']

# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.3
)

X_train.head(), X_test.head()

regressor = xgb.XGBRegressor(eval_metric='rmsle')

# NOTE: this grid has 4 * 3 * 3 * 3 * 4 * 3 * 2 = 2592 combinations; with
# cv=5 that is 12960 model fits, so the search can take a long time.
param_grid = [{
    "subsample": [0.25, 0.50, 0.75, 1],
    "colsample_bytree": [0.50, 0.75, 1],
    "max_depth": [4, 5, 6],
    "n_estimators": [100, 200, 300],
    "min_child_weight": [1, 2, 3, 4],
    "reg_alpha": [0.1, 0.01, 1],
    "learning_rate": [0.01, 0.015],
}]

# No `scoring` is given, so candidates are ranked by the estimator's own
# score() method rather than by RMSLE.
search = GridSearchCV(regressor, param_grid, cv=5)
search.fit(X_train, y_train)

print(search.best_params_)

# GridSearchCV (refit=True by default) has already refitted a clone of the
# estimator with the best parameters on the whole training set, so reuse it
# instead of constructing and fitting an identical model a second time.
regressor = search.best_estimator_

prediction = regressor.predict(X_test)

# Root mean squared logarithmic error on the held-out test set.
RMSLE = np.sqrt(mean_squared_log_error(y_test, prediction))

print('RMSLE:', RMSLE)

# mean_absolute_percentage_error returns a fraction; scale to percent.
MAPE = mean_absolute_percentage_error(y_test, prediction)

print(f'MAPE: {MAPE * 100:.2f}%')

xgb.plot_importance(regressor)

plt.show()



# Notebook output: KeyboardInterrupt (execution was interrupted)