In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
import xgboost as xgb
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

## Load Data

In [2]:
dataset = pd.read_csv("Crop_recommendation.csv")

## Split Training and Testing Data

In [3]:
dataset.drop_duplicates(inplace=True)

X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Run Pickle Files of 6 Models

##### 1. KNN

In [4]:
KNN_Pkl_Filename = "Pickle_KNN_Model.pkl" 

In [5]:
with open(KNN_Pkl_Filename, 'rb') as KNN_file:  
    Pickled_KNN_Model = pickle.load(KNN_file)
    
Pickled_KNN_Model

KNeighborsClassifier(metric='manhattan', n_neighbors=7, weights='distance')

In [6]:
# Calculate the Score 
knn_score = Pickled_KNN_Model.score(X_test, y_test)  
# Print the Score
print("Test score: {0:.2f} %".format(100 * knn_score))  

# Predict the Labels using the reloaded Model
Ypredict = Pickled_KNN_Model.predict(X_test)  

Test score: 99.55 %


##### 2. Logistics Regression

In [7]:
LG_Pkl_Filename = "LR.pkl"
with open(LG_Pkl_Filename, 'rb') as LG_file:  
    Pickled_LG_Model = pickle.load(LG_file)

In [8]:
# Calculate the Score 
LG_score = Pickled_LG_Model.score(X_test, y_test)  
# Print the Score
print("Test score: {0:.2f} %".format(100 * LG_score))  

Test score: 96.82 %


##### 3. Decision Tree

In [9]:
DT_Pkl_Filename = "Pickle_DecisionTree_Model.pkl"
with open(DT_Pkl_Filename, 'rb') as DT_file:  
    Pickled_DT_Model = pickle.load(DT_file)
Pickled_DT_Model

DecisionTreeClassifier()

In [10]:
# Calculate the Score 
DT_score = Pickled_DT_Model.score(X_test, y_test)  
# Print the Score
print("Test score: {0:.2f} %".format(100 * DT_score))  

Test score: 99.09 %


##### 4. Random Forest

In [11]:
random_forest_Pkl_Filename = "Pickle_random_forest_Model.pkl"
with open(random_forest_Pkl_Filename, 'rb') as RF_file:  
    Pickled_RF_Model = pickle.load(RF_file)
Pickled_RF_Model

RandomForestClassifier(max_depth=4, random_state=0)

In [12]:
# Calculate the Score 
RF_score = Pickled_RF_Model.score(X_test, y_test)  
# Print the Score
print("Test score: {0:.2f} %".format(100 * RF_score))

Test score: 95.00 %


##### 5. XGboost Model

In [13]:
XGboost_Pkl_Filename = "XGboost_model.pkl"
with open(XGboost_Pkl_Filename, 'rb') as file:  
    Pickled_XGboost_Model = pickle.load(file)
Pickled_XGboost_Model

<xgboost.core.Booster at 0x7fa8c8157370>

In [14]:
label_encoder = LabelEncoder()
xgb_y_test = label_encoder.fit_transform(y_test)

xgb_dtest = xgb.DMatrix(X_test)
# Calculate the Score 

# Predict the Labels using the reloaded Model
Ypredict = Pickled_XGboost_Model.predict(xgb_dtest)  

# Calculate the Accuracy Score
xgb_score = accuracy_score(xgb_y_test, Ypredict)  

# Print the Score
print("Test score: {0:.2f} %".format(100 * xgb_score))


Test score: 100.00 %


##### 6.SVM

In [15]:
# Standardize the data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Load the model from the joblib file
loaded_svm_classifier = joblib.load('joblib_SVM_Model.pkl')

# Calculate the test score
SVM_score = loaded_svm_classifier.score(X_test, y_test)

# Print the test score
print("Test score: {0:.2f} %".format(100 * SVM_score))

Test score: 99.09 %


## Performance Comparison

In [16]:
performance = pd.DataFrame({'model': ['KNN', 'Logistic Regression', 'Decision Tree', 'Random Forest', 'XGBoost'],
                   'accuracy': [knn_score, LG_score, DT_score, RF_score, xgb_score]})

# sort the dataframe by the accuracy in ascending order
performance = performance.sort_values(by='accuracy', ascending=False)

performance

Unnamed: 0,model,accuracy
4,XGBoost,1.0
0,KNN,0.995455
2,Decision Tree,0.990909
1,Logistic Regression,0.968182
3,Random Forest,0.95
