In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder , StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report

In [2]:
data = pd.read_csv('../input/fertilizer-prediction/Fertilizer Prediction.csv')

In [3]:
data

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea
...,...,...,...,...,...,...,...,...,...
94,25,50,32,Clayey,Pulses,24,0,19,28-28
95,30,60,27,Red,Tobacco,4,17,17,10-26-26
96,38,72,51,Loamy,Wheat,39,0,0,Urea
97,36,60,43,Sandy,Millets,15,0,41,DAP


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [5]:
# preprocessing 

In [6]:
y = data['Fertilizer Name']
X = data.drop('Fertilizer Name' , axis = 1)

X_train , X_test , y_train , y_test = train_test_split(X , y ,train_size = 0.7 , shuffle = True , random_state = 1)

In [9]:
nominal_transform = Pipeline( steps = [('onehot' , OneHotEncoder(sparse = False))])

preprocessor = ColumnTransformer(transformers=[('nominal', nominal_transform, ['Soil Type', 'Crop Type'])], remainder='passthrough')

model = Pipeline(steps = [ ('preprocessor', preprocessor) , ('scaler', StandardScaler()),('classifier', RandomForestClassifier())])

In [10]:
model.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('nominal',
                                                  Pipeline(steps=[('onehot',
                                                                   OneHotEncoder(sparse=False))]),
                                                  ['Soil Type',
                                                   'Crop Type'])])),
                ('scaler', StandardScaler()),
                ('classifier', RandomForestClassifier())])

In [11]:
print("Test Accuracy: {:.2f}%".format(model.score(X_test, y_test) * 100))

Test Accuracy: 86.67%


In [12]:
y_pred = model.predict(X_test)

clr = classification_report(y_test, y_pred)
print("Classification Report:\n----------------------\n", clr)

Classification Report:
----------------------
               precision    recall  f1-score   support

    10-26-26       0.50      0.33      0.40         3
    14-35-14       0.71      1.00      0.83         5
    17-17-17       0.50      0.33      0.40         3
       20-20       1.00      1.00      1.00         4
       28-28       1.00      1.00      1.00         3
         DAP       1.00      1.00      1.00         5
        Urea       1.00      1.00      1.00         7

    accuracy                           0.87        30
   macro avg       0.82      0.81      0.80        30
weighted avg       0.85      0.87      0.85        30

