In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

In [2]:
df = pd.read_csv('data/train.csv')

In [3]:
df.head()

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,1,27,69,65,Sandy,Millets,30,6,18,28-28
2,2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,4,35,58,43,Red,Paddy,37,2,16,DAP


In [14]:
y

0            28-28
1            28-28
2         17-17-17
3         10-26-26
4              DAP
            ...   
749995       28-28
749996    17-17-17
749997    10-26-26
749998       20-20
749999        Urea
Name: Fertilizer Name, Length: 750000, dtype: object

In [54]:
X = df.drop('Fertilizer Name', axis = 1)
y = df['Fertilizer Name']

In [55]:
X

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,0,37,70,36,1,8,36,4,5
1,1,27,69,65,4,4,30,6,18
2,2,29,63,32,4,4,24,12,16
3,3,35,62,54,4,0,39,12,4
4,4,35,58,43,3,6,37,2,16
...,...,...,...,...,...,...,...,...,...
749995,749995,25,69,30,1,3,8,16,6
749996,749996,37,64,58,2,8,38,8,20
749997,749997,35,68,59,4,2,6,11,29
749998,749998,31,68,29,3,1,9,11,12


In [56]:
#Creating Column Transformers
num_features = X.select_dtypes(exclude="object").columns
cat_features = X.select_dtypes(include = "object").columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder


encoder = LabelEncoder()

for col in cat_features:
    X[col] = encoder.fit_transform(X[col])

y  = encoder.fit_transform(y)

In [57]:
y

array([4, 4, 2, ..., 0, 3, 6])

In [None]:
X.shape

In [64]:
#Splitting the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((600000, 9), (150000, 9), (600000,), (150000,))

In [12]:
# Creating Evaluation Function
def evaluate_model(true, predicted):
    accuracy = accuracy_score(true, predicted)
    precision = precision_score(true, predicted, average = 'macro' )
    recall = recall_score(true, predicted, average = 'macro')
    f1 = f1_score(true, predicted,average = 'macro')
    return accuracy, precision, recall, f1

In [13]:
models  = {
    'K-Neigbors Classifier': KNeighborsClassifier(),
    'DecisionTree Classifier': DecisionTreeClassifier(),
    'RandomForest Classifier': RandomForestClassifier(),
    'AdaBoost Classifier': AdaBoostClassifier(),
    'CatBoost Classifier': CatBoostClassifier(),
    'Xgboost Classifier': XGBClassifier()
}

model_list = []
accuracy_list = []


for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)

    #Make Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)


    #Evaluating the Models
    model_train_accuracy, model_train_precision, model_train_recall, model_train_f1 = evaluate_model(y_train, y_train_pred)
    model_test_accuracy, model_test_precision, model_test_recall, model_test_f1 = evaluate_model(y_test, y_test_pred)


    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model performance for Training set')
    print("- Accuracy: {:.4f}".format(model_train_accuracy))
    print("- Precison: {:.4f}".format(model_train_precision))
    print("- Recall: {:.4f}".format(model_train_recall))
    print("- F1 Score: {:.4f}".format(model_train_f1))

    print('----------------------------------')

    print('Model performance for Testing set')
    print("- Accuracy: {:.4f}".format(model_test_accuracy))
    print("- Precison: {:.4f}".format(model_test_precision))
    print("- Recall: {:.4f}".format(model_test_recall))
    print("- F1 Score: {:.4f}".format(model_test_f1))
    accuracy_list.append(model_test_accuracy)
    
    print('='*35)
    print('\n')


K-Neigbors Classifier
Model performance for Training set
- Accuracy: 0.4111
- Precison: 0.4260
- Recall: 0.4041
- F1 Score: 0.4024
----------------------------------
Model performance for Testing set
- Accuracy: 0.1501
- Precison: 0.1466
- Recall: 0.1464
- F1 Score: 0.1418


DecisionTree Classifier
Model performance for Training set
- Accuracy: 1.0000
- Precison: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Testing set
- Accuracy: 0.1490
- Precison: 0.1481
- Recall: 0.1481
- F1 Score: 0.1481


RandomForest Classifier
Model performance for Training set
- Accuracy: 1.0000
- Precison: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Testing set
- Accuracy: 0.1643
- Precison: 0.1617
- Recall: 0.1608
- F1 Score: 0.1591




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


AdaBoost Classifier
Model performance for Training set
- Accuracy: 0.1534
- Precison: 0.0452
- Recall: 0.1441
- F1 Score: 0.0624
----------------------------------
Model performance for Testing set
- Accuracy: 0.1524
- Precison: 0.0445
- Recall: 0.1432
- F1 Score: 0.0618


Learning rate set to 0.10965
0:	learn: 1.9449847	total: 757ms	remaining: 12m 36s
1:	learn: 1.9441026	total: 1.54s	remaining: 12m 47s
2:	learn: 1.9433648	total: 2.08s	remaining: 11m 29s
3:	learn: 1.9427529	total: 2.58s	remaining: 10m 43s
4:	learn: 1.9421478	total: 3.05s	remaining: 10m 6s
5:	learn: 1.9416553	total: 3.67s	remaining: 10m 8s
6:	learn: 1.9412242	total: 4.16s	remaining: 9m 50s
7:	learn: 1.9408467	total: 4.82s	remaining: 9m 57s
8:	learn: 1.9404884	total: 5.24s	remaining: 9m 36s
9:	learn: 1.9401814	total: 5.74s	remaining: 9m 28s
10:	learn: 1.9399221	total: 6.33s	remaining: 9m 28s
11:	learn: 1.9396211	total: 6.75s	remaining: 9m 15s
12:	learn: 1.9393587	total: 7.22s	remaining: 9m 8s
13:	learn: 1.9391518	total: 

ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0 1 2 3 4 5 6], got ['10-26-26' '14-35-14' '17-17-17' '20-20' '28-28' 'DAP' 'Urea']

In [11]:
model_train_precision

array([0.34823622, 0.39967962, 0.43640252, 0.44818547, 0.45239804,
       0.44888564, 0.44825807])

In [59]:
model  =RandomForestClassifier()

In [60]:
model.fit(X_train, y_train)


In [None]:
model.score(X_test, y_test)

In [61]:
y_pred = model.predict(X_test)

In [62]:
evaluate_model(y_test, y_pred)

(0.16547333333333333,
 0.16362144629563646,
 0.16190095504923027,
 0.1601137108932388)

In [29]:
X.head()

AttributeError: 'csr_matrix' object has no attribute 'head'

In [63]:
X

Unnamed: 0,id,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,0,37,70,36,1,8,36,4,5
1,1,27,69,65,4,4,30,6,18
2,2,29,63,32,4,4,24,12,16
3,3,35,62,54,4,0,39,12,4
4,4,35,58,43,3,6,37,2,16
...,...,...,...,...,...,...,...,...,...
749995,749995,25,69,30,1,3,8,16,6
749996,749996,37,64,58,2,8,38,8,20
749997,749997,35,68,59,4,2,6,11,29
749998,749998,31,68,29,3,1,9,11,12
