In [1]:
import pandas as pd
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

In [5]:
# Read the crop dataset from the CSV file into a DataFrame
df = pd.read_csv("CropDataset.csv")
# Display the column labels of the loaded frame
df.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [3]:
# features and label set
# Select columns by name rather than by position so the selection does not
# silently shift if the CSV column order changes. These names correspond to
# positions 3..6 (features) and 7 (target) of the column index shown above.
# NOTE(review): the soil-nutrient columns 'N', 'P' and 'K' are not used as
# features here — confirm that excluding them is intentional.
FEATURE_COLUMNS = ['temperature', 'humidity', 'ph', 'rainfall']
LABEL_COLUMN = 'label'

X = df[FEATURE_COLUMNS]
Y = df[LABEL_COLUMN]

In [12]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
# NOTE(review): no `stratify` argument is passed, so class proportions may
# differ between the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=101,
)

In [13]:
# Grid search over the RBF-kernel SVC hyper-parameters C and gamma
# (5 x 5 = 25 candidate combinations).
# NOTE(review): the features are passed to the SVC as-is — no scaling step
# is visible in this notebook; confirm that is intended.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}
# refit=True keeps a final estimator trained with the winning combination;
# verbose=3 prints a log line for every individual fit.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] gamma=1, C=0.1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] .......... gamma=1, C=0.1, kernel=rbf, score=0.049, total=   0.2s
[CV] gamma=1, C=0.1, kernel=rbf ......................................
[CV] .......... gamma=1, C=0.1, kernel=rbf, score=0.049, total=   0.1s
[CV] gamma=1, C=0.1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.3s remaining:    0.0s


[CV] .......... gamma=1, C=0.1, kernel=rbf, score=0.049, total=   0.1s
[CV] gamma=1, C=0.1, kernel=rbf ......................................
[CV] .......... gamma=1, C=0.1, kernel=rbf, score=0.052, total=   0.1s
[CV] gamma=1, C=0.1, kernel=rbf ......................................
[CV] .......... gamma=1, C=0.1, kernel=rbf, score=0.058, total=   0.1s
[CV] gamma=0.1, C=0.1, kernel=rbf ....................................
[CV] ........ gamma=0.1, C=0.1, kernel=rbf, score=0.123, total=   0.1s
[CV] gamma=0.1, C=0.1, kernel=rbf ....................................
[CV] ........ gamma=0.1, C=0.1, kernel=rbf, score=0.136, total=   0.1s
[CV] gamma=0.1, C=0.1, kernel=rbf ....................................
[CV] ........ gamma=0.1, C=0.1, kernel=rbf, score=0.120, total=   0.1s
[CV] gamma=0.1, C=0.1, kernel=rbf ....................................
[CV] ........ gamma=0.1, C=0.1, kernel=rbf, score=0.162, total=   0.1s
[CV] gamma=0.1, C=0.1, kernel=rbf ....................................
[CV] .

[CV] ........ gamma=0.01, C=10, kernel=rbf, score=0.903, total=   0.1s
[CV] gamma=0.01, C=10, kernel=rbf ....................................
[CV] ........ gamma=0.01, C=10, kernel=rbf, score=0.890, total=   0.1s
[CV] gamma=0.01, C=10, kernel=rbf ....................................
[CV] ........ gamma=0.01, C=10, kernel=rbf, score=0.906, total=   0.1s
[CV] gamma=0.001, C=10, kernel=rbf ...................................
[CV] ....... gamma=0.001, C=10, kernel=rbf, score=0.886, total=   0.0s
[CV] gamma=0.001, C=10, kernel=rbf ...................................
[CV] ....... gamma=0.001, C=10, kernel=rbf, score=0.883, total=   0.0s
[CV] gamma=0.001, C=10, kernel=rbf ...................................
[CV] ....... gamma=0.001, C=10, kernel=rbf, score=0.854, total=   0.0s
[CV] gamma=0.001, C=10, kernel=rbf ...................................
[CV] ....... gamma=0.001, C=10, kernel=rbf, score=0.867, total=   0.0s
[CV] gamma=0.001, C=10, kernel=rbf ...................................
[CV] .

[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed:   12.2s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [14]:
# Best hyper-parameter combination found by the grid search above
# (a dict with the 'C', 'gamma' and 'kernel' keys from param_grid)
param = grid.best_params_

In [15]:
# training model
# The grid search was created with refit=True, so it has already retrained
# the best parameter combination on the whole training set. Reuse that
# fitted estimator instead of constructing and fitting an identical SVC a
# second time.
svc = grid.best_estimator_
# Predict crop labels for the held-out test features
predictions = svc.predict(X_test)

In [16]:
# Per-class precision, recall, F1 and support for the test-set predictions
print(classification_report(y_true=y_test, y_pred=predictions))

              precision    recall  f1-score   support

       apple       0.82      0.86      0.84        37
      banana       0.97      0.94      0.95        32
   blackgram       0.95      0.97      0.96        36
    chickpea       1.00      1.00      1.00        32
     coconut       0.72      0.91      0.81        23
      coffee       0.79      0.97      0.87        31
      cotton       0.82      0.97      0.89        29
      grapes       1.00      0.90      0.95        41
        jute       0.90      1.00      0.95        27
 kidneybeans       1.00      1.00      1.00        30
      lentil       0.97      0.97      0.97        29
       maize       0.92      0.97      0.94        34
       mango       0.97      1.00      0.99        33
   mothbeans       0.96      0.88      0.92        25
    mungbean       0.90      1.00      0.95        28
   muskmelon       1.00      1.00      1.00        26
      orange       0.89      0.86      0.88        29
      papaya       0.87    