In [1]:
import numpy as np 
import pandas as pd

In [2]:
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [3]:
df_yield = pd.read_csv('apy-1.csv')


df_yield.head()

Unnamed: 0,State_Name,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Other Kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Cashewnut,720.0,165.0


In [4]:
# drop unwanted columns.
df_yield = df_yield.drop(['State_Name', 'Crop_Year'], axis=1)
df_yield.head()

Unnamed: 0,District_Name,Season,Crop,Area,Production
0,NICOBARS,Kharif,Arecanut,1254.0,2000.0
1,NICOBARS,Kharif,Other Kharif pulses,2.0,1.0
2,NICOBARS,Kharif,Rice,102.0,321.0
3,NICOBARS,Whole Year,Banana,176.0,641.0
4,NICOBARS,Whole Year,Cashewnut,720.0,165.0


In [5]:
df_yield['prod/Ar'] = df_yield['Production']/df_yield['Area']
df_yield = df_yield.drop(['Production','Area'], axis=1)
df_yield.head()

Unnamed: 0,District_Name,Season,Crop,prod/Ar
0,NICOBARS,Kharif,Arecanut,1.594896
1,NICOBARS,Kharif,Other Kharif pulses,0.5
2,NICOBARS,Kharif,Rice,3.147059
3,NICOBARS,Whole Year,Banana,3.642045
4,NICOBARS,Whole Year,Cashewnut,0.229167


In [6]:
y1 = df_yield['Crop']
X1= df_yield[['District_Name','Season','prod/Ar']]

In [7]:
from sklearn import preprocessing
# label_encoder object knows how to understand word labels.
label_encoder = preprocessing.LabelEncoder()
  
# Encode labels in column 'species'.
df_yield['District_Name']= label_encoder.fit_transform(df_yield['District_Name'])
df_yield['District_Name'].unique()

array([427, 429, 559,  24, 122, 172, 217, 274, 335, 340, 463, 566, 567,
       625, 626, 633,  26, 113, 157, 174, 176, 342, 353, 354, 356, 357,
       415, 446, 580, 593, 613, 614, 635, 637,  41,  64,  95, 103, 119,
       137, 152, 155, 158, 159, 206, 208, 221, 270, 278, 279, 292, 294,
       322, 345, 381, 406, 412, 551, 558, 592, 603,  29,  31,  34,  53,
        70,  77,  85, 102, 135, 202, 212, 257, 261, 275, 301, 306, 317,
       346, 361, 362, 399, 402, 411, 423, 448, 453, 468, 470, 498, 505,
       509, 518, 531, 532, 548, 552, 572, 618, 111,  46,  47,  48,  66,
        73,  88,  91, 134, 147, 169, 200, 258, 259, 272, 283, 327, 330,
       331, 365, 398, 418, 475, 476, 481, 569, 573, 576, 130, 432, 562,
         5,  20,  23,  49,  80,  81, 133, 166, 196, 255, 271, 273, 310,
       369, 419, 422, 442, 449, 462, 480, 502, 574, 575, 578, 617, 619,
        16,  84, 183, 186, 219, 233, 263, 268, 276, 296, 341, 368, 389,
       441, 443, 444, 495, 497, 547, 557, 642, 105, 222, 282, 31

In [8]:
df_yield['Season']= label_encoder.fit_transform(df_yield['Season'])
df_yield['Season'].unique()


array([1, 4, 0, 2, 3, 5])

In [9]:
df_yield['Crop']= label_encoder.fit_transform(df_yield['Crop'])  
df_yield['Crop'].unique()

array([  2,  74,  95,   7,  22,  28,  38, 106, 108, 109,  15,  37, 123,
       114,  59,  63, 116,   3,  43, 107,   6,  25,  33,  45,  48,  53,
        90, 111,  41, 119,  61, 102,  57,  98,  67, 122,  99, 103,  32,
        87,  69, 105,  10,  13,  18,  27,  35,  42,  60,  68, 121,  73,
        75,  77,  85, 112,  92,  62,  34,  55,  86, 101,  19,  80,  65,
        17, 100, 117,  39,  40,  66,  88,  49,  81,  16,  76,  83,   8,
        52,  44,  64,  70,  31, 115,  21,  93,   1,   5,  23,  24,  20,
        97,  14,  36,  46, 104,  89, 110,  29,  26,  71, 118, 113,  51,
        30,  56,   9,  47,  82,  91,  96,   4,  11,  54,  94, 120,   0,
        78,  79,  84,  58,  12,  72,  50])

In [10]:
df_yield = df_yield.fillna(0)
df_yield['prod/Ar']=df_yield['prod/Ar'].astype(int)
df_yield.head()

Unnamed: 0,District_Name,Season,Crop,prod/Ar
0,427,1,2,1
1,427,1,74,0
2,427,1,95,3
3,427,4,7,3
4,427,4,22,0


In [11]:
#from sklearn.preprocessing import MinMaxScaler
#scaler=MinMaxScaler()
#df_yield=scaler.fit_transform(df_yield)

In [12]:
from sklearn.model_selection import train_test_split
X1_train, X1_test, y1_train, y1_test = train_test_split(df_yield, y1, test_size=0.2)

In [13]:
modelss=DecisionTreeClassifier(random_state=24)
modelss.fit(X1_train, y1_train) # fit the model
new_input = [[427,1,4,0]]
# get prediction for new input
new_output = modelss.predict(new_input)
# summarize input and output
print(new_input, new_output)

[[427, 1, 4, 0]] ['Ash Gourd']


In [None]:
tree= DecisionTreeClassifier() # using the random state for reproducibility
forest= RandomForestClassifier()
knn= KNeighborsClassifier()
svm= SVC()
LR=LogisticRegression()
LDA= LinearDiscriminantAnalysis()
NB=GaussianNB()



# now, create a list with the objects 
models= [tree, forest, knn, svm , LR , LDA , NB]

for model in models:
    model.fit(X1_train, y1_train) # fit the model
    y1_pred= model.predict(X1_test) # then predict on the test set
    accuracy1= accuracy_score(y1_test, y1_pred) # this gives us how often the algorithm predicted correctly
    clf_report1= classification_report(y1_test, y1_pred) # with the report, we have a bigger picture, with precision and recall for each class
    
    print(f"The accuracy of model {type(model).__name__} is {accuracy1:.2f}")
    print(clf_report1)
    
    print("\n")

The accuracy of model DecisionTreeClassifier is 1.00
                           precision    recall  f1-score   support

      Arcanut (Processed)       1.00      1.00      1.00         5
                 Arecanut       1.00      1.00      1.00       300
                Arhar/Tur       1.00      1.00      1.00      1453
                Ash Gourd       1.00      1.00      1.00         8
            Atcanut (Raw)       1.00      1.00      1.00         4
                    Bajra       1.00      1.00      1.00      1072
                   Banana       1.00      1.00      1.00       670
                   Barley       1.00      1.00      1.00       824
                     Bean       1.00      1.00      1.00         3
Beans & Mutter(Vegetable)       1.00      1.00      1.00        39
                Beet Root       1.00      1.00      1.00         3
                      Ber       1.00      1.00      1.00         2
                   Bhindi       1.00      1.00      1.00        49
        

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


The accuracy of model RandomForestClassifier is 1.00
                           precision    recall  f1-score   support

      Arcanut (Processed)       1.00      1.00      1.00         5
                 Arecanut       1.00      1.00      1.00       300
                Arhar/Tur       1.00      1.00      1.00      1453
                Ash Gourd       0.89      1.00      0.94         8
            Atcanut (Raw)       1.00      1.00      1.00         4
                    Bajra       1.00      1.00      1.00      1072
                   Banana       0.99      1.00      1.00       670
                   Barley       1.00      1.00      1.00       824
                     Bean       1.00      1.00      1.00         3
Beans & Mutter(Vegetable)       0.94      0.87      0.91        39
                Beet Root       1.00      0.67      0.80         3
                      Ber       0.40      1.00      0.57         2
                   Bhindi       0.98      0.92      0.95        49
        

  'precision', 'predicted', average, warn_for)


The accuracy of model KNeighborsClassifier is 0.96
                           precision    recall  f1-score   support

      Arcanut (Processed)       0.00      0.00      0.00         5
                 Arecanut       0.93      0.95      0.94       300
                Arhar/Tur       0.98      0.99      0.99      1453
                Ash Gourd       0.00      0.00      0.00         8
            Atcanut (Raw)       0.00      0.00      0.00         4
                    Bajra       0.99      0.99      0.99      1072
                   Banana       0.96      0.98      0.97       670
                   Barley       0.96      1.00      0.98       824
                     Bean       1.00      0.33      0.50         3
Beans & Mutter(Vegetable)       0.38      0.13      0.19        39
                Beet Root       0.33      0.33      0.33         3
                      Ber       0.00      0.00      0.00         2
                   Bhindi       0.67      0.37      0.47        49
          



In [None]:
modelss=DecisionTreeClassifier(random_state=24)
modelss.fit(X1_train, y1_train) # fit the model
y_pred= model.predict(X_test)
df_yield['y_test']= label_encoder.fit_transform(df_yield['y_test'])
df_yield['y_test'].unique()
from sklearn import metrics
error=metrics.mean_absolute_error(y_test, y_pred)
print("Error=")
    print( error)