**Summary**: A random forest classifier was trained on a [Kaggle](https://www.kaggle.com/atharvaingle/crop-recommendation-dataset) dataset; it recommends the most suitable crop based on soil N, P, and K values, temperature, humidity, pH, and rainfall. Test accuracy: 99.77%.

Used in the "Growify ML Models" app.

# Imports

In [2]:
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from joblib import dump

# Read/Organize Data

In [3]:
# Read the crop-recommendation CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific location; anyone
# re-running the notebook elsewhere has to point it at their own copy.
csv_path = 'D:/ML_Data/crop_recommendation/Crop_recommendation.csv'
df = pd.read_csv(csv_path)

In [4]:
# Preview the loaded frame: seven feature columns plus the 'label' target.
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [5]:
# Split off the target: keep the 'label' column as its own Series.
labels = df.loc[:, 'label']

In [6]:
# Keep only the feature columns in `df`; the target was already saved in `labels`.
# errors='ignore' makes this cell safe to re-run: without it, a second execution
# raises KeyError because 'label' has already been removed from `df`.
df = df.drop(columns='label', errors='ignore')

In [7]:
# Confirm that 'label' is gone and only the feature columns remain.
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,90,42,43,20.879744,82.002744,6.502985,202.935536
1,85,58,41,21.770462,80.319644,7.038096,226.655537
2,60,55,44,23.004459,82.320763,7.840207,263.964248
3,74,35,40,26.491096,80.158363,6.980401,242.864034
4,78,42,42,20.130175,81.604873,7.628473,262.717340
...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507
2196,99,15,27,27.417112,56.636362,6.086922,127.924610
2197,118,33,30,24.131797,67.225123,6.362608,173.322839
2198,117,32,34,26.272418,52.127394,6.758793,127.175293


In [8]:
# Inspect the target Series that was split off from the frame.
labels

0         rice
1         rice
2         rice
3         rice
4         rice
         ...  
2195    coffee
2196    coffee
2197    coffee
2198    coffee
2199    coffee
Name: label, Length: 2200, dtype: object

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    labels,
    test_size=0.2,
    random_state=0,
)

In [10]:
# Shape of the training feature matrix (rows, columns).
X_train.shape

(1760, 7)

In [11]:
# Shape of the training targets; the row count should match X_train.
y_train.shape

(1760,)

In [12]:
# Shape of the held-out feature matrix (rows, columns).
X_test.shape

(440, 7)

In [13]:
# Shape of the held-out targets; the row count should match X_test.
y_test.shape

(440,)

In [14]:
# Peek at the training features; rows keep their original index labels after the split.
X_train

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
1567,27,120,200,21.452787,90.745319,6.110219,116.703658
2031,81,36,38,23.765547,87.983299,6.334838,150.316615
2073,90,59,35,24.251335,89.864541,7.098228,175.174211
1374,81,16,45,26.904357,86.254262,6.727468,59.759800
279,47,80,77,17.182484,16.428918,7.561108,72.850173
...,...,...,...,...,...,...,...
1033,102,71,48,28.654563,79.286937,5.695268,102.463378
1731,34,68,51,27.347349,94.177567,6.687088,40.351531
763,35,64,15,28.474423,63.536045,6.500145,69.527441
835,39,65,23,25.434598,69.126134,7.685959,41.026829


In [15]:
# Peek at the training targets, indexed the same way as X_train.
y_train

1567         apple
2031          jute
2073          jute
1374    watermelon
279       chickpea
           ...    
1033        banana
1731        papaya
763      blackgram
835         lentil
1653        orange
Name: label, Length: 1760, dtype: object

In [16]:
# Distinct crop names, in order of first appearance in `labels`.
classes = pd.unique(labels)
classes

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

# Build Random Forest Classifier Model

In [17]:
# Two-step pipeline: standardise the features, then classify with a random forest.
# The forest's seed is fixed so that repeated fits give the same model.
scaler = StandardScaler()
forest = RandomForestClassifier(random_state=18)
rf = make_pipeline(scaler, forest)

In [18]:
# Display the assembled pipeline and its step names.
rf

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier',
                 RandomForestClassifier(random_state=18))])

In [19]:
# Fit the scaler and the forest on the training split only.
rf.fit(X=X_train, y=y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier',
                 RandomForestClassifier(random_state=18))])

# Get Metrics and Save Model

In [20]:
# Mean accuracy on the held-out split, reported as a percentage.
test_accuracy_pct = rf.score(X_test, y_test) * 100
print('Accuracy (test data): ' + str(test_accuracy_pct) + '%')

Accuracy (test data): 99.77272727272727%


In [21]:
def predict(N, P, K, temp, hum, pH, rain):
    """Print the crop that the fitted `rf` pipeline recommends for one sample.

    Parameters
    ----------
    N, P, K : numeric
        Values for the 'N', 'P' and 'K' feature columns.
    temp : numeric
        Value for the 'temperature' column.
    hum : numeric
        Value for the 'humidity' column.
    pH : numeric
        Value for the 'ph' column.
    rain : numeric
        Value for the 'rainfall' column.

    Units are whatever the training CSV uses; they are not stated in this
    notebook.

    Returns
    -------
    None
        The recommendation is printed, not returned.
    """
    # Build a one-row frame with the same column names and order as the
    # training data. Passing a bare nested list relies on positional order
    # alone and makes newer scikit-learn releases warn that the input has no
    # feature names although the pipeline was fitted with them.
    sample = pd.DataFrame(
        [[N, P, K, temp, hum, pH, rain]],
        columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'],
    )
    result = rf.predict(sample)[0]
    print('Recommended Crop: ' + result)

In [22]:
# Spot check with values close to the first dataset row, which is labelled 'rice'.
predict(N=90, P=42, K=43, temp=21, hum=82, pH=7, rain=203)

Recommeded Crop: rice


In [23]:
# Second spot check with a different sample.
# NOTE(review): pH=3 is lower than any 'ph' value in the rows previewed above —
# confirm it lies within the range the model was trained on.
predict(N=35, P=43, K=79, temp=25, hum=45, pH=3, rain=190)

Recommeded Crop: chickpea


In [40]:
# Persist the whole fitted pipeline (scaler + forest) next to the notebook.
model_path = 'rf_crop_recommendation.joblib'
dump(rf, model_path)

['rf_crop_recommendation.joblib']