# Fertilizer Recommendation

## Introduction

## Importing Necessary Libraries

In [4]:
# NumPy for numerical operations and array manipulation
import numpy as np

# Pandas for data manipulation and analysis using data frames
import pandas as pd

# Matplotlib for creating visualizations in Python
import matplotlib.pyplot as plt 

# Seaborn for statistical data visualization based on Matplotlib
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
import warnings

# Silence UserWarning messages from this point on. Note that the filter is
# not restricted to scikit-learn: no module is given, so a UserWarning
# raised by any library is suppressed as well.
warnings.filterwarnings("ignore", category=UserWarning)


# Various classification models from scikit-learn for machine learning tasks
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Performance metrics such as accuracy_score and classification_report from scikit-learn
from sklearn.metrics import accuracy_score, classification_report

# The pickle module for saving and loading machine learning models
import pickle


## Loading and Exploring Data

#### Loading Data

In [5]:
# Load the raw fertilizer dataset into a DataFrame. The path is relative,
# so it is resolved against the notebook's current working directory.
fertilizer_recommendation_data = pd.read_csv(
    filepath_or_buffer="../data/raw/fertilizer_recommendation.csv",
)

#### Exploring Data

In [6]:
# Preview the first five rows to see the columns and some sample values.
fertilizer_recommendation_data.head(n=5)

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [7]:
# Preview the last five rows to confirm the file was read through to the end.
fertilizer_recommendation_data.tail(n=5)

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
94,25,50,32,Clayey,Pulses,24,0,19,28-28
95,30,60,27,Red,Tobacco,4,17,17,10-26-26
96,38,72,51,Loamy,Wheat,39,0,0,Urea
97,36,60,43,Sandy,Millets,15,0,41,DAP
98,29,58,57,Black,Sugarcane,12,0,10,20-20


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text columns are left out of this table.
fertilizer_recommendation_data.describe()

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous
count,99.0,99.0,99.0,99.0,99.0,99.0
mean,30.282828,59.151515,43.181818,18.909091,3.383838,18.606061
std,3.502304,5.840331,11.271568,11.599693,5.814667,13.476978
min,25.0,50.0,25.0,4.0,0.0,0.0
25%,28.0,54.0,34.0,10.0,0.0,9.0
50%,30.0,60.0,41.0,13.0,0.0,19.0
75%,33.0,64.0,50.5,24.0,7.5,30.0
max,38.0,72.0,65.0,42.0,19.0,42.0


In [9]:
# Total number of cells in the frame (rows x columns).
fertilizer_recommendation_data.size

891

In [10]:
# Dimensions of the frame as (number of rows, number of columns).
fertilizer_recommendation_data.shape

(99, 9)

In [11]:
# Per-column dtype and non-null count, plus the frame's memory usage.
fertilizer_recommendation_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [12]:
# List the exact column labels. Inspect them closely: the raw header contains
# a trailing space in 'Humidity ' and spaces inside several other labels.
fertilizer_recommendation_data.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [13]:
# Normalize the column labels: drop the trailing space from 'Humidity ',
# replace the spaces in 'Soil Type' / 'Crop Type' with underscores, and
# shorten 'Fertilizer Name' to 'Fertilizer'.
# rename() returns a new frame, so the name is rebound instead of using
# inplace=True. The cell stays safe to re-run: labels that were already
# renamed are simply not matched and are left untouched.
# NOTE(review): 'Temparature' (sic) is intentionally kept as-is; other cells
# may reference that spelling -- confirm before correcting it.
column_renames = {
    "Humidity ": "Humidity",
    "Soil Type": "Soil_Type",
    "Crop Type": "Crop_Type",
    "Fertilizer Name": "Fertilizer",
}
fertilizer_recommendation_data = fertilizer_recommendation_data.rename(
    columns=column_renames
)

In [14]:
# Number of distinct values in each column.
fertilizer_recommendation_data.nunique()

Temparature    14
Humidity       13
Moisture       41
Soil_Type       5
Crop_Type      11
Nitrogen       24
Potassium      13
Phosphorous    32
Fertilizer      7
dtype: int64

In [15]:
# Count the missing (NaN/None) entries in each column.
fertilizer_recommendation_data.isna().sum()

Temparature    0
Humidity       0
Moisture       0
Soil_Type      0
Crop_Type      0
Nitrogen       0
Potassium      0
Phosphorous    0
Fertilizer     0
dtype: int64

## Data Visualization

Let's visualize the data

## Data Preprocessing

In [18]:
# Per-column count of missing entries, kept in a variable and displayed as
# the cell's output.
missing_values = fertilizer_recommendation_data.isna().sum()
missing_values

Temparature    0
Humidity       0
Moisture       0
Soil_Type      0
Crop_Type      0
Nitrogen       0
Potassium      0
Phosphorous    0
Fertilizer     0
dtype: int64

In our dataset, there are no missing values, so there is no need to handle them.