In [1]:
# Importing Data Manipulation Libraries
import pandas as pd
import numpy as np

# Importig the Data Visualization Libraries
import seaborn as sns
import matplotlib.pyplot as plt

# Importing Machine Learning Libraries
from sklearn.preprocessing import StandardScaler,MinMaxScaler,RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Importing Filter warning library
import warnings
warnings.filterwarnings('ignore')

# Importing Logging Library
import logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s', force= True,
                    filename= 'Model.log',
                    filemode='w')

In [18]:
# Loading the dataset
URL = 'https://raw.githubusercontent.com/anirudhajohare19/Crop_Recommendation_Model/refs/heads/main/Crop_Recommendation.csv'
df = pd.read_csv(URL)
df.sample(frac =1)
df

Unnamed: 0,Nitrogen,Phosphorus,Potassium,Temperature,Humidity,pH_Value,Rainfall,Crop
0,90,42,43,20.879744,82.002744,6.502985,202.935536,Rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,Rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,Rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,Rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,Rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,Coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,Coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,Coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,Coffee


In [None]:
# Target Column is Balanced
df.Crop.value_counts()

Crop
Rice           100
Maize          100
Jute           100
Cotton         100
Coconut        100
Papaya         100
Orange         100
Apple          100
Muskmelon      100
Watermelon     100
Grapes         100
Mango          100
Banana         100
Pomegranate    100
Lentil         100
Blackgram      100
MungBean       100
MothBeans      100
PigeonPeas     100
KidneyBeans    100
ChickPea       100
Coffee         100
Name: count, dtype: int64

In [None]:
# Checking for Unique Values in Target Column
df.Crop.nunique()

22

In [None]:
# Performing Univariate Analysis

from collections import OrderedDict

stats=[]
for i in df.columns:
    if df[i].dtype != 'object':
        numerical_stats = OrderedDict({
        'Feature':i,
        'Maximum':df[i].max(),
        'Minimum':df[i].min(),
        'Mean':df[i].mean(),
        '25%':df[i].quantile(0.25),
        'Median':df[i].quantile(0.50),
        '75%':df[i].quantile(0.75),
        'Kurtosis':df[i].kurt(),
        'Skewness':df[i].skew(),
        'Standard Deviation':df[i].std()


    })

    stats.append(numerical_stats)
stats = pd.DataFrame(stats)
stats

Unnamed: 0,Feature,Maximum,Minimum,Mean,25%,Median,75%,Kurtosis,Skewness,Standard Deviation
0,Nitrogen,140.0,0.0,50.551818,21.0,37.0,84.25,-1.05824,0.509721,36.917334
1,Phosphorus,145.0,5.0,53.362727,28.0,51.0,68.0,0.860279,1.010773,32.985883
2,Potassium,205.0,5.0,48.149091,20.0,32.0,49.0,4.449354,2.375167,50.647931
3,Temperature,43.675493,8.825675,25.616244,22.769375,25.598693,28.561654,1.232555,0.184933,5.063749
4,Humidity,99.981876,14.25804,71.481779,60.261953,80.473146,89.948771,0.302134,-1.091708,22.263812
5,pH_Value,9.935091,3.504752,6.46948,5.971693,6.425045,6.923643,1.655581,0.283929,0.773938
6,Rainfall,298.560117,20.211267,103.463655,64.551686,94.867624,124.267508,0.607079,0.965756,54.958389
7,Rainfall,298.560117,20.211267,103.463655,64.551686,94.867624,124.267508,0.607079,0.965756,54.958389
