In [None]:
#Crop and Fertilizer Recommendation System 
#Context
    #Agriculture plays a crucial role in food security and economic stability. Selecting the right crop based on soil and climate conditions can significantly enhance yield and sustainability. This dataset is designed to assist in recommending suitable crops and fertilizers based on key soil and environmental parameters.

#Problem Statement
    #Develop a machine learning model to accurately predict the most suitable crop and recommend appropriate fertilizers based on soil composition and climatic conditions.

#Dataset Description
    #The dataset contains various agricultural parameters as predictor variables and a target variable, "label", which represents the recommended crop.

#Predictor Variables

#N (Nitrogen):
    #Nitrogen content in the soil (measured in mg/kg).

#P (Phosphorus):
    #Phosphorus content in the soil (measured in mg/kg).

#K (Potassium):
    #Potassium content in the soil (measured in mg/kg).

#Temperature:
    #Atmospheric temperature (in degrees Celsius).

#Humidity:
    #Relative humidity in percentage (%).

#pH:
    #Soil pH level (indicating soil acidity or alkalinity).

#Rainfall:
    #Annual rainfall (in mm).


#Target Variable
#Label (Crop Type):
#The recommended crop for cultivation based on soil and climate conditions.
#Includes 22 crop categories such as rice, maize, mango, apple, coffee, cotton, and more.
#Potential Additional Feature

#Fertilizer Recommendation:
    #The dataset has no fertilizer column; based on soil N, P, and K deficiencies, a model could suggest appropriate fertilizers (nitrogen-, phosphorus-, or potassium-based).

In [1]:
# Import necessary libraries for data analysis and visualization

import numpy as np  # NumPy: Used for numerical computations and handling arrays
import pandas as pd  # Pandas: Used for data manipulation and analysis
import matplotlib.pyplot as plt  # Matplotlib: Used for creating static, animated, and interactive plots
import seaborn as sns  # Seaborn: Statistical data visualization library built on top of Matplotlib


In [3]:
# Function to load data from a CSV file
def load_data(file_path):
    """Read a CSV file into a DataFrame, reporting a missing file instead of raising.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table on success; ``None`` when the file does not exist.
        Errors other than ``FileNotFoundError`` are not caught here.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError:
        # Missing file: tell the user and signal failure with None
        print("The file was not found. Please check the file path.")
        return None
    else:
        # Only reached when the read succeeded
        print("Data loaded successfully!")
        return frame

# Load the crop recommendation dataset; the path is relative to the current working directory.
# The raw-string prefix has no effect here because the literal contains no backslashes.
file_path = r'Crop_recommendation.csv'  # CSV file to read
crop_data = load_data(file_path)  # DataFrame on success, None if the file was not found


Data loaded successfully!


In [5]:
# Function to analyze the dataset
def analyze_data(data):
    """Print a basic exploratory summary (EDA) of the dataset.

    Sections are printed in this order: head, tail, ``info()``, missing
    values per column, duplicate-row count, descriptive statistics, and
    column names.

    Parameters
    ----------
    data : pandas.DataFrame or None
        Dataset to summarize. When ``None``, a short message is printed
        and nothing else happens.

    Returns
    -------
    None
        All results are written to standard output.
    """
    # Guard clause: nothing was loaded, so there is nothing to summarize
    if data is None:
        print("No data to analyze.")
        return

    # Each entry is (heading, zero-argument callable). The callables are
    # evaluated lazily so every value is computed right after its heading
    # is printed.
    preview_sections = (
        ("\nDataset Preview (Head):", lambda: data.head()),
        ("\nDataset Preview (Tail):", lambda: data.tail()),
    )
    quality_sections = (
        ("\nMissing Values in the Dataset:", lambda: data.isnull().sum()),
        ("\nNumber of Duplicate Rows in the Dataset:", lambda: data.duplicated().sum()),
        ("\nDescriptive Statistics of the Dataset:", lambda: data.describe()),
        ("\nColumn Names in the Dataset:", lambda: data.columns),
    )

    for heading, compute in preview_sections:
        print(heading)
        print(compute())

    # info() writes its report directly instead of returning it,
    # so it is called without a surrounding print()
    print("\nDataset Information:")
    data.info()

    for heading, compute in quality_sections:
        print(heading)
        print(compute())

# Print the exploratory summary for the loaded dataset
# (prints "No data to analyze." instead when crop_data is None).
analyze_data(crop_data)



Dataset Preview (Head):
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice

Dataset Preview (Tail):
        N   P   K  temperature   humidity        ph    rainfall   label
2195  107  34  32    26.774637  66.413269  6.780064  177.774507  coffee
2196   99  15  27    27.417112  56.636362  6.086922  127.924610  coffee
2197  118  33  30    24.131797  67.225123  6.362608  173.322839  coffee
2198  117  32  34    26.272418  52.127394  6.758793  127.175293  coffee
2199  104  18  30    23.603016  60.396475  6.779833  140.937041  coffee

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 colu

In [9]:
# Function to visualize missing data as a heatmap
def visualize_missing_data(data):
    """Visualize missing data in the dataset using a heatmap.

    Parameters
    ----------
    data : pandas.DataFrame or None
        Dataset to inspect. ``None`` (what ``load_data`` returns when the
        file is missing) is reported and skipped instead of raising.

    Returns
    -------
    None
        A message is printed or a heatmap is shown; nothing is returned.
    """
    # Same None guard as analyze_data, so a failed load does not surface
    # here as an AttributeError on ``None.isnull()``.
    if data is None:
        print("No data to visualize.")
        return

    # Compute the null mask once and reuse it for both the check and the plot
    missing_mask = data.isnull()
    if missing_mask.sum().sum() == 0:
        print("No missing data in the dataset.")
    else:
        plt.figure(figsize=(10, 6))
        sns.heatmap(missing_mask, cbar=False, cmap="viridis", linewidths=0.5)
        plt.title("Missing Data Heatmap")
        plt.show()

# Report the number of missing values in each column.
# NOTE(review): crop_data.isnull() raises AttributeError if load_data returned None — confirm the load succeeded first.
print("Missing data count per column:")
print(crop_data.isnull().sum())

# Draw the missing-data heatmap, or print a message when nothing is missing.
visualize_missing_data(crop_data)

Missing data count per column:
N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64
No missing data in the dataset.


In [11]:
# Function to analyze value counts of a specific column
def analyze_column(data, column_name):
    """Print how often each distinct value occurs in one column.

    Parameters
    ----------
    data : pandas.DataFrame or None
        Dataset to inspect. ``None`` (what ``load_data`` returns when the
        file is missing) is reported and skipped instead of raising.
    column_name : str
        Name of the column whose values are counted.

    Returns
    -------
    None
        The counts, or a message explaining why there are none, are printed.
    """
    # Same None guard and message as analyze_data, so a failed load does
    # not surface here as an AttributeError on ``None.columns``.
    if data is None:
        print("No data to analyze.")
        return

    if column_name not in data.columns:
        print(f"Column '{column_name}' not found.")
        return

    print(f"\nValue Counts in the '{column_name}' Column:")
    # value_counts() skips NaN by default and sorts by descending frequency
    print(data[column_name].value_counts())

# Show how many rows each crop in the 'label' (target) column has;
# pass a different column name to inspect another column.
analyze_column(crop_data, 'label')


Value Counts in the 'label' Column:
label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64
