# Crop Production

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# pd.set_option('display.max_rows', None)

In [2]:
# Load the raw crop records; the relative path expects Crop.csv in the working directory.
df = pd.read_csv('Crop.csv')

In [134]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,Column1,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Other Kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Cashewnut,720.0,165.0


# Null Values

In [135]:
# Count missing values per column. Only Production has any (3,730 rows); the author
# judged this to be <5% of the data, so those rows are dropped in the next cell.
df.isna().sum()

Column1             0
District_Name       0
Crop_Year           0
Season              0
Crop                0
Area                0
Production       3730
dtype: int64

In [138]:
# Drop every row that still contains a missing value.
df = df.dropna()

In [139]:
# Rows with zero production carry no information for this analysis, and the author
# notes they are only a small share of the data, so they are filtered out.
# .copy() makes the result an independent frame: the cells below assign new/updated
# columns to `df`, and doing that on a bare boolean-mask slice triggers pandas'
# SettingWithCopyWarning.
df = df[df["Production"] != 0].copy()

In [136]:
# Changing into Date format: format='%Y' tells pandas to read each value as a four-digit year.
# NOTE(review): the result is a full timestamp per row (presumably Jan 1 of that year), so the
# exported CSV will carry a date rather than the bare year — confirm this is intended.
df['Crop_Year'] = pd.to_datetime(df['Crop_Year'], format='%Y')

In [137]:
# Give the state column a meaningful name ("Column1" -> "State").
df = df.rename(columns={"Column1": "State"})

In [140]:
# Trim stray leading/trailing whitespace from the categorical text columns.
for text_col in ("State", "Season", "Crop"):
    df[text_col] = df[text_col].str.strip()

# Feature Engineering

In [141]:
# Dividing states into regions
def regions(state):
    """Return the region label for a state / union-territory name.

    The name is compared exactly (case-sensitive, no trimming) against four
    hard-coded groups, checked in the order South, North, West, East.

    Parameters
    ----------
    state : the state name to classify.

    Returns
    -------
    str
        "South", "North" or "West" when the name is listed in that group;
        "East" otherwise. Names that appear in no group at all also come
        back as "East".
    """
    region_members = (
        ("South", (
            'Andhra Pradesh', 'Karnataka', 'Kerala', 'Puducherry',
            'Tamil Nadu', 'Telangana', 'Andaman and Nicobar Islands',
        )),
        ("North", (
            'Chandigarh', 'Haryana', 'Himachal Pradesh', 'Jammu and Kashmir',
            'Madhya Pradesh', 'Punjab', 'Rajasthan', 'Uttar Pradesh',
            'Uttarakhand',
        )),
        ("West", (
            'Dadra and Nagar Haveli', 'Goa', 'Gujarat', 'Maharashtra',
        )),
        ("East", (
            'Arunachal Pradesh', 'Assam', 'Bihar', 'Jharkhand', 'Manipur',
            'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Sikkim', 'Tripura',
            'West Bengal', 'Chhattisgarh',
        )),
    )

    for label, members in region_members:
        if state in members:
            return label

    # Catch-all: any name not listed above is treated as East.
    return "East"
    
# Derive a Regions column by classifying each row's State with regions().
df['Regions'] = df['State'].apply(regions)

# Preprocessing and Cleaning

In [145]:
# Rabi and Kharif are less widely known terms, so they (and Autumn) are folded into the
# general seasons they overlap with: Kharif -> Monsoon, Rabi -> Winter, Autumn -> Winter.
# The result is assigned back to the column rather than calling replace(inplace=True) on
# `df.Season`: that in-place call acts on an intermediate Series, which pandas flags as
# chained assignment and which leaves `df` unchanged when Copy-on-Write is enabled.
df["Season"] = df["Season"].replace(
    {"Kharif": "Monsoon", "Rabi": "Winter", "Autumn": "Winter"}
)

In [147]:
# Cleaning the crop names: spelling variants are merged into one canonical name, and
# crops the author considered to have a minuscule contribution are pooled into
# catch-all categories ("Pulses", "Other Oilseeds", "Other Fruits", ...).
#
# Keys are the canonical names; each list holds the raw names (exactly as they appear
# after whitespace stripping, including their original typos and double spaces) that
# are folded into that key.
# NOTE(review): entries such as 'Pulses total', 'Oilseeds total' and 'Total foodgrain'
# look like aggregate rows; pooling them with individual crops may double-count —
# confirm against the raw data.
crop_groups = {
    "Arecanut": ["Arcanut (Processed)", 'Atcanut (Raw)', 'Arcanut'],
    "Cotton": ["Kapas", "Cotton(lint)"],
    "Jute": ["Jute & mesta"],
    "Pulses": [
        'other misc. pulses', 'Other Kharif pulses', 'Peas & beans (Pulses)',
        'Pulses total', 'Other  Rabi pulses', 'Lentil', 'Bean', 'pulses',
        'Ricebean (nagadal)', 'Korra',
    ],
    "Other Oilseeds": ['other oilseeds', 'Niger seed', 'Oilseeds total'],
    "Ginger": ['Dry ginger'],
    "Cashewnut": ['Cashewnut Processed', 'Cashewnut Raw'],
    "Other Fruits": [
        'Papaya', 'Water Melon', 'Litchi', 'Pome Granet', 'Apple', 'Ber',
        'Pump Kin', 'Peach', 'Pear', 'Other Citrus Fruit', 'Citrus Fruit',
        'Grapes', 'Jack Fruit', 'Jobster', 'Lemon', 'Other Fresh Fruits',
        'Pome Fruit', 'Sapota',
    ],
    "Other Vegetables": [
        'Beet Root', 'Ash Gourd', 'Cucumber', 'Snak Guard', 'Lab-Lab',
        'Ribed Guard', 'Peas  (vegetable)', 'Yam', 'Beans & Mutter(Vegetable)',
        'Bhindi', 'Bitter Gourd', 'Brinjal', 'Cabbage', 'Bottle Gourd',
        'Cauliflower', 'Carrot', 'Colocosia', 'Cowpea(Lobia)', 'Drum Stick',
        'Vegetable', 'Perilla', 'Redish', 'Turnip',
    ],
    "Other Cereals & Millets": ['Small millets', 'Total foodgrain', 'Varagu'],
    "Rice": ['Paddy'],
    "Other Grams": ['Gram', 'Rajmash Kholar'],
}

# Flatten to {raw name: canonical name}. No canonical name is itself a raw name, so a
# single replace pass gives the same result as applying the renames one after another.
crop_renames = {
    raw_name: canonical
    for canonical, raw_names in crop_groups.items()
    for raw_name in raw_names
}

# Assign back to the column: calling replace(inplace=True) on `df.Crop` acts on an
# intermediate Series, which pandas flags as chained assignment and which leaves `df`
# unchanged when Copy-on-Write is enabled.
df["Crop"] = df["Crop"].replace(crop_renames)

In [148]:
# Drop crops that, per the author, had fewer than 10 records each and were getting in
# the way of the visualisations.
exclude = ["other fibres", 'Other Dry Fruit', 'Cond-spcs other', 'Coffee', 'Rubber']

is_excluded = df['Crop'].isin(exclude)
df = df[~is_excluded]

In [150]:
# Exporting cleaned data for analysis; index=False keeps the row index out of the file.
df.to_csv('Cleaned_Crop.csv',index=False)