In [1]:
#### Environment Setup

In [2]:
import fiona
import mpl_toolkits
import pandas as pd
import itertools
import geopandas as gpd
import json
from shapely.geometry import Polygon, Point, MultiPoint, MultiPolygon, shape, mapping
from shapely.prepared import prep
from shapely.ops import unary_union
import itertools

In [3]:
#### Loading Datasets

In [4]:
# Input locations for the crop production table (CSV) and the district boundary shapefile.
# NOTE(review): both are absolute paths on one specific machine; the notebook
# cannot be re-run elsewhere until they are made configurable.
_apy_csv_path = r'C:/Users/Manan Arora/Desktop/AgriTech/Data Driven Business Metrics Project/Data/APY/APY_13_14_to_17_18.csv'
_district_shp_path = r"C:\Users\Manan Arora\Desktop\AgriTech\Data Driven Business Metrics Project\Data\Shapefiles\District\2011_dist.shp"

apy_db = pd.read_csv(_apy_csv_path)
dist_shp = gpd.read_file(_district_shp_path)

In [5]:
##### Crop-category-wise production in each district
# Step 1: total production per (state, district, crop category).
_category_totals = (
    apy_db
    .groupby(['State_name', 'District_name', 'Crop_category'], sort=True)
    .agg({'Production': 'sum'})
    .reset_index()
    .rename(columns={'Production': 'Total_Production'})
)
# Step 2: one row per (state, district), one column per crop category.
cat_wise_prod_dist = _category_totals.pivot_table(
    values='Total_Production',
    index=['State_name', 'District_name'],
    columns='Crop_category',
    aggfunc='sum',
).reset_index()

In [8]:
#### Top crop (by summed production) per crop category in each district
# Total production of every individual crop within its district and category.
_crop_totals = (
    apy_db
    .groupby(['State_name', 'District_name', 'Crop_category', 'Crop_code', 'Crop_name'], sort=True)
    .agg({'Production': 'sum'})
    .reset_index()
)
# The crop identifiers are moved into the index so that they travel with the
# single largest 'Production' value picked for each (state, district, category).
_largest_per_category = (
    _crop_totals
    .set_index(['Crop_code', 'Crop_name'])
    .groupby(['State_name', 'District_name', 'Crop_category'])['Production']
    .nlargest(1)
)
# reset_index turns the group keys and crop identifiers back into columns.
top_crop_dist = (
    _largest_per_category
    .reset_index()
    .rename(columns={'Production': 'Crop_Production'})
)

In [9]:
#### Top crop name and top crop production in each district, one column per category
# Names: index by (state, district, category) and unstack the category level.
_by_district_category = top_crop_dist.set_index(['State_name', 'District_name', 'Crop_category'])
top_cropname_dist = _by_district_category['Crop_name'].unstack().reset_index()

# Production figures: the same wide layout, built with a pivot table.
top_cropprod_dist = top_crop_dist.pivot_table(
    values='Crop_Production',
    index=['State_name', 'District_name'],
    columns='Crop_category',
    aggfunc='sum',
).reset_index()

In [10]:
#### Renaming columns - suffix TP: total production, CP: top crop production, TC: top crop name
# Short prefix used for each crop category; a two-letter suffix is appended per table.
_category_prefix = {
    'Cereals': 'Cereal',
    'Drug and Narcotics': 'Drugs',
    'Dry Fruits': 'DryFru',
    'Fiber Crop': 'Fiber',
    'Fruits': 'Fruits',
    'Oilseeds': 'Oil',
    'Pulses': 'Pulses',
    'Spices': 'Spices',
    'Sugar': 'Sugar',
    'Vegetables': 'Vege',
}


def _suffixed(suffix):
    """Return the category-name -> short-column-name mapping with ``suffix`` appended."""
    return {category: prefix + suffix for category, prefix in _category_prefix.items()}


# Missing numeric cells become 0; missing crop names get an explicit placeholder.
cat_wise_prod_dist = cat_wise_prod_dist.rename(columns=_suffixed('TP')).fillna(0)
top_cropname_dist = top_cropname_dist.rename(columns=_suffixed('TC')).fillna('Category Not Grown Here')
top_cropprod_dist = top_cropprod_dist.rename(columns=_suffixed('CP')).fillna(0)

In [9]:
#### Merging the three per-district tables on their state/district keys
_merge_keys = ['State_name', 'District_name']
dist_data = cat_wise_prod_dist.merge(top_cropname_dist, on=_merge_keys)
dist_data = dist_data.merge(top_cropprod_dist, on=_merge_keys)

In [10]:
##### Percentage contribution of each district to the overall production of a category
# For every category, <prefix>TPP = district total / sum over all rows of dist_data * 100.
for _prefix in ('Cereal', 'Drugs', 'DryFru', 'Fiber', 'Fruits', 'Oil', 'Pulses', 'Spices', 'Sugar', 'Vege'):
    _district_totals = dist_data[_prefix + 'TP']
    dist_data[_prefix + 'TPP'] = (_district_totals / sum(_district_totals)) * 100

In [11]:
##### Percentage contribution of the top crop to its category's production within the district
# For every category, <prefix>CPP = top crop production / category total * 100.
# NOTE(review): where a district's category total is 0 this is 0/0 and yields NaN.
for _prefix in ('Cereal', 'Drugs', 'DryFru', 'Fiber', 'Fruits', 'Oil', 'Pulses', 'Spices', 'Sugar', 'Vege'):
    _top_crop = dist_data[_prefix + 'CP']
    _category_total = dist_data[_prefix + 'TP']
    dist_data[_prefix + 'CPP'] = (_top_crop / _category_total) * 100

In [12]:
#### Renaming the key columns to the names used as join keys against the district shapefile
_shapefile_key_names = {'State_name': 'ST_NM', 'District_name': 'DISTRICT'}
dist_data = dist_data.rename(columns=_shapefile_key_names)

In [13]:
# Attach the agricultural metrics to the district shapes. A left join keeps every
# shape, including districts that have no matching row in dist_data.
_join_keys = ['ST_NM', 'DISTRICT']
all_dist_data = dist_shp.merge(dist_data, how='left', on=_join_keys)
# Every missing cell becomes 0.
# NOTE(review): this also writes the integer 0 into the text (top crop name)
# columns of unmatched districts — confirm that is intended.
all_dist_data.fillna(value=0, inplace=True)

In [14]:
# Reduce every multi-part district geometry to its first polygon; single-part
# geometries (and any non-geometry values) are left untouched.
# The parts are read through `.geoms` because indexing a MultiPolygon directly
# (`geom[0]`) is not supported by Shapely 2.x; wrapping that in a bare `except`
# would silently turn the whole step into a no-op.
# NOTE(review): all parts after the first are discarded — confirm this is intended.
all_dist_data['geometry'] = all_dist_data['geometry'].apply(
    lambda geom: geom.geoms[0] if isinstance(geom, MultiPolygon) else geom
)

In [15]:
# Write the district shapes with their agricultural attributes as an ESRI Shapefile.
# NOTE(review): absolute, machine-specific output path.
_district_output_path = r"C:\Users\Manan Arora\Desktop\AgriTech\Visualizations\India Shapes\India_Agri_Data.shp"
all_dist_data.to_file(_district_output_path, driver='ESRI Shapefile')

In [16]:
# Load the state boundary shapefile.
# NOTE(review): absolute, machine-specific input path.
_state_shp_path = r"C:\Users\Manan Arora\Desktop\AgriTech\Data Driven Business Metrics Project\Data\Shapefiles\State\Admin2.shp"
state_data = gpd.read_file(_state_shp_path)

In [17]:
# Reduce every multi-part state geometry to its first polygon; single-part
# geometries (and any non-geometry values) are left untouched.
# The parts are read through `.geoms` because indexing a MultiPolygon directly
# (`geom[0]`) is not supported by Shapely 2.x; wrapping that in a bare `except`
# would silently turn the whole step into a no-op.
# NOTE(review): all parts after the first are discarded — confirm this is intended.
state_data['geometry'] = state_data['geometry'].apply(
    lambda geom: geom.geoms[0] if isinstance(geom, MultiPolygon) else geom
)

In [18]:
# Write the state shapes as an ESRI Shapefile.
# NOTE(review): absolute, machine-specific output path.
_state_output_path = r"C:\Users\Manan Arora\Desktop\AgriTech\Visualizations\India Shapes\India_Agri_State_Data.shp"
state_data.to_file(_state_output_path, driver='ESRI Shapefile')