In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score
from scipy.optimize import linprog
import matplotlib.pyplot as plt
import seaborn as sns
import folium


# Reaching this line only shows that every import above succeeded.
print("All packages are installed and working!")


All packages are installed and working!


In [44]:
# Read every raw input table from CSV files in the working directory.
# food, nutrition and necessary_nutrition are loaded here but are not
# referenced by any of the merge cells visible in this notebook section.
crop_production = pd.read_csv('crop_production.csv')
district_population = pd.read_csv('district_level_population.csv')
food = pd.read_csv('food.csv')
nutrition = pd.read_csv('nutrition.csv')
rainfall = pd.read_csv('rainfall.csv')
yield_complete = pd.read_csv('yield_complete.csv')
necessary_nutrition = pd.read_csv('necessary_nutrition.csv')

# Attach the district-level table to each crop record (inner join on the
# state and district names, which are spelled differently in the two files).
# NOTE(review): the key has no year component, so a crop record is repeated
# once for every 'year' row the district has in district_population (see the
# displayed output: the same 1997 record appears with year 1966, 1972, ...).
# Confirm this many-to-many expansion is intended.
merged_data = crop_production.merge(
    district_population,
    left_on=['state_name', 'district_name'],
    right_on=['state name', 'dist name'],
)


In [45]:
# Show the column names of the rainfall table.
rainfall.columns

Index(['st_code', 'state/region', 'year', 'actual rainfall',
       'normal rainfall'],
      dtype='object')

In [46]:
# Display the crop x district join result.
# NOTE(review): in the recorded output the same crop record is repeated with
# different 'year' values (1966, 1972, 1977, ...), giving ~1.09M rows.
merged_data

Unnamed: 0,state_name,district_name,crop_year,season,crop,area,production,dist code,year,state code,state name,dist name,adult male crossbred (1000 number),adult male indigenous (1000 number),adult female crossbred (1000 number),adult female indigenous (1000 number),young male crossbred (1000 number),young male indigenous (1000 number),young female crossbred (1000 number),young female indigenous (1000 number)
0,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,1966,1,andhra pradesh,chittoor,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
1,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,1972,1,andhra pradesh,chittoor,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
2,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,1977,1,andhra pradesh,chittoor,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
3,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,1983,1,andhra pradesh,chittoor,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
4,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,1987,1,andhra pradesh,chittoor,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1086197,west bengal,purulia,2014,winter,sesamum,175.0,88.0,817,1997,13,west bengal,purulia,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00,-1.00
1086198,west bengal,purulia,2014,winter,sesamum,175.0,88.0,817,2003,13,west bengal,purulia,1.38,393.61,2.50,184.26,1.09,119.58,1.31,141.10
1086199,west bengal,purulia,2014,winter,sesamum,175.0,88.0,817,2007,13,west bengal,purulia,2.24,462.34,6.61,225.26,4.56,101.74,2.96,138.48
1086200,west bengal,purulia,2014,winter,sesamum,175.0,88.0,817,2012,13,west bengal,purulia,1.79,408.89,6.03,207.16,1.79,68.78,3.87,137.81


In [47]:
# Add state-level rainfall (inner join on state name and year).
# NOTE(review): the left-hand 'year' is the column that came from
# district_population, not 'crop_year'; the recorded output shows 1997 crop
# records paired with 2019 rainfall. Confirm which year was meant as the key.
merged_data = merged_data.merge(
    rainfall,
    left_on=['state_name', 'year'],
    right_on=['state/region', 'year'],
)

In [48]:
# Display the frame after the rainfall join.
# NOTE(review): the recorded output has ~140k rows, and every visible row has
# year == 2019 and 'normal rainfall' == -1 (looks like a placeholder - confirm).
merged_data

Unnamed: 0,state_name,district_name,crop_year,season,crop,area,production,dist code,year,state code,...,adult female crossbred (1000 number),adult female indigenous (1000 number),young male crossbred (1000 number),young male indigenous (1000 number),young female crossbred (1000 number),young female indigenous (1000 number),st_code,state/region,actual rainfall,normal rainfall
0,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,2019,1,...,468.75,54.78,32.61,9.84,319.65,38.31,1,andhra pradesh,899.1,-1
1,andhra pradesh,chittoor,1997,kharif,bajra,1700.0,1900.0,54,2019,1,...,468.75,54.78,32.61,9.84,319.65,38.31,1,andhra pradesh,899.1,-1
2,andhra pradesh,chittoor,1997,kharif,dry chillies,600.0,800.0,54,2019,1,...,468.75,54.78,32.61,9.84,319.65,38.31,1,andhra pradesh,899.1,-1
3,andhra pradesh,chittoor,1997,kharif,groundnut,234900.0,144200.0,54,2019,1,...,468.75,54.78,32.61,9.84,319.65,38.31,1,andhra pradesh,899.1,-1
4,andhra pradesh,chittoor,1997,kharif,horse-gram,3100.0,2000.0,54,2019,1,...,468.75,54.78,32.61,9.84,319.65,38.31,1,andhra pradesh,899.1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140393,west bengal,purulia,2014,summer,rice,306.0,801.0,817,2019,13,...,8.60,216.49,1.37,76.29,6.31,179.11,13,west bengal,1733.4,-1
140394,west bengal,purulia,2014,summer,sesamum,627.0,463.0,817,2019,13,...,8.60,216.49,1.37,76.29,6.31,179.11,13,west bengal,1733.4,-1
140395,west bengal,purulia,2014,whole year,sugarcane,324.0,16250.0,817,2019,13,...,8.60,216.49,1.37,76.29,6.31,179.11,13,west bengal,1733.4,-1
140396,west bengal,purulia,2014,winter,rice,279151.0,597899.0,817,2019,13,...,8.60,216.49,1.37,76.29,6.31,179.11,13,west bengal,1733.4,-1


In [49]:
# Add the district yield table, matched on state, district and crop year
# (inner join). Both sides carry a 'year' column that is not a shared key
# name on the left, so pandas suffixes them: the existing one becomes
# 'year_x' and the yield_complete one 'year_y'.
merged_data = merged_data.merge(
    yield_complete,
    left_on=['state_name', 'district_name', 'crop_year'],
    right_on=['state.name', 'dist.name', 'year'],
)

In [50]:
# Display the frame after the yield join; the earlier 'year' column now
# appears as 'year_x' in the output.
merged_data

Unnamed: 0,state_name,district_name,crop_year,season,crop,area,production,dist code,year_x,state code,...,sugarcane.yield..kg.per.ha.,cotton.area..1000.ha.,cotton.production..1000.tons.,cotton.yield..kg.per.ha.,fruits.area..1000.ha.,vegetables.area..1000.ha.,fruits.and.vegetables.area..1000.ha.,potatoes.area..1000.ha.,onion.area..1000.ha.,fodder.area..1000.ha.
0,andhra pradesh,chittoor,1997,kharif,arhar/tur,6100.0,900.0,54,2019,1,...,8645.16,0.0,0.0,0.0,33.76,11.24,45.00,0.68,0.00,3.46
1,andhra pradesh,chittoor,1997,kharif,bajra,1700.0,1900.0,54,2019,1,...,8645.16,0.0,0.0,0.0,33.76,11.24,45.00,0.68,0.00,3.46
2,andhra pradesh,chittoor,1997,kharif,dry chillies,600.0,800.0,54,2019,1,...,8645.16,0.0,0.0,0.0,33.76,11.24,45.00,0.68,0.00,3.46
3,andhra pradesh,chittoor,1997,kharif,groundnut,234900.0,144200.0,54,2019,1,...,8645.16,0.0,0.0,0.0,33.76,11.24,45.00,0.68,0.00,3.46
4,andhra pradesh,chittoor,1997,kharif,horse-gram,3100.0,2000.0,54,2019,1,...,8645.16,0.0,0.0,0.0,33.76,11.24,45.00,0.68,0.00,3.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140393,west bengal,purulia,2014,summer,rice,306.0,801.0,817,2019,13,...,5093.75,0.0,0.0,0.0,5.65,36.49,42.14,0.48,1.51,0.00
140394,west bengal,purulia,2014,summer,sesamum,627.0,463.0,817,2019,13,...,5093.75,0.0,0.0,0.0,5.65,36.49,42.14,0.48,1.51,0.00
140395,west bengal,purulia,2014,whole year,sugarcane,324.0,16250.0,817,2019,13,...,5093.75,0.0,0.0,0.0,5.65,36.49,42.14,0.48,1.51,0.00
140396,west bengal,purulia,2014,winter,rice,279151.0,597899.0,817,2019,13,...,5093.75,0.0,0.0,0.0,5.65,36.49,42.14,0.48,1.51,0.00


In [51]:
# Forward-fill any remaining NaN with the value from the previous row.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in recent pandas and triggered the warning this cell used to
# emit. Rebinding the name instead of inplace=True gives the same frame.
# NOTE(review): the fill follows row order only, so a value can be carried
# from one district/crop into the next; confirm that is acceptable, or fill
# within groups instead.
merged_data = merged_data.ffill()

  merged_data.fillna(method='ffill', inplace=True)


In [52]:
# Count NaN values per column to confirm the fill left none behind.
# NOTE(review): isna() only counts real NaN/None; the -1 values seen in
# earlier outputs (e.g. 'normal rainfall') are ordinary numbers here. If -1
# is a missing-value placeholder it is not reflected in these counts.
merged_data.isna().sum()

state_name                              0
district_name                           0
crop_year                               0
season                                  0
crop                                    0
                                       ..
vegetables.area..1000.ha.               0
fruits.and.vegetables.area..1000.ha.    0
potatoes.area..1000.ha.                 0
onion.area..1000.ha.                    0
fodder.area..1000.ha.                   0
Length: 104, dtype: int64

In [53]:
# Write the merged table to disk without the positional row index.
merged_data.to_csv(path_or_buf='final_dataset.csv', index=False)