### Importing the Dataset

In [8]:
from google.colab import files
files.upload()

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle (1).json to kaggle (1).json


In [23]:
import kagglehub
import pandas as pd
import os

path = kagglehub.dataset_download("rajeev86/crop-demand-data-csv")
df_demand = pd.read_csv(os.path.join(path, 'Crop-Demand-Data.csv'))

df_demand.head()

Unnamed: 0,Year,Month,Region,Crop,Market_Demand
0,2015,1,Sarangarh-Bilaigarh Division,Arhar,4242.29
1,2015,1,Balod Division,Gram,3277.97
2,2015,1,Surguja Division,Groundnut,5965.6
3,2015,1,Balod Division,Jwar,105.92
4,2015,1,Surguja Division,Kulthi,3210.11


### Train-Test Split

In [25]:
from sklearn.model_selection import train_test_split

X = df_demand.drop(columns=['Market_Demand'])
y = df_demand['Market_Demand']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
X_train.head()

Unnamed: 0,Year,Month,Region,Crop
1320,2020,10,Raipur Division,Niger
1228,2020,5,Sarangarh-Bilaigarh Division,Soybean
2159,2024,6,Bilaspur Division,Soybean
1472,2021,6,Balod Division,Niger
1610,2022,1,Bilaspur Division,Til


In [27]:
y_train.head()

Unnamed: 0,Market_Demand
1320,108.19
1228,135.53
2159,157.61
1472,143.69
1610,2845.08


### Preprocessing

In [28]:
import numpy as np

def handler(df):
  df['month_sin'] = np.sin(2 * np.pi * df['Month'] / 12)
  df['month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)
  df['year'] = (df['Year'] - df['Year'].min()) / (df['Year'].max() - df['Year'].min())

  df.drop(columns=['Month', 'Year'], inplace=True)
  df = pd.get_dummies(df, columns=['Crop', 'Region'], drop_first=True)
  return df

In [29]:
from sklearn.preprocessing import FunctionTransformer

preprocessor = FunctionTransformer(handler)

X_train = preprocessor.transform(X_train)
X_test = preprocessor.transform(X_test)

### Trainig and Evaluation

In [30]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("R² Score:", r2)
print("MAE:", mae)

R² Score: 0.9907746927737492
MAE: 84.99168267543858


### Saving and manually testing the model

In [None]:
import cloudpickle

with open('preprocessor_SD.pkl', 'wb') as preprocessor_file:
    cloudpickle.dump(preprocessor, preprocessor_file)

with open('model_SD.pkl', 'wb') as model_file:
    cloudpickle.dump(model, model_file)

In [None]:
import pandas as pd
import cloudpickle
import numpy as np

columns = ['Year', 'Month', 'Crop', 'Region']
new_data = pd.DataFrame([[2025, 11, "Maize", "Surguja Division"]], columns=columns)

def suggest_demand(new_data):

  with open('model_SD.pkl', 'rb') as model_file:
      model_fit = cloudpickle.load(model_file)
  new_columns = model_fit.feature_names_in_

  with open('preprocessor_SD.pkl', 'rb') as prep_file:
      preprocessor = cloudpickle.load(prep_file)

  data = pd.DataFrame(preprocessor.transform(new_data), columns=new_columns)
  predicted_demand = model_fit.predict(data)

  return predicted_demand

float(suggest_demand(new_data)[0])

2741.9435000000008