In [2]:
import pandas as pd
import numpy as np
import json
import sys
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
# Load the dataset
df = pd.read_csv("crop_yield.csv")


In [4]:
# Drop rows with missing values in the "Production" column
df = df.dropna(subset=['Production'])


In [5]:
df

Unnamed: 0,State_Name,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Other Kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Cashewnut,720.0,165.0
...,...,...,...,...,...,...,...
246086,West Bengal,PURULIA,2014,Summer,Rice,306.0,801.0
246087,West Bengal,PURULIA,2014,Summer,Sesamum,627.0,463.0
246088,West Bengal,PURULIA,2014,Whole Year,Sugarcane,324.0,16250.0
246089,West Bengal,PURULIA,2014,Winter,Rice,279151.0,597899.0


In [6]:
# Separate the features and target variables
X = df.drop(['Production'], axis=1)
y = df['Production']


In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# Categorical columns for one-hot encoding
categorical_cols = ['State_Name', 'District_Name', 'Season', 'Crop']
numerical_cols = ['Area']  # Assuming 'Area' is the only numerical column


In [9]:
# Create preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ('num', 'passthrough', numerical_cols)
    ]
)

In [10]:
# Create preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ('num', 'passthrough', numerical_cols)
    ]
)

In [11]:
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, n_jobs=-1, random_state=42))  # Fixed closing parenthesis
])

In [12]:
# Train the model using pipeline
pipeline.fit(X_train, y_train)

In [13]:
# Make predictions
y_pred = pipeline.predict(X_test)

In [14]:
# Compute R² Score
r2 = r2_score(y_test, y_pred)

# Compute RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))


In [15]:
# Print results
print(f"R² Score: {r2:.4f}")
print(f"RMSE: {rmse:.4f}")

R² Score: 0.7683
RMSE: 9655116.9515


In [30]:
# **Check if enough command-line arguments are provided**
if len(sys.argv) < 6:
    print("Insufficient arguments provided! Please enter details manually.")
    Jstate = input("Enter State Name: ")
    Jdistrict = input("Enter District Name: ")
    Jseason = input("Enter Season: ")
    Jcrops = input("Enter Crop: ")
    Jarea = float(input("Enter Area: "))  # Convert to float
else:
    Jstate = sys.argv[1]
    Jdistrict = sys.argv[2]
    Jseason = sys.argv[3]
    Jcrops = sys.argv[4]
    Jarea = float(sys.argv[5])  # Convert to float

Insufficient arguments provided! Please enter details manually.
Enter State Name: bihar
Enter District Name: sasaram
Enter Season: kharif
Enter Crop: rice
Enter Area: 12


In [31]:
# Prepare user input data
user_input = pd.DataFrame([[Jstate, Jdistrict, Jseason, Jcrops, Jarea]], 
                          columns=['State_Name', 'District_Name', 'Season', 'Crop', 'Area'])


In [32]:
# Make a prediction for user input
prediction = pipeline.predict(user_input)


In [33]:
# Print the predicted production
print(f"Predicted Production: {prediction[0]:.2f}"  "Tons")

Predicted Production: -1548.68Tons


In [34]:
# Save the trained pipeline as a .pkl file
joblib.dump(pipeline, "model.pkl")

print("Model saved as crop_yield_model.pkl")

Model saved as crop_yield_model.pkl
