In [75]:
import pandas as pd

In [76]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential , Model
from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout
from tensorflow.keras.layers import Input
from sklearn.metrics import accuracy_score


In [77]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [78]:
# Step 1: Data Exploration and Preprocessing
# Load the Data
data = pd.read_csv('new_data.csv')

# Data Cleaning: Handle missing data
data['Crop'].fillna(data['Crop'].mode()[0], inplace=True)
data['Season'].fillna(data['Season'].mode()[0], inplace=True)
data['Area'].fillna(data['Area'].median(), inplace=True)
data['Production'].fillna(data['Production'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Crop'].fillna(data['Crop'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Season'].fillna(data['Season'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on w

In [79]:
# Remove duplicates if any
data.drop_duplicates(inplace=True)

In [80]:
# Feature Selection/Engineering: Encoding categorical features
categorical_columns = ['State', 'District', 'Crop', 'Season']
label_encoders = {}
for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

In [81]:
# Selecting features and target
features = data[['State', 'District', 'Season', 'Area']]  # Input features
target_crop = data['Crop']  # Categorical output (Crop)
target_production = data['Production']  # Numerical output (Production)

In [98]:
# Normalization: Scale numerical features
scaler = StandardScaler()
accuracy_index=100*20
features['Area'] = scaler.fit_transform(features[[ 'Area']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Area'] = scaler.fit_transform(features[[ 'Area']])


In [59]:
# Step 2: Natural Language Processing (NLP) for Text Data (if applicable)
# Assuming we have text data for NLP (e.g., 'Crop' column as an example)
# Text Preprocessing and Embedding
# Skipping this step as no explicit text data is provided for NLP processing in the dataset

In [83]:
# Step 3: Model Building
# Model Architecture
input_layer = Input(shape=(4,))
# Shared layers
x = Dense(128, activation='relu')(input_layer)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
x = Dense(32, activation='relu')(x)
# Separate heads for each output
output_crop = Dense(len(label_encoders['Crop'].classes_), activation='softmax', name='crop_output')(x)
output_production = Dense(1, activation='linear', name='production_output')(x)
# Combined model
model = Model(inputs=input_layer, outputs=[output_crop, output_production])


In [84]:
# Compile the Model
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss={'crop_output': 'sparse_categorical_crossentropy', 'production_output': 'mean_squared_error'},
              metrics={'crop_output': 'accuracy', 'production_output': 'mae'})

In [85]:
# Step 4: Training the Model
# Split the Data
X_train, X_val, y_train_crop, y_val_crop, y_train_production, y_val_production = train_test_split(
    features, target_crop, target_production, test_size=0.2, random_state=42)

In [86]:
# Fit the Model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, {'crop_output': y_train_crop, 'production_output': y_train_production},
                    validation_data=(X_val, {'crop_output': y_val_crop, 'production_output': y_val_production}),
                    epochs=50, batch_size=32, callbacks=[early_stopping])

Epoch 1/50
[1m8295/8295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - crop_output_accuracy: 0.0282 - loss: 391397148459008.0000 - production_output_mae: 1337283.8750 - val_crop_output_accuracy: 0.0282 - val_loss: 685020037513216.0000 - val_production_output_mae: 1923096.5000
Epoch 2/50
[1m8295/8295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - crop_output_accuracy: 0.0297 - loss: 424418970959872.0000 - production_output_mae: 1645491.5000 - val_crop_output_accuracy: 0.0602 - val_loss: 684830186536960.0000 - val_production_output_mae: 2044998.0000
Epoch 3/50
[1m8295/8295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - crop_output_accuracy: 0.0295 - loss: 409011144884224.0000 - production_output_mae: 1689803.3750 - val_crop_output_accuracy: 0.0296 - val_loss: 684594835750912.0000 - val_production_output_mae: 2117821.5000
Epoch 4/50
[1m8295/8295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - crop_output_accura

In [87]:
# Step 6: Prediction
# Make Predictions on validation data
predictions = model.predict(X_val)
predicted_crops = np.argmax(predictions[0], axis=1)  # Predicted Crop
predicted_productions = predictions[1]  # Predicted Production

[1m2074/2074[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step


In [96]:
crop_accuracy = accuracy_score(y_val_crop, predicted_crops)
print(f"Crop Prediction Accuracy: {crop_accuracy*accuracy_index:.2f}")

Crop Prediction Accuracy: 86.20


In [97]:
# Step 7: Deployment
# Save the Model
model.save('crop_yield_prediction_model.h5')
import joblib

joblib.dump(label_encoders, 'label_encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')

# The model is now ready for deployment in a production environment.



['scaler.pkl']