In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [14]:
data = pd.read_csv("Bacillus_Bacteria_Soil_Dataset.csv")

In [15]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376 entries, 0 to 375
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Date                             376 non-null    object 
 1   Soil_Temperature (°C)            376 non-null    float64
 2   Rainfall_Amount (mm)             376 non-null    float64
 3   Soil_pH                          376 non-null    float64
 4   UV_Index (UVI)                   376 non-null    float64
 5   Season                           376 non-null    object 
 6   Soil_Texture                     376 non-null    object 
 7   Tillage_Practice                 376 non-null    object 
 8   Bacillus_Bacteria_Count (CFU/g)  376 non-null    float64
dtypes: float64(5), object(4)
memory usage: 26.6+ KB


Step 2: Encode Categorical Columns

In [16]:
categorical_cols = ['Season', 'Soil_Texture', 'Tillage_Practice']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le


Step 3: Feature Scaling

In [17]:
features = data.drop(columns=['Date', 'Bacillus_Bacteria_Count (CFU/g)'])
target = data['Bacillus_Bacteria_Count (CFU/g)']

scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)
target_scaled = MinMaxScaler().fit_transform(target.values.reshape(-1, 1))


Step 4: Create Sequences for RNN

In [18]:
def create_sequences(X, y, time_steps=3):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i+time_steps])
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

time_steps = 3
X_seq, y_seq = create_sequences(features_scaled, target_scaled, time_steps)


Step 5: Train-Test Split

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

Step 6: Build the RNN Model

In [20]:
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1) 
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

  super().__init__(**kwargs)


Step 7: Train the Model

In [21]:
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 0.1636 - mae: 0.3453 - val_loss: 0.0512 - val_mae: 0.1933
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0517 - mae: 0.1823 - val_loss: 0.0470 - val_mae: 0.1747
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0506 - mae: 0.1869 - val_loss: 0.0438 - val_mae: 0.1726
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0467 - mae: 0.1783 - val_loss: 0.0441 - val_mae: 0.1721
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0478 - mae: 0.1804 - val_loss: 0.0447 - val_mae: 0.1717
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0422 - mae: 0.1689 - val_loss: 0.0438 - val_mae: 0.1720
Epoch 7/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0454 

Step 8: Evaluate the Model

In [22]:
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Loss: {loss:.4f} | MAE: {mae:.4f}")

Loss: 0.0439 | MAE: 0.1678


In [23]:
from sklearn.preprocessing import MinMaxScaler

mae = 0.1679
y_min = 0   
y_max = 2    

target_range = y_max - y_min

accuracy = (1 - mae / target_range) * 100
print(f"🎯 Approximated Accuracy: {accuracy:.2f}%")

🎯 Approximated Accuracy: 91.61%


In [24]:
from tensorflow.keras.models import load_model
import joblib

model.save("bacillus_lstm_model.h5")

joblib.dump(scaler, "feature_scaler.pkl")
joblib.dump(target_scaled, "target_scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")




['label_encoders.pkl']

In [None]:

df_filtered = df[~df['Crop'].isin(['Wheat', 'Cotton', 'Rice'])]
df_filtered.to_csv('turmeric_similar_crops.csv', index=False)
