In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the crop dataset from the Excel workbook (path is relative to the working directory).
df = pd.read_excel(io="cropreco.xlsx")

In [None]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Temperature,Humidity,pH,Rainfall,Label
0,20.879744,82.002744,6.502985,202.935536,Rice
1,21.770462,80.319644,7.038096,226.655537,Rice
2,23.004459,82.320763,7.840207,263.964248,Rice
3,26.491096,80.158363,6.980401,242.864034,Rice
4,20.130175,81.604873,7.628473,262.71734,Rice


In [None]:
# Summarize column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7000 entries, 0 to 6999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  7000 non-null   float64
 1   Humidity     7000 non-null   float64
 2   pH           7000 non-null   float64
 3   Rainfall     7000 non-null   float64
 4   Label        7000 non-null   object 
dtypes: float64(4), object(1)
memory usage: 273.6+ KB


In [None]:
# Count how many rows each crop label has (class-balance check).
df["Label"].value_counts()

Rice              100
Lettuce           100
Rajma             100
Turmeric          100
Guava             100
                 ... 
Green Chillies    100
Carrot            100
Ginger            100
Garlic            100
Mustard           100
Name: Label, Length: 70, dtype: int64

In [None]:
# (rows, columns) of the frame.
df.shape

(7000, 5)

In [None]:
# Number of missing values per column.
df.isna().sum()

Temperature    0
Humidity       0
pH             0
Rainfall       0
Label          0
dtype: int64

Data Preprocessing

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder used below to map the crop names in 'Label' to integer class ids.
label_encoder = LabelEncoder()

In [None]:
# Apply label encoding to the 'Label' column (crop names -> integer class ids).
# Note: this overwrites 'Label' in place; label_encoder holds the fitted mapping.
df['Label'] = label_encoder.fit_transform(df['Label'])

In [None]:
# Check that 'Label' now holds integer codes.
df.head()

Unnamed: 0,Temperature,Humidity,pH,Rainfall,Label
0,20.879744,82.002744,6.502985,202.935536,59
1,21.770462,80.319644,7.038096,226.655537,59
2,23.004459,82.320763,7.840207,263.964248,59
3,26.491096,80.158363,6.980401,242.864034,59
4,20.130175,81.604873,7.628473,262.71734,59


In [None]:
# Numeric feature columns to be rescaled; 'Label' is left out.
cols = [
    'Temperature',
    'Humidity',
    'pH',
    'Rainfall',
]

In [None]:
# Initialize the MinMaxScaler (default settings; rescales each column to the [0, 1] range)
scaler = MinMaxScaler()

In [None]:
# Apply MinMaxScaler to selected columns.
# The scaler is fitted on the training rows only and then applied to every row;
# fitting on the full frame would let the test rows' min/max leak into the
# preprocessing. The row selection mirrors the later
# train_test_split(X, y, test_size=0.2, random_state=42) call: with the same
# row count, test_size and seed it picks the same training rows, so keep the
# two calls in sync. Scaled test rows may fall slightly outside [0, 1].
train_pos = train_test_split(np.arange(len(df)), test_size=0.2, random_state=42)[0]
scaler.fit(df.iloc[train_pos][cols])
df[cols] = scaler.transform(df[cols])

In [None]:
# Inspect the rescaled feature columns.
df.head()

Unnamed: 0,Temperature,Humidity,pH,Rainfall,Label
0,0.36313,0.808636,0.466264,0.030608,59
1,0.385023,0.790721,0.54948,0.034582,59
2,0.415353,0.812021,0.674219,0.040831,59
3,0.501049,0.789005,0.540508,0.037297,59
4,0.344707,0.804401,0.641291,0.040622,59


Training the model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Separate the encoded target column from the feature matrix.
y = df["Label"]
X = df.drop(columns=["Label"])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate models, keyed by the display name used in the results printout.
# The tree-based estimators draw random numbers while fitting, so they get a
# fixed random_state; without it their accuracies change from run to run.
classifiers = {
    'K Nearest Neigbour': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Gaussian Naive Bayes': GaussianNB(),
    'Decision Tree Classifier': DecisionTreeClassifier(random_state=42),
    'Support Vector Classifier': SVC(),
    'Random Forest classifier': RandomForestClassifier(random_state=42)
}

In [None]:
# Fit every candidate on the training split and report its accuracy on the held-out split.
for name, classifier in classifiers.items():
    # fit() returns the estimator itself, so prediction can be chained onto it.
    y_pred = classifier.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    label = name.capitalize()
    print(f"{label} Classifier Accuracy: {accuracy:.4f}")

K nearest neigbour Classifier Accuracy: 0.8571
Logistic regression Classifier Accuracy: 0.4929
Gaussian naive bayes Classifier Accuracy: 0.9500
Decision tree classifier Classifier Accuracy: 0.9443
Support vector classifier Classifier Accuracy: 0.7814
Random forest classifier Classifier Accuracy: 0.9671


Using the LightGBM model

In [None]:
import lightgbm as lgb

In [None]:
# Convert data to LightGBM dataset format
# reference=train_data ties the evaluation set to the training Dataset, which
# is how LightGBM expects validation data to be constructed.
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

In [None]:
# LightGBM parameters for multiclass classification
params = {
    'objective': 'multiclass',
    # Count classes over the full encoded label column rather than the training
    # split alone, so num_class stays correct even if a class were absent from y_train.
    'num_class': y.nunique(),
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9
}

In [None]:
# Train the model
# The held-out split is passed as a validation set for metric reporting; no
# early-stopping callback is supplied here, so training is expected to run
# all num_round boosting rounds.
num_round = 100
bst = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=num_round,
    valid_sets=[test_data],
)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000823 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1020
[LightGBM] [Info] Number of data points in the train set: 5600, number of used features: 4
[LightGBM] [Info] Start training from score -4.153185
[LightGBM] [Info] Start training from score -4.248495
[LightGBM] [Info] Start training from score -4.187871
[LightGBM] [Info] Start training from score -4.313034
[LightGBM] [Info] Start training from score -4.261074
[LightGBM] [Info] Start training from score -4.273813
[LightGBM] [Info] Start training from score -4.367842
[LightGBM] [Info] Start training from score -4.187871
[LightGBM] [Info] Start training from score -4.236073
[LightGBM] [Info] Start training from score -4.223803
[LightGBM] [Info] Start training from score -4.261074
[LightGBM] [Info] Start training from score -4.187871
[LightGBM] [Info] Start training from score -4.236073
[LightGBM] 

In [None]:
# Make predictions
# y_pred holds one row of per-class scores per test sample; the predicted class
# is the column index of each row's maximum (first index wins on ties).
y_pred = bst.predict(X_test)
y_pred_class = np.argmax(y_pred, axis=1)

In [None]:
# Evaluate the model
# Share of test rows whose predicted class equals the true encoded label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_class)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9614


Using LightGBM with FLAML hyperparameter tuning

In [None]:
!pip install flaml

Collecting flaml
  Downloading FLAML-2.1.1-py3-none-any.whl (295 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/295.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/295.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.2/295.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: flaml
Successfully installed flaml-2.1.1


In [None]:
from flaml import AutoML

In [None]:
# Candidate hyperparameter ranges, each given as a (low, high) pair.
# NOTE(review): none of the visible cells pass this dict to automl.fit, so it
# does not appear to influence the search — confirm whether it should be wired in.
search_space = dict(
    n_estimators=(10, 200),      # number of boosting rounds
    num_leaves=(4, 128),         # maximum tree leaves for base learners
    min_child_samples=(2, 20),   # minimum number of data points per leaf
    learning_rate=(0.01, 0.1),   # learning rate
)

In [None]:
# Initialize FLAML's AutoML class
# (created with default constructor arguments; the run itself is configured at fit time)
automl = AutoML()

In [None]:
# FLAML run settings: a classification task scored by accuracy.
# The search is restricted to LightGBM so the run matches what this section
# sets out to tune (with no estimator list, FLAML also tries its other default
# learners), and it is seeded so repeated runs search the same way.
automl_settings = {
    "time_budget": 3600,  # search time budget, in seconds
    "metric": 'accuracy',
    "task": 'classification',
    "estimator_list": ['lgbm'],
    "seed": 42,
}

In [None]:
# Run FLAML's search on the training split using the settings in automl_settings.
# Which learners are tried is determined by those settings (FLAML falls back to
# its default learner list when no estimator list is given).
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

[flaml.automl.logger: 12-22 22:17:10] {1679} INFO - task = classification
[flaml.automl.logger: 12-22 22:17:10] {1690} INFO - Evaluation method: cv
[flaml.automl.logger: 12-22 22:17:10] {1788} INFO - Minimizing error metric: 1-accuracy
[flaml.automl.logger: 12-22 22:17:10] {1900} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.logger: 12-22 22:17:10] {2218} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 12-22 22:17:12] {2344} INFO - Estimated sufficient time budget=24221s. Estimated necessary time budget=558s.
[flaml.automl.logger: 12-22 22:17:12] {2391} INFO -  at 2.5s,	estimator lgbm's best error=0.0950,	best estimator lgbm's best error=0.0950
[flaml.automl.logger: 12-22 22:17:12] {2218} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 12-22 22:17:13] {2391} INFO -  at 3.0s,	estimator lgbm's best error=0.0950,	best estimator lgbm's best error=0.0950
[flaml.automl.logger: 12-22 22:1

INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune


[flaml.automl.logger: 12-22 23:03:17] {2391} INFO -  at 2767.3s,	estimator lrl1's best error=0.2963,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:03:17] {2218} INFO - iteration 288, current learner lrl1




[flaml.automl.logger: 12-22 23:03:25] {2391} INFO -  at 2775.0s,	estimator lrl1's best error=0.2963,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:03:25] {2218} INFO - iteration 289, current learner lrl1




[flaml.automl.logger: 12-22 23:03:38] {2391} INFO -  at 2787.8s,	estimator lrl1's best error=0.2559,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:03:38] {2218} INFO - iteration 290, current learner rf




[flaml.automl.logger: 12-22 23:04:06] {2391} INFO -  at 2815.9s,	estimator rf's best error=0.0379,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:04:06] {2218} INFO - iteration 291, current learner lrl1
[flaml.automl.logger: 12-22 23:04:13] {2391} INFO -  at 2823.2s,	estimator lrl1's best error=0.2559,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:04:13] {2218} INFO - iteration 292, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:04:31] {2391} INFO -  at 2841.6s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:04:31] {2218} INFO - iteration 293, current learner lrl1




[flaml.automl.logger: 12-22 23:04:44] {2391} INFO -  at 2854.4s,	estimator lrl1's best error=0.2495,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:04:44] {2218} INFO - iteration 294, current learner xgb_limitdepth




[flaml.automl.logger: 12-22 23:05:13] {2391} INFO -  at 2882.9s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:05:13] {2218} INFO - iteration 295, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:05:23] {2391} INFO -  at 2893.5s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:05:23] {2218} INFO - iteration 296, current learner xgboost
[flaml.automl.logger: 12-22 23:07:16] {2391} INFO -  at 3006.5s,	estimator xgboost's best error=0.0437,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:07:16] {2218} INFO - iteration 297, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:07:26] {2391} INFO -  at 3015.9s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:07:26] {2218} INFO - iteration 298, current learner xgb_limitdepth
[flaml.automl.logger: 12-2



[flaml.automl.logger: 12-22 23:09:53] {2391} INFO -  at 3162.9s,	estimator lrl1's best error=0.2489,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:09:53] {2218} INFO - iteration 305, current learner lgbm




[flaml.automl.logger: 12-22 23:10:06] {2391} INFO -  at 3176.1s,	estimator lgbm's best error=0.0425,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:10:06] {2218} INFO - iteration 306, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:10:46] {2391} INFO -  at 3216.7s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:10:46] {2218} INFO - iteration 307, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:10:54] {2391} INFO -  at 3224.3s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:10:54] {2218} INFO - iteration 308, current learner xgb_limitdepth
[flaml.automl.logger: 12-22 23:11:23] {2391} INFO -  at 3253.3s,	estimator xgb_limitdepth's best error=0.0391,	best estimator rf's best error=0.0379
[flaml.automl.logger: 12-22 23:11:23] {2218} INFO - iteration 309, current learner xgboost
[flaml.automl.logger: 12-22 2

In [None]:
# Get the best model found by FLAML
# (the trained estimator FLAML selected during the search)
best_model = automl.model

In [None]:
# Make predictions on the held-out test split with the best FLAML model
y_pred = best_model.predict(X_test)

In [None]:
# Evaluate the model
# Share of test rows whose predicted label equals the true encoded label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9671
