In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

# --- Load data ---------------------------------------------------------------
# Every column except the label ('Class') and the 'id' column is used as a feature.
dataset = pd.read_csv('riceclass.csv')
y = dataset['Class']
X = dataset.drop(columns=['Class', 'id'])

# --- Train / test split --------------------------------------------------------
# 20% of the rows are held out for evaluation; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21
)

# --- Feature scaling -----------------------------------------------------------
# The scaler learns its min/max from the training rows only and is then applied
# to both partitions, so no test-set statistics leak into preprocessing.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_train)
X_train_rescaled = scaler.transform(X_train)
X_test_rescaled = scaler.transform(X_test)

# --- Baseline model (default regularization strength) --------------------------
model = LogisticRegression(max_iter=1000).fit(X_train_rescaled, y_train)
y_pred = model.predict(X_test_rescaled)

# --- Evaluation on the held-out rows -------------------------------------------
# Precision and recall are averaged across classes, weighted by class support.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='weighted')
recall = metrics.recall_score(y_test, y_pred, average='weighted')

print(
    "Metrics before hyperparameter optimization:",
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    sep="\n",
)

conf_matrix = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

Metrics before hyperparameter optimization:
Accuracy: 0.931758530183727
Precision: 0.9317570481555417
Recall: 0.931758530183727
Confusion Matrix:
[[300  30]
 [ 22 410]]


In [33]:
# Hyperparameter search over the inverse regularization strength C of
# LogisticRegression. Higher C values imply less regularization, but may
# overfit at higher values of C.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

model2 = LogisticRegression(max_iter=1000)

# 5-fold cross-validated grid search, run on the training partition only.
# NOTE(review): this cell previously read `X_train_scaled` / `X_test_scaled`,
# which are not defined anywhere in the visible notebook and raise NameError on
# a fresh kernel. It now uses the MinMax-rescaled arrays produced by the
# baseline cell (`X_train_rescaled` / `X_test_rescaled`) -- confirm this is the
# intended preprocessing.
grid_search = GridSearchCV(model2, param_grid, cv=5)
grid_search.fit(X_train_rescaled, y_train)

# Winning parameter setting and the estimator GridSearchCV exposes for it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Evaluate the tuned model on the same held-out rows as the baseline model.
y_pred_tuned = best_model.predict(X_test_rescaled)

accuracy_tuned = metrics.accuracy_score(y_test, y_pred_tuned)
precision_tuned = metrics.precision_score(y_test, y_pred_tuned, average='weighted')
recall_tuned = metrics.recall_score(y_test, y_pred_tuned, average='weighted')

print("\nMetrics after hyperparameter optimization:")
print(f"Accuracy: {accuracy_tuned}")
print(f"Precision: {precision_tuned}")
print(f"Recall: {recall_tuned}")

# Note: this rebinds `conf_matrix`, replacing the baseline model's matrix.
conf_matrix = metrics.confusion_matrix(y_test, y_pred_tuned)
print("Confusion Matrix:")
print(conf_matrix)




Metrics after hyperparameter optimization:
Accuracy: 0.9356955380577427
Precision: 0.935736337363704
Recall: 0.9356955380577427
Confusion Matrix:
[[301  29]
 [ 20 412]]
