In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score


In [2]:
file_path = r'C:\Users\gfoley\OneDrive - Epiq Inc\Desktop\Plant Project\scaled_images.csv'
df = pd.read_csv(file_path)

In [3]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2343,2344,2345,2346,2347,2348,2349,2350,2351,label
0,0.616327,0.582329,0.584677,0.608871,0.575397,0.577689,0.610442,0.577075,0.579365,0.604839,...,0.49569,0.473684,0.480519,0.48927,0.471616,0.47619,0.495726,0.482456,0.486957,Tomato_Bacterial_spot
1,0.493878,0.445783,0.443548,0.483871,0.436508,0.434263,0.48996,0.442688,0.440476,0.5,...,0.711207,0.714912,0.709957,0.716738,0.720524,0.718615,0.713675,0.723684,0.721739,Tomato_Bacterial_spot
2,0.481633,0.457831,0.447581,0.479839,0.456349,0.446215,0.46988,0.44664,0.436508,0.471774,...,0.698276,0.719298,0.705628,0.699571,0.716157,0.714286,0.67094,0.692982,0.695652,Tomato_Bacterial_spot
3,0.653061,0.634538,0.641129,0.649194,0.630952,0.63745,0.634538,0.616601,0.623016,0.625,...,0.564655,0.557018,0.545455,0.545064,0.537118,0.532468,0.538462,0.535088,0.526087,Tomato_Bacterial_spot
4,0.685714,0.650602,0.653226,0.673387,0.638889,0.645418,0.670683,0.636364,0.646825,0.669355,...,0.5,0.447368,0.441558,0.484979,0.436681,0.4329,0.5,0.460526,0.456522,Tomato_Bacterial_spot


In [4]:
df['healthy?'] = (df['label'] == 'Tomato_healthy').astype(int)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2344,2345,2346,2347,2348,2349,2350,2351,label,healthy?
0,0.616327,0.582329,0.584677,0.608871,0.575397,0.577689,0.610442,0.577075,0.579365,0.604839,...,0.473684,0.480519,0.48927,0.471616,0.47619,0.495726,0.482456,0.486957,Tomato_Bacterial_spot,0
1,0.493878,0.445783,0.443548,0.483871,0.436508,0.434263,0.48996,0.442688,0.440476,0.5,...,0.714912,0.709957,0.716738,0.720524,0.718615,0.713675,0.723684,0.721739,Tomato_Bacterial_spot,0
2,0.481633,0.457831,0.447581,0.479839,0.456349,0.446215,0.46988,0.44664,0.436508,0.471774,...,0.719298,0.705628,0.699571,0.716157,0.714286,0.67094,0.692982,0.695652,Tomato_Bacterial_spot,0
3,0.653061,0.634538,0.641129,0.649194,0.630952,0.63745,0.634538,0.616601,0.623016,0.625,...,0.557018,0.545455,0.545064,0.537118,0.532468,0.538462,0.535088,0.526087,Tomato_Bacterial_spot,0
4,0.685714,0.650602,0.653226,0.673387,0.638889,0.645418,0.670683,0.636364,0.646825,0.669355,...,0.447368,0.441558,0.484979,0.436681,0.4329,0.5,0.460526,0.456522,Tomato_Bacterial_spot,0


In [5]:
file_path = r'C:\Users\gfoley\OneDrive - Epiq Inc\Desktop\Plant Project\scaled_images_healthy.csv'
df.to_csv(file_path, index=False)

In [6]:
df = df.drop('label', axis=1)

In [7]:
# Separate features and labels
X = df.drop('healthy?', axis=1).values
y = df['healthy?'].values 

In [8]:
df['healthy?'].unique()

array([0, 1])

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [10]:
# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

# Assign a random_state of 1 to the function
model = LogisticRegression(random_state=1,  max_iter=1000)
model

In [11]:
# Fit the model using training data
logreg = model.fit(X_train, y_train)

In [12]:
# Make a prediction using the testing data
predictions = logreg.predict(X_test)

In [13]:
# Generate a confusion matrix for the model
matrix = confusion_matrix(y_test, predictions)
matrix = pd.DataFrame(matrix,
                     index = ['Actual Unhealthy', 'Actual Healthy'],
                     columns = ['Predicted Unhealthy', 'Predict Healthy'])
matrix

Unnamed: 0,Predicted Unhealthy,Predict Healthy
Actual Unhealthy,712,69
Actual Healthy,43,714


In [14]:
# Print the classification report for the model
target_names  = ["Healthy (0)", "Unhealthy (1)"]
report = classification_report(y_test, predictions, target_names  = target_names)

# Print the training classification report
print(report)

               precision    recall  f1-score   support

  Healthy (0)       0.94      0.91      0.93       781
Unhealthy (1)       0.91      0.94      0.93       757

     accuracy                           0.93      1538
    macro avg       0.93      0.93      0.93      1538
 weighted avg       0.93      0.93      0.93      1538

