In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the dataset. The file is read with a two-row header, so pandas returns
# two-level column names that are flattened into single strings below.
df = pd.read_csv('Sheet1.csv', header=[0, 1])

# Join both header levels into one name per column: ('a', 'b') -> 'a b'.
df.columns = [' '.join(col).strip() for col in df.columns.values]

# After flattening, the target column is named 'Label Unnamed: 0_level_1'
# (presumably because its second header cell is empty -- confirm against the
# sheet). Give it back its plain name without mutating the frame in place.
df = df.rename(columns={'Label Unnamed: 0_level_1': 'Label'})

# A row with no label cannot be used for supervised training. Drop it instead
# of letting a blanket fillna(0) turn the missing label into a bogus class 0
# mixed in with the string labels.
df = df.dropna(subset=['Label'])

# Missing feature values are treated as 0; the label column is left untouched.
df = df.fillna({col: 0 for col in df.columns if col != 'Label'})

# Feature matrix and target vector used by the following cells.
X = df.drop(columns='Label')
y = df['Label']

In [3]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit a logistic-regression classifier (fit() returns the estimator itself).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict on the held-out rows and score the predictions.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2%}")
print("\nDetailed Report:\n", report)

Model Accuracy: 100.00%

Detailed Report:
               precision    recall  f1-score   support

       Apple       1.00      1.00      1.00         1
      Banana       1.00      1.00      1.00         1
      Orange       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [4]:
# Optional extra: inspect how confident the model is about one prediction,
# using the first row of the test split as the example.
print("\n--- Bonus: Model Confidence for a Prediction ---")
print(f"True fruit: {y_test.iloc[0]}")
print(f"Predicted fruit: {y_pred[0]}")

# predict_proba yields one row of per-class probabilities for each input row;
# the single-row slice keeps the input two-dimensional.
probabilities = model.predict_proba(X_test.iloc[:1])

# Pair every class label with its probability for that first test row.
first_row_probs = dict(zip(model.classes_, probabilities[0]))
for fruit, prob in first_row_probs.items():
    print(f"Probability it's {fruit}: {prob:.2%}")


--- Bonus: Model Confidence for a Prediction ---
True fruit: Apple
Predicted fruit: Apple
Probability it's Apple: 91.24%
Probability it's Banana: 3.69%
Probability it's Orange: 5.07%
