In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Load dataset (Update file path as needed)
file_path = r'C:\Users\DELL\OneDrive\Desktop\oasis_infobyte\iris.csv'
df = pd.read_csv(file_path)

# Step 2: Check columns
print("Columns in the dataset:", df.columns)

# Step 3: Encode the target column correctly (fix name if needed)
target_col = 'Species'  # Make sure it matches exactly
df[target_col] = LabelEncoder().fit_transform(df[target_col])

# Step 4: Separate features and target
X = df.drop(target_col, axis=1)
y = df[target_col]

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Train the model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Step 7: Evaluate model
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 8: Predict first 10 samples and decode species names
first_10_preds = model.predict(X.head(10))
label_encoder = LabelEncoder()
label_encoder.fit(df[target_col])  # Refit encoder on target to decode
decoded_preds = label_encoder.inverse_transform(first_10_preds)

# Step 9: Display predictions
print("\nPredicted species of first 10 samples:")
for i, species in enumerate(decoded_preds):
    print(f"Sample {i+1}: {species}")


Columns in the dataset: Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0

Predicted species of first 10 samples:
Sample 1: 0
Sample 2: 0
Sample 3: 0
Sample 4: 0
Sample 5: 0
Sample 6: 0
Sample 7: 0
Sample 8: 0
Sample 9: 0
Sample 10: 0
