In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split


# --- Load the Iris dataset ---
# Column names are passed explicitly via `names`, so every row of the
# downloaded file is read as data rather than as a header.
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
iris_data = pd.read_csv(iris_url, names=names)

# --- Separate inputs from labels ---
# y is the 'species' column; X is every remaining column of the frame.
y = iris_data['species']
X = iris_data.drop(columns=['species'])

# --- Hold out a test set ---
# 20% of the rows are reserved for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Standardize the features ---
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test rows are used for scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Train the classifier ---
# k-Nearest Neighbors with k=3, fitted on the scaled training features.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# --- Predict on the held-out split ---
y_pred = knn.predict(X_test)

# --- Evaluate on the held-out split ---
# Compare the predictions against the true test labels: overall accuracy
# plus the per-class precision / recall / f1 text report.
class_report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# One print call emitting the three lines in order: accuracy, heading, report.
print(
    f"Accuracy: {accuracy:.2f}",
    "Classification Report:",
    class_report,
    sep="\n",
)




Accuracy: 1.00
Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [2]:

# Persist the loaded Iris DataFrame to a local CSV file; index=False keeps
# the row index out of the written file.
iris_data.to_csv(
    path_or_buf="iris_dataset.csv",
    index=False,
)




In [3]:
# Step 8: Predict and Save the Predicted Species for New Data (test.csv)
# Load the new, unlabeled measurements.
test_data = pd.read_csv("test.csv")

# Select exactly the feature columns the scaler was fitted on, in training
# order. Passing the whole frame would make the scaling step fail whenever
# test.csv carries an extra column (an id, a label, ...) or lists the
# features in a different order.
feature_columns = list(X.columns)
missing_columns = [col for col in feature_columns if col not in test_data.columns]
if missing_columns:
    raise ValueError(
        f"test.csv is missing required feature columns: {missing_columns}"
    )

# Standardize with the scaler fitted on the training split (transform only;
# the scaler is not re-fitted on the new data).
X_test_new = scaler.transform(test_data[feature_columns])

# Make predictions on the new data. The classifier was trained on the string
# values of the 'species' column, so the predictions are already species
# names and need no numeric-to-name mapping.
y_pred_new = knn.predict(X_test_new)

# Add the predicted species to the test_data DataFrame
test_data['predicted_species'] = y_pred_new

# Save the input rows together with their predicted species to a new CSV file
test_data.to_csv("predicted_species.csv", index=False)

