# **Internship Task 3 : IRIS Flowers Classification**

In [None]:
#importing the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the Iris dataset that ships with scikit-learn. It is a classic
# classification dataset: samples of three iris species, each described by
# four measurements.
iris = load_iris()

# Split the loaded dataset into its two parts in a single assignment:
#   X - the feature data (independent variables): sepal length, sepal width,
#       petal length and petal width for each sample.
#   y - the target labels (dependent variable): the species label
#       (0, 1, or 2) of each sample.
X, y = iris.data, iris.target

In [None]:
# Show the class names the model will learn to predict. The dataset stores
# them as an array under 'target_names'; they are the three iris species
# (setosa, versicolor, virginica). Leaving the expression as the last line
# of the cell makes the notebook display it.
iris["target_names"]

In [None]:
# Transpose the feature matrix so that every row is one feature and every
# column is one flower. This makes it easy to pull out a whole feature at once.
features = np.transpose(iris.data)

# Unpack the rows of the transposed matrix into one variable per feature.
# Row order follows the dataset's column order: sepal length, sepal width,
# petal length, petal width.
sepal_length, sepal_width, petal_length, petal_width = features

In [None]:
# Axis-label text for the plots. 'feature_names' lists the feature names in
# the same order as the feature columns, so the first four entries are
# unpacked straight into one label variable per feature.
(
    sepal_length_label,
    sepal_width_label,
    petal_length_label,
    petal_width_label,
) = iris.feature_names[:4]

# Scatter plot with sepal width on the x-axis and sepal length on the y-axis.
# Each point is colored by its target label so the classes can be told apart.
plt.scatter(x=sepal_width, y=sepal_length, c=iris.target)

# Label both axes with the name of the feature they show, then render.
plt.ylabel(sepal_length_label)
plt.xlabel(sepal_width_label)
plt.show()

In [None]:
# Partition the dataset into training and testing subsets with scikit-learn's
# 'train_test_split' (imported at the top of the notebook). Holding out test
# data lets us assess the model on samples it has not seen during training.
# No 'test_size' is passed, so the library default proportion applies;
# 'random_state=10' is passed as the seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=10,
)

# K-Nearest Neighbors (KNN) is a supervised learning algorithm that predicts
# the majority class among the 'k' nearest neighbors in feature space.
# Here k = 3: the three closest training samples decide each prediction.
KNN_Classifier = KNeighborsClassifier(n_neighbors=3)

# Train the classifier on the training features and their target labels.
# Kept as the last expression of the cell so the notebook shows the fitted
# estimator.
KNN_Classifier.fit(X_train, y_train)

In [None]:
# A single unseen flower to classify. The four values are, in order:
# sepal length, sepal width, petal length, petal width.
# 'predict' is given a 2-D array (one row per sample), so the flat list of
# measurements is reshaped into a single row of four columns.
X_newIris = np.array([5.0, 2.9, 1.0, 0.2]).reshape(1, -1)

# Ask the trained KNN classifier for the class of the new sample and print
# the result (an array holding the predicted class label).
predictNew = KNN_Classifier.predict(X_newIris)
print(predictNew)

In [None]:
# Accuracy score of the KNN classifier, evaluated on the held-out test
# features and labels.
test_accuracy = KNN_Classifier.score(X_test, y_test)
print(test_accuracy)

In [None]:
# Predicted class labels for every sample in the test set; used afterwards
# to compare the model's predictions against the true test labels.
y_pred = KNN_Classifier.predict(X=X_test)

In [None]:
# Confusion matrix for the test-set predictions.
# scikit-learn's signature is confusion_matrix(y_true, y_pred): the true
# labels must come first so that rows correspond to the actual classes and
# columns to the predicted classes. Passing the arguments the other way
# round yields the transposed matrix, which swaps the roles of false
# positives and false negatives for each class.
confusion_matrix(y_test, y_pred)