In [None]:
# Import necessary Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl

In [None]:
# Reading and Analyzing Data
# Load the Iris dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv("Iris.csv")
# Show the loaded frame as the cell output.
df

In [None]:
# Preview the first rows (head() defaults to 5) to sanity-check the load.
df.head()

In [None]:
# Preview the last rows (tail() defaults to 5).
df.tail()

In [None]:
# List the column labels of the frame.
df.columns

In [None]:
# Summarise columns, non-null counts and dtypes.
# `info` is a method: without the parentheses the cell only shows the
# bound-method repr instead of the summary.
df.info()

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Number of axes of the frame (a DataFrame is two-dimensional).
df.ndim

In [None]:
# (number of rows, number of columns).
df.shape

In [None]:
# Checking for Inconsistent, Missing and Duplicated Values

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# `isnull` is an alias of `isna`, so this is the same per-column missing-value count.
df.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (all columns compared).
df.duplicated().sum()

In [None]:
# Distinct class labels in the target column.
# `unique` must be called; the bare attribute only displays the bound method.
df['Species'].unique()

In [None]:
# Re-check column dtypes and non-null counts after the consistency checks.
# `info` is a method and has to be called to print the summary.
df.info()

In [None]:
# Boxplot: one figure per non-target column, grouped by species.
for column_name in df.columns:
    # The target column is the grouping variable, not something to plot.
    if column_name == 'Species':
        continue
    sns.boxplot(x='Species', y=column_name, data=df)
    plt.xlabel('Species')
    plt.ylabel(column_name)
    plt.title("Boxplot of %s by Species" % column_name)
    plt.show()

In [None]:
df.Species.value_counts() # Number of rows per class in the target column

In [None]:
# Same class counts expressed as fractions of the total (normalize=True).
df.Species.value_counts(normalize = True)

In [None]:
# Countplot: number of samples per species.
ax = sns.countplot(data=df, x="Species", color='pink')
ax.set_xlabel("Species")
ax.set_ylabel("Count")
ax.set_title("Distribution of Iris Species")
plt.show()

In [None]:
# Pairplot: pairwise scatter plots of the numeric columns, coloured by species.
sns.pairplot(df, hue='Species', palette='magma')
# pairplot draws a grid of many axes, so plt.title would only label the last
# subplot. Use a figure-level title instead; y > 1 lifts it above the top row
# so it does not overlap the plots.
plt.suptitle("Pair Plot of Iris Dataset", y=1.02)
plt.show()

In [None]:
# Countplot of the value frequencies of every column (one wide figure each).
for column_name in df.columns:
    fig, ax = plt.subplots(figsize=(20, 5))
    sns.countplot(x=column_name, data=df, alpha=0.3, color='#000000', ax=ax)
    ax.set_xlabel(column_name)
    ax.set_ylabel("Count")
    ax.set_title("Distribution of %s" % column_name)
    plt.show()

In [None]:
# Encode the categorical target column as integer class labels.
from sklearn.preprocessing import LabelEncoder

labelEncode = LabelEncoder()

# Learn the label -> integer mapping, then overwrite the column with the codes.
labelEncode.fit(df['Species'])
df['Species'] = labelEncode.transform(df['Species'])
df['Species']

In [None]:
df.corr() # Pairwise correlation between columns (Species is already label-encoded here)

In [None]:
# Visualise the pairwise column correlations as an annotated heatmap.
correlation_matrix = df.corr()
ax = sns.heatmap(correlation_matrix, annot=True, cmap='Pastel1')
ax.set_title('Correlation Matrix of Iris Dataset')
plt.show()

In [None]:
# Split the data into training and test sets.
# Features are every column except the row identifier and the target.
X = df.drop(columns=['Id','Species'])
Y = df['Species']

from sklearn.model_selection import train_test_split
# random_state fixes the shuffle so every run (and every model below) sees the
# same split; stratify=Y keeps the class proportions identical in both sets,
# which matters with a test set this small (10% of the rows).
X_train,X_test,Y_train,Y_test = train_test_split(
    X, Y, test_size = 0.10, random_state = 42, stratify = Y
)

# Size of the held-out test set: (rows, feature columns).
X_test.shape

In [None]:
# Create a logistic regression classifier with scikit-learn's default settings.
# NOTE(review): if fitting emits a ConvergenceWarning, consider raising
# max_iter or scaling the features — confirm on this data.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

In [None]:
model.fit(X_train, Y_train) # Train the classifier on the training split

In [None]:
# Inspect the true labels of the held-out test set.
Y_test

In [None]:
# Evaluate the logistic regression model: accuracy on the test set, as a percentage.
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
score = accuracy_score(y_true=Y_test, y_pred=y_pred)  # fraction of correct predictions
accuracy = score * 100
print(accuracy)

In [None]:
# Inspect the labels most recently predicted for the test set.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of true labels (first argument) vs. predicted labels
# (second argument) on the test set.
cm = confusion_matrix(Y_test, y_pred)
print("Confusion Matrix")
print(cm)

In [None]:
# Draw the confusion matrix as an annotated heatmap (integer cell labels, no colour bar).
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='plasma',
    cbar=False,
    annot_kws={'size': 14},
    ax=ax,
)
ax.set_xlabel('Predicted Labels', fontsize=14)
ax.set_ylabel('True Labels', fontsize=14)
ax.set_title('Confusion Matrix', fontsize=16)
plt.show()

In [None]:
# Train a decision tree classifier on the training split.
from sklearn.tree import DecisionTreeClassifier
# The tree permutes features randomly when searching for splits, so ties can
# resolve differently between runs; a fixed random_state makes the fitted tree
# (and its reported accuracy) reproducible.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, Y_train)

In [None]:
# Evaluate the decision tree: accuracy on the test set, as a percentage.
y_pred = clf.predict(X_test)
score = accuracy_score(y_true=Y_test, y_pred=y_pred)  # fraction of correct predictions
accuracy = score * 100
print(accuracy)

In [None]:
from sklearn.metrics import confusion_matrix
# Predict test-set labels with the decision tree (`clf`).
y_pred = clf.predict(X_test)

# Print the confusion matrix of true labels vs. decision-tree predictions.
print('Confusion Matrix')
cm = confusion_matrix(Y_test, y_pred)
print(cm)

In [None]:
# Draw the current confusion matrix as an annotated heatmap (integer cell labels, no colour bar).
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='turbo',
    cbar=False,
    annot_kws={'size': 14},
    ax=ax,
)
ax.set_xlabel("Predicted Labels", fontsize=16)
ax.set_ylabel("True Labels", fontsize=16)
ax.set_title("Confusion Matrix", fontsize=24)
plt.show()

In [None]:
# Train a Gaussian Naive Bayes classifier on the training split.
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, Y_train)

In [None]:
from sklearn.metrics import confusion_matrix
# Predict with the Gaussian Naive Bayes model (`nb`). Predicting with `clf`
# here would re-evaluate the decision tree and report its results under the
# Naive Bayes section.
y_pred = nb.predict(X_test)

# Print the confusion matrix of true labels vs. Naive Bayes predictions.
print('Confusion Matrix')
cm = confusion_matrix(Y_test, y_pred)
print(cm)

In [None]:
# Draw the current confusion matrix as an annotated heatmap (integer cell labels, no colour bar).
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='viridis',
    cbar=False,
    annot_kws={'size': 14},
    ax=ax,
)
ax.set_xlabel("Predicted Labels", fontsize=16)
ax.set_ylabel("True Labels", fontsize=16)
ax.set_title("Confusion Matrix", fontsize=24)
plt.show()

In [None]:
# Evaluate the Gaussian Naive Bayes model (`nb`): accuracy on the test set, as a percentage.
# Using `clf` here would silently report the decision tree's accuracy again.
y_pred = nb.predict(X_test)
score = accuracy_score(Y_test, y_pred)  # fraction of correct predictions
accuracy = score*100
print(accuracy)