## Import Libraries

In [None]:
import pandas as pd
import numpy as np

## Read Dataset

In [None]:
# Load the Iris data from the working directory and preview its first rows.
df = pd.read_csv("Iris.csv")
df.head(n=5)

## Initial Inspection

In [None]:
# (rows, columns) of the freshly loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count; a quick way to spot columns with gaps.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

## Displaying Null Values

In [None]:
# Flag each row that has a missing value in at least one column, then print
# only those rows.
rows_with_missing = df.isna().any(axis="columns")
print(df.loc[rows_with_missing])

In [None]:
# Independent (deep) copy of the raw data, so edits to df1 leave df untouched.
df1 = df.copy()

In [None]:
# Number of missing values in each column of df1 before any rows are dropped.
df1.isna().sum()

## Dropping of Rows/Columns

In [None]:
# Remove every row of df1 that holds at least one missing value. The frame is
# modified in place, so nothing is reassigned.
df1.dropna(axis="index", how="any", inplace=True)

In [None]:
# Re-check df1 after the drop: every per-column count should now be zero.
df1.isna().sum()

In [None]:
# The original frame still reports its missing values, because df1 was a deep copy.
df.isna().sum()

In [None]:
# Second independent copy of the raw data, used for the fill-in examples.
df2 = df.copy()
# Per-column missing-value counts of the new copy.
df2.isna().sum()

## Imputing Missing Values

In [None]:
# Replace missing petal lengths with the column median (the median skips NaN).
petal_length = df2["PetalLengthCm"]
m = petal_length.median()
df2["PetalLengthCm"] = petal_length.fillna(value=m)
# Show rows 20 and 37 -- presumably the rows that had a missing petal length;
# verify against the null listing.
df2.iloc[[20, 37]]

In [None]:
from sklearn.impute import SimpleImputer

# Reload the raw file so this example starts from unmodified data.
df = pd.read_csv("Iris.csv")

# Fill every NaN with the mean of its own column.
imp = SimpleImputer(strategy="mean", missing_values=np.nan)

# First four columns by position.
# NOTE(review): if Iris.csv begins with an Id column, this slice picks up Id
# and leaves out the last measurement column -- confirm the column layout.
df_n = df.iloc[:, :4]

# fit_transform returns a plain NumPy array, not a DataFrame.
imputed = imp.fit_transform(df_n)
imputed

In [None]:
# Wrap the imputed array in a DataFrame again, giving the four columns short names.
names = [
    "sepal_len",
    "sepal_width",
    "petal_len",
    "petal_width",
]
df3 = pd.DataFrame(data=imputed, columns=names)

In [None]:
# Carry the species labels over from the raw frame, filling each missing label
# with the next valid label below it. Series.bfill() replaces the deprecated
# fillna(method="bfill") spelling, which newer pandas releases warn about.
# A missing label in the final row has nothing below it and would stay NaN.
df3["Species"] = df["Species"].bfill()
# Show row 28 -- presumably the row whose label was missing; verify against
# the null listing.
df3.iloc[[28], :]

In [None]:
# Independent copy of the imputed frame for the encoding steps; df3 keeps the
# original string labels.
df4 = df3.copy()
df4.shape

In [None]:
# Per-column count of missing values remaining in df4.
df4.isna().sum()

## Handling Categorical Data

In [None]:
# Number of rows carrying each species label in df4.
df4["Species"].value_counts()

In [None]:
# Hand-written integer code for each species label.
mapping = {
    "Iris-setosa": 3,
    "Iris-virginica": 2,
    "Iris-versicolor": 1,
}
# Swap every label for its code; a label missing from the dict becomes NaN.
species_codes = df4["Species"].map(mapping)
df4["Species"] = species_codes

In [None]:
# Preview df4 now that the Species labels have been replaced by integer codes.
df4.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the string labels of df3 (df4 already holds the hand-mapped codes).
# LabelEncoder numbers the classes in sorted order, so these codes are
# unrelated to the manual mapping applied to df4.
le = LabelEncoder()
species_labels = df3["Species"]
y = le.fit_transform(species_labels)
y

## Inverse Encoding

In [None]:
# Use the fitted encoder to turn the integer codes in y back into label strings.
actual=le.inverse_transform(y)

In [None]:
# Display the decoded labels.
actual

## One-Hot Encoding

In [None]:
from sklearn.preprocessing import OneHotEncoder

# The encoder expects 2-D input, so Species is selected as a one-column frame.
X = df3.loc[:, ["Species"]]
ohe = OneHotEncoder()
# fit_transform returns a sparse matrix; toarray() makes it dense for display.
encoded = ohe.fit_transform(X)
encoded.toarray()

In [None]:
# pandas equivalent of the one-hot encoding: one indicator column per species.
# Built from df3, whose Species column has been back-filled, so it matches the
# OneHotEncoder cell. The raw df column may still contain NaN, which
# get_dummies would silently turn into an all-zero row.
pd.get_dummies(df3['Species'])

## Feature Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each selected column of df4 to the [0, 1] range.
# NOTE(review): columns 0-4 include the integer-coded Species column as well
# as the four measurements -- confirm that scaling the label is intended.
x = df4.iloc[:, :5]
mmscaler = MinMaxScaler()
x_scaled = mmscaler.fit(x).transform(x)

In [None]:
# Display the scaled array produced by the MinMaxScaler.
x_scaled

## Select Significant Features

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression

# Candidate features: the four measurement columns of df4.
X = df4.iloc[:, :4]
# Target: Species is stored as integers but is a class label, so a
# classification score such as chi2 applies (chi2 also requires non-negative
# features). f_regression is meant for a continuous numeric target.
y = df4["Species"]

# Keep the two features with the highest chi2 score.
selectk = SelectKBest(score_func=chi2, k=2)
X_new = selectk.fit_transform(X, y)
# p-value of each of the four candidate features.
selectk.pvalues_

In [None]:
# Wrap the two selected feature columns in a DataFrame (default integer
# column labels) and display it.
df_new = pd.DataFrame(data=X_new)
df_new

In [None]:
from sklearn.model_selection import train_test_split

# Features are the four measurement columns of df3; the target is its string
# Species label.
X = df3.iloc[:, :4]
y = df3["Species"]

# Hold out 20% of the rows for testing. The rows are shuffled first, and the
# fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=1,
)

In [None]:
# (rows, columns) of the training features.
X_train.shape

In [None]:
# (rows, columns) of the held-out test features.
X_test.shape

In [None]:
# Shape of the full frame, for comparison with the train and test shapes.
df3.shape

In [None]:
# Class counts in the training labels; shows how evenly the shuffled split sampled each species.
y_train.value_counts()