### Load the IRIS Dataset
---

In [None]:
from sklearn import datasets

# load_iris() returns a dict-like Bunch; the feature matrix lives in `.data`.
iris = datasets.load_iris()

# Printing the Bunch itself would dump every array it holds, so slice the
# feature matrix to show only the first 5 samples.
print(iris.data[:5])  # Print first 5 rows


#### 1. Load the Dataset from URL
----

In [34]:
import pandas as pd

# Location of the raw Iris CSV in the UCI repository.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Column labels are supplied by hand and passed to read_csv via `names`.
columns = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "species",
]

# header=None is what pandas already infers when `names` is given; spelled out
# here so the first line of the file is visibly treated as data.
iris_df = pd.read_csv(url, header=None, names=columns)
print(iris_df.head(5))


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


### 2. Describe
----

In [None]:
# Summary statistics of the DataFrame as produced by pandas' describe().
summary_stats = iris_df.describe()
print(summary_stats)

#### 3. Output using Head and Tail
----

In [None]:
# Number of rows to preview from each end of the DataFrame.
preview_rows = 30

# Using Head: the first `preview_rows` rows
first_rows = iris_df.head(preview_rows)
print(first_rows)

# Using Tail: the last `preview_rows` rows
last_rows = iris_df.tail(preview_rows)
print(last_rows)


#### 4. Missing value Handling
----

In [None]:
# Missing-value demo. The NaNs are injected into a copy so that the clean
# `iris_df` used by the other cells is not corrupted by artificial gaps.
import numpy as np

iris_missing = iris_df.copy()

# Creating null values
iris_missing.loc[2, "sepal_length"] = np.nan  # Insert NaN in row index 2, column "sepal_length"
iris_missing.loc[5, "petal_width"] = np.nan  # Insert NaN in row index 5, column "petal_width"

print(iris_missing.isnull().sum())  # Shows the count of missing values per column

# Handling: replace each missing numeric value with the mean of its column.
# fillna() with a Series fills column-by-column, matching on column name.
numeric_cols = iris_missing.select_dtypes(include="number").columns
iris_imputed = iris_missing.fillna(iris_missing[numeric_cols].mean())

print(iris_imputed.isnull().sum())  # Count of missing values per column after imputation

In [35]:

from sklearn.preprocessing import LabelEncoder  # Import LabelEncoder

# Turn the text labels in 'species' into integer codes and keep them in a new
# column next to the original labels.
le = LabelEncoder()
species_codes = le.fit_transform(iris_df["species"])
iris_df["species_encoded"] = species_codes
print(iris_df)

     sepal_length  sepal_width  petal_length  petal_width         species  \
0             5.1          3.5           1.4          0.2     Iris-setosa   
1             4.9          3.0           1.4          0.2     Iris-setosa   
2             4.7          3.2           1.3          0.2     Iris-setosa   
3             4.6          3.1           1.5          0.2     Iris-setosa   
4             5.0          3.6           1.4          0.2     Iris-setosa   
..            ...          ...           ...          ...             ...   
145           6.7          3.0           5.2          2.3  Iris-virginica   
146           6.3          2.5           5.0          1.9  Iris-virginica   
147           6.5          3.0           5.2          2.0  Iris-virginica   
148           6.2          3.4           5.4          2.3  Iris-virginica   
149           5.9          3.0           5.1          1.8  Iris-virginica   

     species_encoded  
0                  0  
1                  0  
2     

#### 5. Feature Engineering
----

In [None]:
# Derived features: sepal area, petal area, and the petal-to-sepal area ratio.
sepal_area = iris_df["sepal_length"].mul(iris_df["sepal_width"])
petal_area = iris_df["petal_length"].mul(iris_df["petal_width"])

iris_df["sepal_area"] = sepal_area
iris_df["petal_area"] = petal_area
iris_df["area_ratio"] = petal_area.div(sepal_area)

print(iris_df.head())

In [None]:
# Sanity-check the values through the DataFrame's summary statistics.
stats_table = iris_df.describe()
print(stats_table)

### Handling Data
----

In [None]:
# Show the working DataFrame in its current state.
frame_text = str(iris_df)
print(frame_text)

### 6. Correlation
----

In [None]:
import matplotlib.pyplot as plt  # Plotting
import seaborn as sns  # Heatmap helper

from sklearn.preprocessing import LabelEncoder  # Import LabelEncoder

# Label encoding the 'species' column so the class label can take part in the
# correlation matrix as an integer code.
le = LabelEncoder()
iris_df["species_encoded"] = le.fit_transform(iris_df['species'])

# Print the updated dataframe
print(iris_df)

# Compute the correlation matrix over every numeric column. This leaves out the
# text column 'species' but keeps 'species_encoded'. Selecting "number" instead
# of hard-coding float64/int64 keeps the encoded label in the matrix even if
# the encoder produces a different integer width (e.g. int32).
co_matrix = iris_df.select_dtypes(include="number").corr()

# Plot the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(co_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)

# Add title
plt.title('Correlation Matrix')

# Show the plot
plt.show()


### Data Normalization using Min-Max Scaler

In [39]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Every column except the text label 'species' is scaled. Note that this
# includes the integer label 'species_encoded' when that column is present.
x = iris_df.drop('species', axis=1)  # Exclude the 'species' column

# Initialize MinMaxScaler
min_max_scaler = MinMaxScaler()

# Apply transformation (returns a plain NumPy array)
x_minmax = min_max_scaler.fit_transform(x)

# Convert back to a DataFrame, restoring the column names and the original row
# index so the scaled rows stay aligned with iris_df.
df_minmax = pd.DataFrame(x_minmax, columns=x.columns, index=x.index)

# Print the label and the table on separate lines; printing them on one line
# pushes the header row to the right and misaligns it with the columns.
print('Min Max Normalized Data:', df_minmax.head(), sep='\n')


Min Max Normalized Data:    sepal_length  sepal_width  petal_length  petal_width  species_encoded
0      0.222222     0.625000      0.067797     0.041667              0.0
1      0.166667     0.416667      0.067797     0.041667              0.0
2      0.111111     0.500000      0.050847     0.041667              0.0
3      0.083333     0.458333      0.084746     0.041667              0.0
4      0.194444     0.666667      0.067797     0.041667              0.0
