#  Iris Dataset

## Task 1: Load the Dataset 

### 1. Load the CSV file using np.genfromtxt() or np.loadtxt() (skip the header if needed).

In [70]:
import numpy as np
data = np.genfromtxt("iris.csv", delimiter=",", skip_header=1)
print("Data:\n",data[:5])

Data:
 [[1.  5.1 3.5 1.4 0.2 nan]
 [2.  4.9 3.  1.4 0.2 nan]
 [3.  4.7 3.2 1.3 0.2 nan]
 [4.  4.6 3.1 1.5 0.2 nan]
 [5.  5.  3.6 1.4 0.2 nan]]


### 2. Slice out the numerical columns into a separate NumPy array (4 features only). 

In [71]:
features = data[: , 1:5]
# features = np.transpose(numerical_columns)
print("Features: \n",features[:5])

Features: 
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]


### 3. Print the shape of the resulting NumPy array. 

In [72]:
features_shape = np.shape(features)
print("Shape of features array: ",features_shape)

Shape of features array:  (150, 4)


#### Extracting Individual Feature

In [73]:
sepal_length = features[:,0]
sepal_width = features[:,1]
petal_length = features[:,2]
petal_width = features[:,3]
print("Sepal length: \n", sepal_length)
print("Sepal width: \n", sepal_width)
print("petal length: \n", petal_length)
print("petal width: \n", petal_width)

Sepal length: 
 [5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.  5.  5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
 5.5 4.9 4.4 5.1 5.  4.5 4.4 5.  5.1 4.8 5.1 4.6 5.3 5.  7.  6.4 6.9 5.5
 6.5 5.7 6.3 4.9 6.6 5.2 5.  5.9 6.  6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
 6.3 6.1 6.4 6.6 6.8 6.7 6.  5.7 5.5 5.5 5.8 6.  5.4 6.  6.7 6.3 5.6 5.5
 5.5 6.1 5.8 5.  5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.  6.9 5.6 7.7 6.3 6.7 7.2
 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.  6.9 6.7 6.9 5.8 6.8
 6.7 6.7 6.3 6.5 6.2 5.9]
Sepal width: 
 [3.5 3.  3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3.  3.  4.  4.4 3.9 3.5
 3.8 3.8 3.4 3.7 3.6 3.3 3.4 3.  3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 3.1 3.2
 3.5 3.1 3.  3.4 3.5 2.3 3.2 3.5 3.8 3.  3.8 3.2 3.7 3.3 3.2 3.2 3.1 2.3
 2.8 2.8 3.3 2.4 2.9 2.7 2.  3.  2.2 2.9 2.9 3.1 3.  2.7 2.2 2.5 3.2 2.8
 2.5 2.8 2.9 3.  2.8 3.  2.9 2.6 2.4 2.4 2.7 2.7 3.  3.4 3.1 2.3 3. 

## Task 2: Basic Array Operations 



### 1. Compute the mean, max, and min for each column. 


In [74]:
mean = np.mean(features,axis=0)
max = np.max(features,axis=0)
min = np.min(features,axis=0)
print("Mean: ", mean)
print("Max: ", max)
print("Min: ", min)


Mean:  [5.84333333 3.054      3.75866667 1.19866667]
Max:  [7.9 4.4 6.9 2.5]
Min:  [4.3 2.  1.  0.1]


### 2. Calculate the standard deviation and variance for the dataset. 

In [75]:
variance = np.var(features, axis=0)
standard_deviation = np.std(features, axis=0)
print("Variance: ", np.round(variance, 2))
print("Standard deviation: ", np.round(standard_deviation, 2))

Variance:  [0.68 0.19 3.09 0.58]
Standard deviation:  [0.83 0.43 1.76 0.76]


### 3. Normalize the data using Z-score normalization: 
z=x−μσz = \frac{x - \mu}{\sigma} 

In [76]:
# Z-score normalization
normalized_features = (features - mean) / standard_deviation

# Display the first 5 rows of normalized data
print("Normalized Features:\n", np.round(normalized_features[:5], 2))

Normalized Features:
 [[-0.9   1.03 -1.34 -1.31]
 [-1.14 -0.12 -1.34 -1.31]
 [-1.39  0.34 -1.4  -1.31]
 [-1.51  0.11 -1.28 -1.31]
 [-1.02  1.26 -1.34 -1.31]]


## Task 3: Indexing and Slicing 

### 1. Extract only the Sepal Length column. 

In [77]:
sepal_length = features[:,0]
print("Sepal length: \n", sepal_length)

Sepal length: 
 [5.1 4.9 4.7 4.6 5.  5.4 4.6 5.  4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4 5.1
 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.  5.  5.2 5.2 4.7 4.8 5.4 5.2 5.5 4.9 5.
 5.5 4.9 4.4 5.1 5.  4.5 4.4 5.  5.1 4.8 5.1 4.6 5.3 5.  7.  6.4 6.9 5.5
 6.5 5.7 6.3 4.9 6.6 5.2 5.  5.9 6.  6.1 5.6 6.7 5.6 5.8 6.2 5.6 5.9 6.1
 6.3 6.1 6.4 6.6 6.8 6.7 6.  5.7 5.5 5.5 5.8 6.  5.4 6.  6.7 6.3 5.6 5.5
 5.5 6.1 5.8 5.  5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8 7.1 6.3 6.5 7.6 4.9 7.3
 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7 6.  6.9 5.6 7.7 6.3 6.7 7.2
 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7 6.3 6.4 6.  6.9 6.7 6.9 5.8 6.8
 6.7 6.7 6.3 6.5 6.2 5.9]


### 2. Get the values for the first 10 flowers. 

In [78]:
features[:10]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

### 3. Extract flowers where Petal Length > 1.5. 

In [79]:
flowers_with_long_petals = np.where(petal_length>1.5,petal_length,0).nonzero()
print("Flowers with long petals:\n", flowers_with_long_petals)

Flowers with long petals:
 (array([  5,  11,  18,  20,  23,  24,  25,  26,  29,  30,  43,  44,  46,
        50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,
        63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,
        76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,
        89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
       128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
       141, 142, 143, 144, 145, 146, 147, 148, 149], dtype=int64),)


## Task 4: Advanced Operations 

### 1. Find the Euclidean distance between the first two rows. 

In [80]:
# First two rows of the dataset
# row1 = features[0]
# row2 = features[1]

# # Euclidean distance
# distance = np.linalg.norm(row1 - row2)

# print("Euclidean distance between first two rows:", distance)
row1 = features[0]
row2 = features[1]

distance = np.linalg.norm(row1 - row2)
print("Euclidean distance between first two rows:", distance)

Euclidean distance between first two rows: 0.5385164807134502


### 2. Count how many flowers have Sepal Width greater than the mean. 

In [81]:
# mean_sepal_width = sepal_width.mean()
# np.where(sepal_width > mean_sepal_width, 1, 0 ).sum()
mean_sepal_width = sepal_width.mean()
greater_than_mean = sepal_width > mean_sepal_width

count = np.sum(greater_than_mean)
print("No of flowers having sepal width greater than mean: ", count)

No of flowers having sepal width greater than mean:  67


### 3. Multiply two columns element-wise (e.g., SepalLength * PetalLength). 


In [82]:
new_column = sepal_length * petal_length
new_column

array([ 7.14,  6.86,  6.11,  6.9 ,  7.  ,  9.18,  6.44,  7.5 ,  6.16,
        7.35,  8.1 ,  7.68,  6.72,  4.73,  6.96,  8.55,  7.02,  7.14,
        9.69,  7.65,  9.18,  7.65,  4.6 ,  8.67,  9.12,  8.  ,  8.  ,
        7.8 ,  7.28,  7.52,  7.68,  8.1 ,  7.8 ,  7.7 ,  7.35,  6.  ,
        7.15,  7.35,  5.72,  7.65,  6.5 ,  5.85,  5.72,  8.  ,  9.69,
        6.72,  8.16,  6.44,  7.95,  7.  , 32.9 , 28.8 , 33.81, 22.  ,
       29.9 , 25.65, 29.61, 16.17, 30.36, 20.28, 17.5 , 24.78, 24.  ,
       28.67, 20.16, 29.48, 25.2 , 23.78, 27.9 , 21.84, 28.32, 24.4 ,
       30.87, 28.67, 27.52, 29.04, 32.64, 33.5 , 27.  , 19.95, 20.9 ,
       20.35, 22.62, 30.6 , 24.3 , 27.  , 31.49, 27.72, 22.96, 22.  ,
       24.2 , 28.06, 23.2 , 16.5 , 23.52, 23.94, 23.94, 26.66, 15.3 ,
       23.37, 37.8 , 29.58, 41.89, 35.28, 37.7 , 50.16, 22.05, 45.99,
       38.86, 43.92, 33.15, 33.92, 37.4 , 28.5 , 29.58, 33.92, 35.75,
       51.59, 53.13, 30.  , 39.33, 27.44, 51.59, 30.87, 38.19, 43.2 ,
       29.76, 29.89,

## Task 5: Array Reshaping and Stacking 


### 1. Reshape the array to simulate batches of size 30. 

In [83]:
print("Current array shape: ", features_shape)
batched_data= features.reshape(5,30,4)
print("Shape of array after reshaping: ", batched_data)

Current array shape:  (150, 4)
Shape of array after reshaping:  [[[5.1 3.5 1.4 0.2]
  [4.9 3.  1.4 0.2]
  [4.7 3.2 1.3 0.2]
  [4.6 3.1 1.5 0.2]
  [5.  3.6 1.4 0.2]
  [5.4 3.9 1.7 0.4]
  [4.6 3.4 1.4 0.3]
  [5.  3.4 1.5 0.2]
  [4.4 2.9 1.4 0.2]
  [4.9 3.1 1.5 0.1]
  [5.4 3.7 1.5 0.2]
  [4.8 3.4 1.6 0.2]
  [4.8 3.  1.4 0.1]
  [4.3 3.  1.1 0.1]
  [5.8 4.  1.2 0.2]
  [5.7 4.4 1.5 0.4]
  [5.4 3.9 1.3 0.4]
  [5.1 3.5 1.4 0.3]
  [5.7 3.8 1.7 0.3]
  [5.1 3.8 1.5 0.3]
  [5.4 3.4 1.7 0.2]
  [5.1 3.7 1.5 0.4]
  [4.6 3.6 1.  0.2]
  [5.1 3.3 1.7 0.5]
  [4.8 3.4 1.9 0.2]
  [5.  3.  1.6 0.2]
  [5.  3.4 1.6 0.4]
  [5.2 3.5 1.5 0.2]
  [5.2 3.4 1.4 0.2]
  [4.7 3.2 1.6 0.2]]

 [[4.8 3.1 1.6 0.2]
  [5.4 3.4 1.5 0.4]
  [5.2 4.1 1.5 0.1]
  [5.5 4.2 1.4 0.2]
  [4.9 3.1 1.5 0.1]
  [5.  3.2 1.2 0.2]
  [5.5 3.5 1.3 0.2]
  [4.9 3.1 1.5 0.1]
  [4.4 3.  1.3 0.2]
  [5.1 3.4 1.5 0.2]
  [5.  3.5 1.3 0.3]
  [4.5 2.3 1.3 0.3]
  [4.4 3.2 1.3 0.2]
  [5.  3.5 1.6 0.6]
  [5.1 3.8 1.9 0.4]
  [4.8 3.  1.4 0.3]
  [5.1 3.8 1.6

### 2. Stack two feature columns horizontally. 

In [85]:
stacked = np.column_stack((sepal_length,petal_length))
stacked

array([[5.1, 1.4],
       [4.9, 1.4],
       [4.7, 1.3],
       [4.6, 1.5],
       [5. , 1.4],
       [5.4, 1.7],
       [4.6, 1.4],
       [5. , 1.5],
       [4.4, 1.4],
       [4.9, 1.5],
       [5.4, 1.5],
       [4.8, 1.6],
       [4.8, 1.4],
       [4.3, 1.1],
       [5.8, 1.2],
       [5.7, 1.5],
       [5.4, 1.3],
       [5.1, 1.4],
       [5.7, 1.7],
       [5.1, 1.5],
       [5.4, 1.7],
       [5.1, 1.5],
       [4.6, 1. ],
       [5.1, 1.7],
       [4.8, 1.9],
       [5. , 1.6],
       [5. , 1.6],
       [5.2, 1.5],
       [5.2, 1.4],
       [4.7, 1.6],
       [4.8, 1.6],
       [5.4, 1.5],
       [5.2, 1.5],
       [5.5, 1.4],
       [4.9, 1.5],
       [5. , 1.2],
       [5.5, 1.3],
       [4.9, 1.5],
       [4.4, 1.3],
       [5.1, 1.5],
       [5. , 1.3],
       [4.5, 1.3],
       [4.4, 1.3],
       [5. , 1.6],
       [5.1, 1.9],
       [4.8, 1.4],
       [5.1, 1.6],
       [4.6, 1.4],
       [5.3, 1.5],
       [5. , 1.4],
       [7. , 4.7],
       [6.4, 4.5],
       [6.9,

### 3. Create a boolean mask to filter rows with Petal Width < 0.5. 

In [86]:
thick_petal_flower = petal_width > 0.5
thick_petal_flower

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,