In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the Iris CSV file (path is relative to the current working directory)
# into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Iris.csv')

In [3]:
# Preview the first five rows of the loaded DataFrame
print(df.head(n=5))

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [4]:
# Display summary information about the DataFrame (index range, columns,
# non-null counts, dtypes, memory usage).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB
None


### Selecting Data

In [5]:
 # Pull out the 'Species' column as a Series and show it as the cell output
 column_data = df.loc[:, 'Species']
 column_data

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object

In [10]:
# Filtering data
# Keep only the rows whose SepalLengthCm value is strictly below 5.0

filtered_data = df.loc[df['SepalLengthCm'].lt(5.0)]
print(filtered_data)

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
6      7            4.6           3.4            1.4           0.3   
8      9            4.4           2.9            1.4           0.2   
9     10            4.9           3.1            1.5           0.1   
11    12            4.8           3.4            1.6           0.2   
12    13            4.8           3.0            1.4           0.1   
13    14            4.3           3.0            1.1           0.1   
22    23            4.6           3.6            1.0           0.2   
24    25            4.8           3.4            1.9           0.2   
29    30            4.7           3.2            1.6           0.2   
30    31            4.8           3.1            1.6           0.2   
34    35            

###  Filter Based on Multiple Conditions

In [11]:
# Keep rows that satisfy both conditions at once:
# SepalLengthCm above 5 AND PetalWidthCm below 1.5
filtered_data = df.loc[df['SepalLengthCm'].gt(5) & df['PetalWidthCm'].lt(1.5)]
print(filtered_data)

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
5      6            5.4           3.9            1.7           0.4   
10    11            5.4           3.7            1.5           0.2   
14    15            5.8           4.0            1.2           0.2   
15    16            5.7           4.4            1.5           0.4   
16    17            5.4           3.9            1.3           0.4   
17    18            5.1           3.5            1.4           0.3   
18    19            5.7           3.8            1.7           0.3   
19    20            5.1           3.8            1.5           0.3   
20    21            5.4           3.4            1.7           0.2   
21    22            5.1           3.7            1.5           0.4   
23    24            5.1           3.3            1.7           0.5   
27    28            5.2           3.5            1.5           0.2   
28    29            

### Count Unique Values in a Column

In [12]:
# Tally how many rows belong to each species (one count per distinct value)
print(df.loc[:, 'Species'].value_counts())

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64


### Create a New Column (Feature Engineering)

In [13]:
# Feature engineering: add a SepalArea column computed as
# SepalLengthCm multiplied element-wise by SepalWidthCm
df['SepalArea'] = df['SepalLengthCm'].mul(df['SepalWidthCm'])
print(df.head(n=5))

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species  \
0   1            5.1           3.5            1.4           0.2  Iris-setosa   
1   2            4.9           3.0            1.4           0.2  Iris-setosa   
2   3            4.7           3.2            1.3           0.2  Iris-setosa   
3   4            4.6           3.1            1.5           0.2  Iris-setosa   
4   5            5.0           3.6            1.4           0.2  Iris-setosa   

   SepalArea  
0      17.85  
1      14.70  
2      15.04  
3      14.26  
4      18.00  


In [14]:
# Order the rows from the largest SepalLengthCm to the smallest and
# show the top five
sorted_df = df.sort_values('SepalLengthCm', ascending=False)
print(sorted_df.head(n=5))

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
131  132            7.9           3.8            6.4           2.0   
135  136            7.7           3.0            6.1           2.3   
122  123            7.7           2.8            6.7           2.0   
117  118            7.7           3.8            6.7           2.2   
118  119            7.7           2.6            6.9           2.3   

            Species  SepalArea  
131  Iris-virginica      30.02  
135  Iris-virginica      23.10  
122  Iris-virginica      21.56  
117  Iris-virginica      29.26  
118  Iris-virginica      20.02  


In [15]:
# Group by Species and compute the mean of every numeric column.
# numeric_only=True makes the "numeric columns only" intent explicit: since
# pandas 2.0 the default is numeric_only=False, so mean() raises a TypeError
# as soon as the frame carries any non-numeric column besides the group key.
# NOTE(review): the Id column is numeric and is therefore averaged too, but a
# mean of row identifiers is probably not meaningful -- consider dropping it.
grouped_data = df.groupby('Species').mean(numeric_only=True)
print(grouped_data)

                    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  \
Species                                                              
Iris-setosa       25.5          5.006         3.418          1.464   
Iris-versicolor   75.5          5.936         2.770          4.260   
Iris-virginica   125.5          6.588         2.974          5.552   

                 PetalWidthCm  SepalArea  
Species                                   
Iris-setosa             0.244    17.2088  
Iris-versicolor         1.326    16.5262  
Iris-virginica          2.026    19.6846  
