Write Python code to create a class, IrisDataAnalyzer, that can read an Iris dataset from a specified CSV file path, calculate the average sizes of sepal length, sepal width, petal length, and petal width for each species, and return the results as a pandas DataFrame. Additionally, provide an example usage of the class with a given CSV file ('iris_dataset.csv').

In [1]:
import pandas as pd

In [3]:
# Load the Iris CSV into a DataFrame for the exploratory cells that follow.
data = pd.read_csv("E:/Veena/AIML/Assignment/day_06/iris_dataset.csv")

In [4]:
# Echo the DataFrame so the notebook renders it as a table.
data

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,Species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
# List the column labels of the loaded DataFrame.
data.columns

Index(['sepal.length', 'sepal.width', 'petal.length', 'petal.width',
       'Species'],
      dtype='object')

In [31]:
# Count how many rows belong to each species.
data['Species'].value_counts()

Species
Setosa        50
Versicolor    50
Virginica     50
Name: count, dtype: int64

In [38]:
# Print per-column dtypes, non-null counts and the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal.length  150 non-null    float64
 1   sepal.width   150 non-null    float64
 2   petal.length  150 non-null    float64
 3   petal.width   150 non-null    float64
 4   Species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [59]:
class IrisDataAnalyzer:
    """
    Load an Iris CSV file and answer simple summary/lookup questions about it.

    The file is read by ``readData``; every other method reads it on first
    use if that has not happened yet, so the call order does not matter.
    The CSV is expected to contain a ``Species`` column plus the
    ``sepal.*`` / ``petal.*`` measurement columns used below.
    """

    def __init__(self, filePath):
        """
        Remember where the dataset lives; nothing is read yet.

        :param filePath: path of the CSV file (anything ``pandas.read_csv`` accepts)
        """
        self.filePath = filePath
        self.irisData = None

    def readData(self):
        """
        Read the CSV at ``self.filePath`` into ``self.irisData``.
        Calling it again re-reads the file.
        """
        self.irisData = pd.read_csv(self.filePath)

    def _loaded(self):
        """
        Return the DataFrame, reading the file first when it has not been read.
        """
        if self.irisData is None:
            self.readData()
        return self.irisData

    def avgSize(self):
        """
        Calculate the mean of every numeric column for each species.

        :return: DataFrame indexed by ``Species`` with one column per numeric feature
        """
        # numeric_only keeps an extra text column from making the mean raise.
        return self._loaded().groupby('Species').mean(numeric_only=True)

    def findUnique(self):
        """
        Find the unique species.

        :return: array of the distinct ``Species`` values in order of appearance
        """
        return self._loaded()['Species'].unique()

    def countUniqueCategory(self):
        """
        Count the rows of each category in the ``Species`` feature.

        :return: Series mapping species -> number of rows
        """
        return self._loaded()['Species'].value_counts()

    def describeDataframe(self):
        """
        Get the descriptive statistics of the dataframe.

        :return: DataFrame produced by ``DataFrame.describe()``
        """
        return self._loaded().describe()

    def getListOfNumColumns(self):
        """
        Get the list of numeric columns.

        Every numeric dtype counts (ints and any float width), not just
        ``float64``.

        :return: list of column names in file order
        """
        return list(self._loaded().select_dtypes(include='number').columns)

    def getListOfObjColumns(self):
        """
        Get the list of string columns.

        Both plain ``object`` columns and pandas' dedicated string dtype are
        reported, so the result does not depend on how pandas stored the text.

        :return: list of column names in file order
        """
        is_object = pd.api.types.is_object_dtype
        is_string = pd.api.types.is_string_dtype
        return [
            column
            for column, dtype in self._loaded().dtypes.items()
            if is_object(dtype) or is_string(dtype)
        ]

    def dfOfRecNWidLen(self, rows=(5, 18, 96, 23, 78),
                       columns=('sepal.width', 'petal.length', 'Species')):
        """
        Filter the dataframe down to the given records and columns.

        :param rows: index labels of the records to keep, in output order
        :param columns: column names to keep, in output order
        :return: DataFrame holding just that selection
        :raises KeyError: if a requested label or column does not exist
        """
        # .loc needs list-likes; the tuples only serve as immutable defaults.
        return self._loaded().loc[list(rows), list(columns)]

    def getCellValue(self, r: int, c: int):
        """
        Get the value of one cell by integer position.

        :param r: zero-based row position
        :param c: zero-based column position
        :return: the scalar stored at that position (type depends on the column)
        :raises IndexError: if the position is outside the frame
        """
        return self._loaded().iat[r, c]

    def showAvgDifference(self):
        """
        Find the difference between the averages of petal.width and petal.length.

        :return: ``mean(petal.width) - mean(petal.length)``
        """
        frame = self._loaded()
        return frame['petal.width'].mean() - frame['petal.length'].mean()
    
# Example usage: load the dataset once, then walk through each exercise.
# NOTE: the path points at the author's machine; change it to wherever
# iris_dataset.csv lives in your environment.
ida = IrisDataAnalyzer("E:/Veena/AIML/Assignment/day_06/iris_dataset.csv")
ida.readData()
avgSpecies = ida.avgSize()

# 1. Average of species
print(f"\n1. Average of species \n{avgSpecies}")

# 2. To find  unique species
unqSpecies = ida.findUnique()
print(f"\n2. Unique species \n{unqSpecies}")

# 3. To find the unique categories count in the Species feature
cntUnqSpecies = ida.countUniqueCategory()
print(f"\n3. Unique category value count for species \n{cntUnqSpecies}")

# 4. Get the dataframe description to get to know the dataframe better?
print("\n4. Get the dataframe description to get to know the dataframe better \n")
df = ida.describeDataframe()
print(df)

# 5. Get the list of 1. Numerical columns, 2. Categorical columns dynamically
print("\n5. Get the list of 1. Numerical columns, 2. Categorical columns dynamically\n\t")
dfNum = ida.getListOfNumColumns()
print(f"\n\t 1. List of Numerical Columns {dfNum}")
dfObj = ida.getListOfObjColumns()
print(f"\n\t 2. List of Object Columns {dfObj}")

# 6. Filter the dataframe for records - 5,18,96,23,78 and columns - sepal.width, petal.length, Species.
dfRecLenWid = ida.dfOfRecNWidLen()
print(f"\n 6. Filter the dataframe for records - 5,18,96,23,78 and columns - sepal.width, petal.length, Species.\n\n {dfRecLenWid}")

# 7. Cell value at a user-chosen position (the exercise asks for row=3, column index=2).
r = int(input("Enter row number : "))
c = int(input("Enter column number : "))
cellVal = ida.getCellValue(r, c)
# Report the position that was actually entered rather than a fixed "3, 2".
print(f"\n\n 7. cell value at the position row={r} and column index = {c} {cellVal}")

# 8. Find the difference between average of petal.width & petal.length
diff = ida.showAvgDifference()
print(f"\n\n 8. The difference between average of petal.width & petal.length {diff}")



1. Average of species 
            sepal.length  sepal.width  petal.length  petal.width
Species                                                         
Setosa             5.006        3.428         1.462        0.246
Versicolor         5.936        2.770         4.260        1.326
Virginica          6.588        2.974         5.552        2.026

2. Unique species 
['Setosa' 'Versicolor' 'Virginica']

3. Unique category value count for species 
Species
Setosa        50
Versicolor    50
Virginica     50
Name: count, dtype: int64

4. Get the dataframe description to get to know the dataframe better 

       sepal.length  sepal.width  petal.length  petal.width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.057333      3.758000     1.199333
std        0.828066     0.435866      1.765298     0.762238
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000 