In [1]:
# Import the pandas and numpy library
import pandas as pd
import numpy as np

# Read the iris measurements from the CSV file stored under assets/
iris = pd.read_csv(filepath_or_buffer='assets/iris.csv')

# Preview the top of the table (the first 5 rows)
print(iris.head(n=5))

# Summarise the structure of the frame: index range, the dtype and non-null
# count of every column, and the approximate memory usage
iris.info()

   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [None]:
# Average of a single column
iris.loc[:, 'sepal_length'].mean()

In [None]:
# Spread of a single column (pandas computes the sample standard deviation,
# ddof=1, by default)
iris.loc[:, 'sepal_length'].std()

In [None]:
# Single statistics are rarely enough when exploring data, so use the pandas
# describe() method to get a fuller summary of sepal_length in one call:
# count, mean, standard deviation, minimum, the 25% / 50% / 75% quartiles and maximum.
# Note: the interquartile range (IQR) is not reported directly; it is the
# difference between the 75% and 25% values.

iris['sepal_length'].describe()

In [None]:
# Calling describe() on the whole DataFrame summarises only the numeric columns
# by default; the character (object) column `species` is left out.

iris.describe()

In [None]:
# Summary statistics for every column, including the character column:
# include='all' adds count / unique / top / freq for `species`, and any
# statistic that does not apply to a column is shown as NaN.

iris.describe(include='all')

In [None]:
# Average petal length within each species
iris['petal_length'].groupby(iris['species']).mean()


In [None]:
# Explore the summary statistics of every numeric variable separately for each
# species (one row per species, one group of describe() columns per variable)
iris.groupby('species').describe()

In [None]:
# Plotting imports: the top-level package (used to query the configuration)
# and the pyplot interface used by the plotting cells.
import matplotlib as mpl
import matplotlib.pyplot as plt

# Show which rendering backend matplotlib is using. A notebook only displays
# the value of the last expression in a cell, so this call has to come last;
# placed before an import statement its result was silently discarded.
mpl.get_backend()

In [None]:
# Overall mean of each measurement; the columns are listed in the same order
# as the names they are unpacked into
sepal_length_mean, petal_length_mean, petal_width_mean, sepal_width_mean = (
    iris[column].mean()
    for column in ('sepal_length', 'petal_length', 'petal_width', 'sepal_width')
)

# Matching standard deviation of each measurement
sepal_lengthstd, petal_lengthstd, petal_widthstd, sepal_widthstd = (
    iris[column].std()
    for column in ('sepal_length', 'petal_length', 'petal_width', 'sepal_width')
)

In [None]:
# Bar chart of the overall mean of each measurement.
# The bar heights are the *_mean variables computed in the cell above.
variable_names = ['sepal_length', 'petal_length', 'petal_width', 'sepal_width']
variable_means = [sepal_length_mean, petal_length_mean, petal_width_mean, sepal_width_mean]

# Use an explicit figure/axes pair so the labels are attached to this plot
fig, ax = plt.subplots()
ax.bar(variable_names, variable_means)

# Label the figure so that it can be understood on its own
ax.set_xlabel('Variable')
ax.set_ylabel('Mean')
ax.set_title('Mean of each iris measurement')
plt.show()

In [None]:
# Mean of every measurement within each species (one row per species)
iris_mean = iris.groupby(by='species').mean()
iris_mean

In [None]:
# Standard deviation of every measurement within each species (one row per species)
iris_std = iris.groupby(by='species').std()
iris_std

In [None]:
# Grouped bar chart: for every measurement, one bar per species showing the
# species mean, with the species standard deviation drawn as the error bar.
#
# The bar heights and error bars are read from `iris_mean` and `iris_std` (the
# per-species tables computed in the cells above) instead of being typed in by
# hand, so the figure always agrees with the data that was actually loaded.
variables = list(iris_mean.columns)  # measurements, in DataFrame column order
bar_colors = ('b', 'r', 'c')         # one colour per species, in index order

# Pair each species (groupby returns them in sorted order) with its colour.
# zip() stops at the shorter sequence, so at most len(bar_colors) species are drawn.
species_to_draw = list(zip(iris_mean.index, bar_colors))

n_groups = len(variables)

fig, ax = plt.subplots()

index = np.arange(n_groups)  # the x locations for the groups
bar_width = 0.25             # the width of the bars

opacity = 0.4
error_config = {'ecolor': '0.3'}  # colour of the error bars

# Shift each species one bar width to the right of the previous one so that
# the bars belonging to the same measurement sit side by side.
for position, (species, color) in enumerate(species_to_draw):
    ax.bar(index + position * bar_width,
           iris_mean.loc[species, variables].to_numpy(),
           bar_width,
           alpha=opacity, color=color,
           yerr=iris_std.loc[species, variables].to_numpy(),
           error_kw=error_config,
           label=str(species).capitalize())

# After creating the plot, add the information that makes it readable:
# axis labels, a title, tick labels and a legend.
ax.set_xlabel('Variable')
ax.set_ylabel('Means')
ax.set_title('Means by variable and species')

# Bars are centred on their x position, so the middle of each group lies
# half-way between the first and the last bar of that group.
ax.set_xticks(index + bar_width * (len(species_to_draw) - 1) / 2)
ax.set_xticklabels([name.replace('_', ' ') for name in variables])
ax.legend()

fig.tight_layout()
plt.show()