In [1]:
import pandas as pd

In [2]:
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

In [3]:
df = pd.read_csv('irisdata.txt', sep=',', header=None, names=columns)

In [4]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [5]:
df.groupby('class', as_index=False) \
    .agg({'sepal_width':'count'})

Unnamed: 0,class,sepal_width
0,Iris-setosa,50
1,Iris-versicolor,50
2,Iris-virginica,50


In [6]:
class_count = df.groupby('class', as_index=False) \
    .agg({'petal_width':'count'}) \
    .rename(columns={'petal_width': 'count'})

In [7]:
class_count

Unnamed: 0,class,count
0,Iris-setosa,50
1,Iris-versicolor,50
2,Iris-virginica,50


In [8]:
total_sep_len_mean = round(df['sepal_length'].mean(), 1)
total_sep_len_min = df['sepal_length'].min()
total_sep_len_max = df['sepal_length'].max()
total_sep_len_median = round(df['sepal_length'].median(), 1)
total_sep_len_std = round(df['sepal_length'].std(), 1)

In [9]:
total_sep_width_mean = round(df['sepal_width'].mean(), 1)
total_sep_width_min = df['sepal_width'].min()
total_sep_width_max = df['sepal_width'].max()
total_sep_width_median = round(df['sepal_width'].median(), 1)
total_sep_width_std = round(df['sepal_width'].std(), 1)

In [10]:
sepal_describe = df.groupby('class', as_index=False) \
    .agg({'sepal_width':['mean', 'min', 'median', 'max', 'std', 'count'], 'sepal_length':['mean', 'min', 'median', 'max', 'std', 'count']})

In [11]:
sepal_describe

Unnamed: 0_level_0,class,sepal_width,sepal_width,sepal_width,sepal_width,sepal_width,sepal_width,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,median,max,std,count,mean,min,median,max,std,count
0,Iris-setosa,3.418,2.3,3.4,4.4,0.381024,50,5.006,4.3,5.0,5.8,0.35249,50
1,Iris-versicolor,2.77,2.0,2.8,3.4,0.313798,50,5.936,4.9,5.9,7.0,0.516171,50
2,Iris-virginica,2.974,2.2,3.0,3.8,0.322497,50,6.588,4.9,6.5,7.9,0.63588,50


In [12]:
sepal_length_5 = df.query('sepal_length < 5').groupby('class', as_index=False) \
    .agg({'sepal_length': 'count'}) \
    .rename(columns={'sepal_length': 'sepal_length_5'})

sepal_length_5['sepal_length_5_%'] = sepal_length_5['sepal_length_5'].mul(100) / class_count['count']

In [13]:
sepal_length_5_6 = df.query('sepal_length >= 5 & sepal_length < 6').groupby('class', as_index=False) \
    .agg({'sepal_length': 'count'}) \
    .rename(columns={'sepal_length': 'sepal_length_5_6'})

sepal_length_5_6['sepal_length_5_6_%'] = sepal_length_5_6['sepal_length_5_6'].mul(100) / class_count['count']

In [14]:
sepal_length_6_7 = df.query('sepal_length >= 6 & sepal_length < 7').groupby('class', as_index=False) \
    .agg({'sepal_length': 'count'}) \
    .rename(columns={'sepal_length': 'sepal_length_6_7'})

sepal_length_6_7['sepal_length_6_7_%'] = sepal_length_6_7['sepal_length_6_7'].mul(100) / class_count['count']

In [15]:
sepal_length_7 = df.query('sepal_length > 7').groupby('class', as_index=False) \
    .agg({'sepal_length': 'count'}) \
    .rename(columns={'sepal_length': 'sepal_length_7'}) \

sepal_length_7['sepal_length_7_%'] = sepal_length_7['sepal_length_7'].mul(100) / class_count['count']

In [16]:
pd.concat([sepal_length_5, sepal_length_5_6, sepal_length_6_7, sepal_length_7], axis=1)

Unnamed: 0,class,sepal_length_5,sepal_length_5_%,class.1,sepal_length_5_6,sepal_length_5_6_%,class.2,sepal_length_6_7,sepal_length_6_7_%,class.3,sepal_length_7,sepal_length_7_%
0,Iris-setosa,20,40.0,Iris-setosa,30,60.0,Iris-versicolor,23.0,46.0,Iris-virginica,12.0,24.0
1,Iris-versicolor,1,2.0,Iris-versicolor,25,50.0,Iris-virginica,31.0,62.0,,,
2,Iris-virginica,1,2.0,Iris-virginica,6,12.0,,,,,,


In [17]:
sepal_width_3 = df.query('sepal_width < 3').groupby('class', as_index=False) \
    .agg({'sepal_width': 'count'}) \
    .rename(columns={'sepal_width': 'sepal_width_3'})

sepal_width_3['sepal_width_3_%'] = sepal_width_3['sepal_width_3'].mul(100) / class_count['count']

In [18]:
sepal_width_3_35 = df.query('sepal_width >= 3 & sepal_width < 3.5').groupby('class', as_index=False) \
    .agg({'sepal_width': 'count'}) \
    .rename(columns={'sepal_width': 'sepal_width_3_35'})

sepal_width_3_35['sepal_width_3_35_%'] = sepal_width_3_35['sepal_width_3_35'].mul(100) / class_count['count']

In [19]:
sepal_width_35_4 = df.query('sepal_width >= 3.5 & sepal_width < 4').groupby('class', as_index=False) \
    .agg({'sepal_width': 'count'}) \
    .rename(columns={'sepal_width': 'sepal_width_35_4'})

sepal_width_35_4['sepal_width_35_4_%'] = sepal_width_35_4['sepal_width_35_4'].mul(100) / class_count['count']

In [20]:
pd.concat([sepal_width_3, sepal_width_3_35, sepal_width_35_4], axis=1)

Unnamed: 0,class,sepal_width_3,sepal_width_3_%,class.1,sepal_width_3_35,sepal_width_3_35_%,class.2,sepal_width_35_4,sepal_width_35_4_%
0,Iris-setosa,2,4.0,Iris-setosa,27,54.0,Iris-setosa,17.0,34.0
1,Iris-versicolor,34,68.0,Iris-versicolor,16,32.0,Iris-virginica,3.0,6.0
2,Iris-virginica,21,42.0,Iris-virginica,26,52.0,,,
