In [1]:
# Melt: load the table, then reshape it from wide to long format.
import pandas as pd

# The file is read with header=None, so the column labels are supplied explicitly.
# NOTE(review): if this file encodes missing values with a sentinel such as '?',
# consider passing na_values to read_csv — confirm against the data.
column_names = [
    'Class', 'Age', 'Menopause', 'Tumor_size', 'inv_nodes',
    'node_caps', 'deg_malig', 'breast', 'breast_quad', 'irradiat',
]
df = pd.read_csv('breast-cancer.data', header=None, names=column_names)
print(df.head(), "\n")

# Class and Age stay as identifier columns; every remaining column is unpivoted
# into one (Variable, Value) row per original cell.
df_melted = df.melt(
    id_vars=['Class', 'Age'],
    var_name='Variable',
    value_name='Value',
)
print(df_melted.head())

                  Class    Age Menopause Tumor_size inv_nodes node_caps  \
0  no-recurrence-events  30-39   premeno      30-34       0-2        no   
1  no-recurrence-events  40-49   premeno      20-24       0-2        no   
2  no-recurrence-events  40-49   premeno      20-24       0-2        no   
3  no-recurrence-events  60-69      ge40      15-19       0-2        no   
4  no-recurrence-events  40-49   premeno        0-4       0-2        no   

   deg_malig breast breast_quad irradiat  
0          3   left    left_low       no  
1          2  right    right_up       no  
2          2   left    left_low       no  
3          2  right     left_up       no  
4          2  right   right_low       no   

                  Class    Age   Variable    Value
0  no-recurrence-events  30-39  Menopause  premeno
1  no-recurrence-events  40-49  Menopause  premeno
2  no-recurrence-events  40-49  Menopause  premeno
3  no-recurrence-events  60-69  Menopause     ge40
4  no-recurrence-events  40-49  Me

In [2]:
# Pivot: spread the non-key columns across one column level per Menopause value.
# Only the first row of each (Class, Age) pair is kept, which makes the pivot
# index unique; as a result each index row carries values under a single
# Menopause level and NaN under the others.
df_no_duplicates = df.drop_duplicates(subset=['Class', 'Age'], keep='first')
df_pivoted = df_no_duplicates.pivot(
    index=['Class', 'Age'],
    columns='Menopause',
)

print(df_pivoted.head())

                           Tumor_size         inv_nodes         node_caps  \
Menopause                        ge40 premeno      ge40 premeno      ge40   
Class                Age                                                    
no-recurrence-events 20-29        NaN   35-39       NaN     0-2       NaN   
                     30-39        NaN   30-34       NaN     0-2       NaN   
                     40-49        NaN   20-24       NaN     0-2       NaN   
                     50-59        NaN   25-29       NaN     0-2       NaN   
                     60-69      15-19     NaN       0-2     NaN        no   

                                   deg_malig         breast          \
Menopause                  premeno      ge40 premeno   ge40 premeno   
Class                Age                                              
no-recurrence-events 20-29      no       NaN     2.0    NaN   right   
                     30-39      no       NaN     3.0    NaN    left   
                     40-49  

In [3]:
# Groupby + aggregation

# Bucket the rows by every (Class, Age) combination.
grouped_df = df.groupby(['Class', 'Age'])

# For each bucket, report the first value seen in each of three columns.
# Named aggregation is used: output_column=(source_column, function).
aggregated_df = grouped_df.agg(
    Tumor_size=('Tumor_size', 'first'),
    inv_nodes=('inv_nodes', 'first'),
    deg_malig=('deg_malig', 'first'),
)

print(aggregated_df)

                           Tumor_size inv_nodes  deg_malig
Class                Age                                  
no-recurrence-events 20-29      35-39       0-2          2
                     30-39      30-34       0-2          3
                     40-49      20-24       0-2          2
                     50-59      25-29       0-2          2
                     60-69      15-19       0-2          2
                     70-79      20-24       0-2          3
recurrence-events    30-39        0-4       0-2          2
                     40-49      40-44       0-2          1
                     50-59      15-19       0-2          2
                     60-69      40-44       0-2          2
                     70-79      15-19      9-11          1


In [4]:
# Iteration: walk over the (Class, Age) groups one at a time.
grouped_df = df.groupby(['Class', 'Age'])

# Selecting 'breast' on the groupby yields (group key, breast Series) pairs,
# so each iteration can tally the breast categories for that group directly.
for name, breast_column in grouped_df['breast']:
    breast_counts = breast_column.value_counts()
    print(f"Group: {name}")
    print(breast_counts, "\n")

Group: ('no-recurrence-events', '20-29')
breast
right    1
Name: count, dtype: int64 

Group: ('no-recurrence-events', '30-39')
breast
left     11
right    10
Name: count, dtype: int64 

Group: ('no-recurrence-events', '40-49')
breast
right    36
left     27
Name: count, dtype: int64 

Group: ('no-recurrence-events', '50-59')
breast
left     39
right    32
Name: count, dtype: int64 

Group: ('no-recurrence-events', '60-69')
breast
left     23
right    17
Name: count, dtype: int64 

Group: ('no-recurrence-events', '70-79')
breast
left     3
right    2
Name: count, dtype: int64 

Group: ('recurrence-events', '30-39')
breast
left     10
right     5
Name: count, dtype: int64 

Group: ('recurrence-events', '40-49')
breast
left     14
right    13
Name: count, dtype: int64 

Group: ('recurrence-events', '50-59')
breast
left     17
right     8
Name: count, dtype: int64 

Group: ('recurrence-events', '60-69')
breast
right    10
left      7
Name: count, dtype: int64 

Group: ('recurrence-events'