### Example 1: Dataframegroupby type doesn't like the .isin() or .sortvalues() methods.

In [1]:
import pandas as pd
import numpy as np

file = "Resources/purchase_data.csv"

df = pd.read_csv(file)
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [5]:
# QUESTION
# This throws an error due to sort_values:
# "You can't sort values since it's a DataFrameGroupBy object: .sort_values(by='Item ID')""
# This led me to the world of apply/lambda, query, and filter. Which should be used here? Which should not.
df_type_groupby = df[['Item ID','Item Name','Price']].groupby(['Item ID']).sort_values(by='Price')

df_type_groupby

pandas.core.groupby.groupby.DataFrameGroupBy

In [4]:
# QUESTION
# Similar issue here. 
# 'DataFrameGroupBy' object doesn't like the `.isin` method.
# How can I use apply plus a lambda function with a list?

items = ['Nirvana','Final Critic']
df_type_groupby = df[['Item ID','Item Name','Price']].groupby(['Item ID']).isin(items)


AttributeError: Cannot access callable attribute 'isin' of 'DataFrameGroupBy' objects, try using the 'apply' method

## Question 2: why does the location of the column renaming matter?
This in not related to the nature of 'DataFrameGroupBy' objects. It seems to be a general dataframe thing.

In [5]:
# Setup
import pandas as pd

# Generate dataframe.

data = {'Name': ['Joe','Bob', 'Suzy', 'Sam'],
        'Fave Color': ['Black','White','White','Magenta'],
        'Earnings': [100,200,444,382],
        'Rating': [5,3,2,5],
        'Height': [5,7,6,4]
       }
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Fave Color,Earnings,Rating,Height
0,Joe,Black,100,5,5
1,Bob,White,200,3,7
2,Suzy,White,444,2,6
3,Sam,Magenta,382,5,4


In [6]:
# Group data by name to create multi-level index.
df_colors = df[['Earnings','Rating','Fave Color']].groupby(['Fave Color']).agg({'Earnings': ["mean","sum"],'Rating': "mean"})#.reset_index()

# Print type of dataframe.
print(type(df_colors))

# Show dataframe.
df_colors

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,Earnings,Earnings,Rating
Unnamed: 0_level_1,mean,sum,mean
Fave Color,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Black,100,100,5.0
Magenta,382,382,5.0
White,322,644,2.5


In [7]:
# Combine multi-level index into one index.
df_colors.columns = [''.join(col).strip() for col in df_colors.columns.values]
df_colors

Unnamed: 0_level_0,Earningsmean,Earningssum,Ratingmean
Fave Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Black,100,100,5.0
Magenta,382,382,5.0
White,322,644,2.5


In [8]:
# Rename columns.
df_colors.rename(columns={'Earningsmean':'Avg. Earnings','Earningssum':"Total Earnings","Ratingmean":"Avg. Rating"})

Unnamed: 0_level_0,Avg. Earnings,Total Earnings,Avg. Rating
Fave Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Black,100,100,5.0
Magenta,382,382,5.0
White,322,644,2.5


In [9]:
# If we filter the dataframe for any set of values, the columns revert to their original names.
# QUESTION -- why? :) 

colors = ['Black','Magenta']
df_colors[df_colors.index.isin(colors)].sort_values('Earningssum',ascending=False)

Unnamed: 0_level_0,Earningsmean,Earningssum,Ratingmean
Fave Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Magenta,382,382,5.0
Black,100,100,5.0
