In [3]:
import pandas as pd
from pandas import Series, DataFrame

# Sample product table used by every cell below, laid out column by column
# (one list entry per product, in row order).
df = DataFrame({
    'product_id': [23, 96, 97, 15, 87],
    'name': ['computer', 'Python Workout', 'Pandas Workout', 'banana', 'sandwich'],
    'wholesale_price': [500, 35, 35, 0.5, 3],  # the 0.5 entry makes this column float
    'retail_price': [1000, 75, 75, 1, 5],
    'sales': [100, 1000, 500, 200, 300],
})

df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales
0,23,computer,500.0,1000,100
1,96,Python Workout,35.0,75,1000
2,97,Pandas Workout,35.0,75,500
3,15,banana,0.5,1,200
4,87,sandwich,3.0,5,300


In [8]:
# net = retail price minus wholesale price, stored as a new column on df.
df['net'] = df['retail_price'].sub(df['wholesale_price'])
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,net
0,23,computer,500.0,1000,100,500.0
1,96,Python Workout,35.0,75,1000,40.0
2,97,Pandas Workout,35.0,75,500,40.0
3,15,banana,0.5,1,200,0.5
4,87,sandwich,3.0,5,300,2.0


In [9]:
# List the net column in ascending order.
df.loc[:, 'net'].sort_values(ascending=True)

3      0.5
4      2.0
1     40.0
2     40.0
0    500.0
Name: net, dtype: float64

In [5]:
# Value of the net column at the 0.75 quantile.
df.loc[:, 'net'].quantile(q=0.75)

40.0

In [1]:
# Scratch arithmetic: 0.75 of the 5 rows.
# NOTE(review): pandas' default linear interpolation places the 0.75 quantile at
# sorted position (n - 1) * q = 3, not n * q -- confirm which figure is intended.
0.75 * 5

3.75

In [11]:
# Boolean mask: True for rows whose net is strictly above the 0.75 quantile of net.
q75_net = df['net'].quantile(0.75)
df['net'].gt(q75_net)

0     True
1    False
2    False
3    False
4    False
Name: net, dtype: bool

# Beyond 1


Show the ID and name of those products whose net income is in the top quartile (that is, above the 0.75 quantile).

In [3]:
# (Re)compute net, build a mask for rows strictly above its 0.75 quantile,
# then show only the ID and name of the matching products.
df['net'] = df['retail_price'].sub(df['wholesale_price'])
above_q75 = df['net'].gt(df['net'].quantile(0.75))
df.loc[above_q75, ['product_id', 'name']]

Unnamed: 0,product_id,name
0,23,computer


In [4]:
# The same filter written as a query string; columns are picked afterwards.
top_quarter_rows = df.query('net > net.quantile(0.75)')
top_quarter_rows.loc[:, ['product_id', 'name']]

Unnamed: 0,product_id,name
0,23,computer


# Beyond 2

Show the ID and name of products that have lower-than-average sales numbers and whose wholesale price is higher than average.

In [11]:
# Two independent masks, each compared against its own column mean,
# combined with & so that both conditions must hold.
below_mean_sales = df['sales'].lt(df['sales'].mean())
above_mean_wholesale = df['wholesale_price'].gt(df['wholesale_price'].mean())
df.loc[below_mean_sales & above_mean_wholesale, ['product_id', 'name']]

Unnamed: 0,product_id,name
0,23,computer


In [12]:
# With two mean comparisons, the query-string form reads more easily than the masks.
matching_rows = df.query('sales < sales.mean() & wholesale_price > wholesale_price.mean()')
matching_rows.loc[:, ['product_id', 'name']]

Unnamed: 0,product_id,name
0,23,computer


# Beyond 3

Show the names, wholesale prices, and retail prices of products with product IDs between 80 and 100 that sold fewer than 400 units.

In [13]:
# ID strictly between 80 and 100 (both bounds excluded) and fewer than 400 units sold.
id_in_range = df['product_id'].gt(80) & df['product_id'].lt(100)
under_400_sold = df['sales'].lt(400)
df.loc[id_in_range & under_400_sold, ['name', 'wholesale_price', 'retail_price']]

Unnamed: 0,name,wholesale_price,retail_price
4,sandwich,3.0,5


In [8]:
# The same three conditions as a single query string; columns are picked afterwards.
in_range_rows = df.query('product_id > 80 & product_id < 100 & sales < 400')
in_range_rows.loc[:, ['name', 'wholesale_price', 'retail_price']]

Unnamed: 0,name,wholesale_price,retail_price
4,sandwich,3.0,5
