Prerequisites: pandas must be installed (e.g. `pip install pandas`).

# Input

In [1]:
# Parallel lists: position i in every list refers to the same fruit.
fruit_name = [
    "dragon fruit",
    "star fruit",
    "durian",
    "lychee",
    "pomegranate",
    "cherimoya",
]
selling_prices = [142, 52, 94, 72, 47, 86]
costs_per_unit = [21, 14, 26, 18, 27, 38]
units_sold_counts = [9090, 6070, 3009, 9306, 2046, 6150]

# "Exploratory" computation 

In [2]:
import pandas

# Deliberately inefficient construction, kept only to exaggerate the contrast
# with the later cells: start from an empty DataFrame, add each column with
# its own .loc assignment, and only then attach the row labels.
df = pandas.DataFrame()
df.loc[:, "selling_prices"]  = selling_prices
df.loc[:, "costs_per_unit"]  = costs_per_unit
df.loc[:, "units_sold_counts"]  = units_sold_counts
df.index = fruit_name  # label each row with its fruit name

# Last expression of the cell: show the first rows of the frame.
df.head()

Unnamed: 0,selling_prices,costs_per_unit,units_sold_counts
dragon fruit,142,21,9090
star fruit,52,14,6070
durian,94,26,3009
lychee,72,18,9306
pomegranate,47,27,2046


In [3]:
def _profit_for_row(row):
    """Return (selling price - cost per unit) * units sold for one row of ``df``."""
    margin_per_unit = row["selling_prices"] - row["costs_per_unit"]
    return margin_per_unit * row["units_sold_counts"]


# Row-wise apply (axis=1): the helper is called once per row of the frame.
profit_per_fruit = df.apply(_profit_for_row, axis=1)
print(list(profit_per_fruit))

[1099890, 230660, 204612, 502524, 40920, 295200]


# "Simplify"

In [4]:
# Build the whole frame with a single constructor call: one mapping entry per
# column, with the fruit names passed as the row index.
column_data = {
    "selling_prices": selling_prices,
    "costs_per_unit": costs_per_unit,
    "units_sold_counts": units_sold_counts,
}
df = pandas.DataFrame(column_data, index=fruit_name)

df.head()

Unnamed: 0,selling_prices,costs_per_unit,units_sold_counts
dragon fruit,142,21,9090
star fruit,52,14,6070
durian,94,26,3009
lychee,72,18,9306
pomegranate,47,27,2046


In [5]:
# Column-wise arithmetic on the whole frame: per-unit margin first, then
# multiplied by the number of units sold.
margin_per_unit = df["selling_prices"] - df["costs_per_unit"]
profit_per_fruit = margin_per_unit * df["units_sold_counts"]
print(list(profit_per_fruit))

[1099890, 230660, 204612, 502524, 40920, 295200]


In [6]:
# Plain-Python version: no DataFrame, just walk the three input lists in step.
profit_per_fruit = []

for price, cost, units_sold in zip(selling_prices, costs_per_unit, units_sold_counts):
    # (selling price - cost per unit) * units sold, appended in input order
    profit_per_fruit.append((price - cost) * units_sold)

print(profit_per_fruit)

[1099890, 230660, 204612, 502524, 40920, 295200]


# Race!

In [7]:
%%timeit

# Dumbest way: the same steps as the "exploratory" cells, repeated here so the
# whole sequence is timed -- empty frame, one .loc assignment per column,
# index attached afterwards, then a row-wise apply (axis=1) with a lambda.
df = pandas.DataFrame()
df.loc[:, "selling_prices"]  = selling_prices
df.loc[:, "costs_per_unit"]  = costs_per_unit
df.loc[:, "units_sold_counts"]  = units_sold_counts
df.index = fruit_name
profit_per_fruit = df.apply(lambda x: (x["selling_prices"] - x["costs_per_unit"]) * x["units_sold_counts"], axis=1)

1.78 ms ± 41.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
%%timeit

# Still with pandas, but sped up: the frame is built with one constructor call
# and the profit is computed with column-wise arithmetic instead of apply.
df = pandas.DataFrame(
    {
        "selling_prices": selling_prices,
        "costs_per_unit": costs_per_unit,
        "units_sold_counts": units_sold_counts,
    }
    # index=fruit_name,  # index deliberately left out, so this timing excludes setting it
)

# Compute: (selling price - cost per unit) * units sold, for all rows at once.
profit_per_fruit = (df["selling_prices"] - df["costs_per_unit"]) * df["units_sold_counts"]

503 µs ± 11.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [9]:
%%timeit

# Without pandas: an explicit for-loop over the three input lists zipped
# together, appending one profit value per fruit.
profit_per_fruit = []

for price, cost, units_sold in zip(selling_prices, costs_per_unit, units_sold_counts):
    profit = (price - cost) * units_sold  # per-unit margin times units sold
    profit_per_fruit.append(profit)

1 µs ± 3.23 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [10]:
%%timeit

# Without pandas, with list comprehension.
# Each x is one tuple produced by zip: x[0] is the selling price, x[1] the
# cost per unit, x[2] the units sold.
# NOTE(review): this cell indexes into the tuple while the for-loop cell
# unpacks it into names; that difference may affect the timing comparison --
# confirm before drawing conclusions about loop vs. comprehension.
profit_per_fruit = [
    (x[0] - x[1]) * x[2] for x in
    zip(selling_prices, costs_per_unit, units_sold_counts)
]

1.19 µs ± 12.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
