# Discussion 1 Demo
### Pandas Basics

In [None]:
import pandas as pd

# DataFrame

In [None]:
# Toy dataset: crops from the game Stardew Valley, written one tuple per crop.
farm = pd.DataFrame(
    data=[
        ("Starfruit", 750, "Fruit", False),
        ("Sweet Gem Berry", 3000, "Fruit", True),
        ("Red Cabbage", 260, "Vegetable", True),
        ("Parsnip", 35, "Vegetable", True),
        ("Pumpkin", 320, "Vegetable", False),
    ],
    columns=["Crop", "Price", "Type", "Is red?"],
)
farm

In [None]:
# exporting dataframe: write it to farm.csv in the working directory.
# index=False leaves the row labels (0, 1, 2, ...) out of the file.
farm.to_csv("farm.csv", index=False)

In [None]:
# importing dataframe: read farm.csv back in, rebinding `farm`, then display it
farm = pd.read_csv("farm.csv")
farm

# Series

In [None]:
# Indexing with a single column label returns that column as a Series.
farm["Crop"]

In [None]:
# Column labels may contain spaces and punctuation; this one holds True/False values.
farm["Is red?"]

In [None]:
# The Price column as a Series of integers.
farm["Price"]

In [None]:
# Arithmetic on a Series is element-wise, like on an array:
# every price is multiplied by 148 and a new Series is returned.
farm["Price"] * 148

# Select Columns

In [None]:
# A list of labels (double brackets) returns a DataFrame, even for a single column.
farm[["Crop"]]

In [None]:
# Select several columns at once; the result follows the order given in the list.
farm[["Crop", "Price"]]

# loc

In [None]:
# Show the full table for reference before slicing it with loc.
farm

In [None]:
# loc selects by index label; a label slice includes BOTH endpoints (labels 1 and 2).
farm.loc[1:2]

In [None]:
# Same row slice with the column selector spelled out: a bare `:` means all columns.
farm.loc[1:2, :]

In [None]:
# All rows, only the listed columns.
farm.loc[:, ["Crop","Price"]]

In [None]:
# Rows labelled 1 through 2, restricted to the listed columns.
farm.loc[1:2, ["Crop","Price"]]

In [None]:
# Column labels can be sliced too; like row labels, the slice includes its end ("Type").
farm.loc[1:2, "Crop":"Type"]

In [None]:
# Order the rows by Price, lowest first; each row keeps its original index label.
farm_sorted = farm.sort_values(by="Price", ascending=True)
farm_sorted

In [None]:
# loc still goes by label, not position: after sorting, the labels are out of
# order, so this is NOT "the first three rows". Label 0 now sits below label 2
# in the sorted table, which makes this slice empty.
farm_sorted.loc[0:2]

# iloc

In [None]:
# Show the sorted table for reference before slicing it with iloc.
farm_sorted

In [None]:
# iloc selects by integer position, ignoring index labels:
# this is the first two rows of the sorted table.
farm_sorted.iloc[0:2] # iloc is right-end exclusive!

In [None]:
# Positions 1 and 2 on both axes: second and third rows, second and third columns.
farm_sorted.iloc[1:3, 1:3]

# Boolean filtering

In [None]:
# Show the full table for reference before filtering.
farm

In [None]:
# The comparison yields a boolean Series; indexing with it keeps the rows where it is True.
farm[farm["Price"] > 300]

In [None]:
# Equality comparisons work the same way on text columns.
farm[farm["Type"] == "Vegetable"]

In [None]:
# Combine conditions element-wise. Each condition needs its own parentheses
# because & and | bind more tightly than comparison operators.
farm[(farm["Price"] > 300) & (farm["Type"] == "Vegetable")] # '&' means AND, '|' means OR

In [None]:
# Show the full table for reference before the isin example.
farm

In [None]:
# isin keeps the rows whose Crop appears in the list; names that are not in
# the table ("Radish", "Tomato") simply match nothing.
my_fav_produce = ["Radish", "Tomato", "Pumpkin", "Starfruit"]
farm[farm["Crop"].isin(my_fav_produce)]

# Merge

In [None]:
# Second table to join against farm: whether each produce type is on sale.
shop = pd.DataFrame(
    data=[
        ("Vegetable", True),
        ("Egg", False),
    ],
    columns=["Type", "On sale"],
)
shop

In [None]:
# With no `on`, merge joins on the column names the two frames share (here "Type")
# and performs an inner join by default.
farm.merge(shop)

In [None]:
# inner: keep only the rows whose Type appears in both dataframes
farm.merge(shop, on="Type", how="inner")

In [None]:
# outer: keep every row from both dataframes; values with no match on the other side are NaN
farm.merge(shop, on="Type", how="outer")

In [None]:
# left: keep every row of the left dataframe (farm), whether or not shop has a matching Type
farm.merge(shop, on="Type", how="left")

In [None]:
# right: keep every row of the right dataframe (shop), whether or not farm has a matching Type
farm.merge(shop, on="Type", how="right")

In [None]:
# A shop whose key column is called "goods" instead of "Type".
another_shop = pd.DataFrame(
    data=[
        ("Fruit", True),
        ("Vegetable", False),
        ("Egg", False),
        ("Fish", False),
    ],
    columns=["goods", "On sale"],
)
another_shop

In [None]:
# use left_on / right_on when the key columns have different names;
# both key columns ("Type" and "goods") appear in the result
farm.merge(another_shop, left_on="Type", right_on="goods", how="inner")

In [None]:
## The following will NOT be needed for lab 1

# Groupby

In [None]:
# Show the full table for reference before grouping.
farm

In [None]:
# Average every numeric column within each Type.
# numeric_only=True skips the text column "Crop", which cannot be averaged;
# without it, pandas >= 2.0 raises a TypeError here.
farm.groupby("Type").mean(numeric_only=True)

In [None]:
# Selecting [["Price"]] first averages only that column; the group labels become the index.
farm.groupby("Type")[["Price"]].mean()

In [None]:
# reset_index turns the group labels back into an ordinary "Type" column.
farm.groupby("Type")[["Price"]].mean().reset_index()

In [None]:
# Median price per Type.
farm.groupby("Type")[["Price"]].median().reset_index()

In [None]:
# Total price per Type.
farm.groupby("Type")[["Price"]].sum().reset_index()

In [None]:
# First (non-missing) price of each Type, following the table's row order.
farm.groupby("Type")[["Price"]].first().reset_index()

In [None]:
# Lowest price per Type.
farm.groupby("Type")[["Price"]].min().reset_index()

In [None]:
def my_agg_func(c):
    """Toy aggregation: ignore the values in ``c`` and always return 42."""
    answer = 42
    return answer

In [None]:
# agg accepts any function that reduces a group's values to a single value;
# my_agg_func returns 42 regardless of input, so every Type gets 42.
farm.groupby("Type")[["Price"]].agg(my_agg_func).reset_index()

In [None]:
# Show the full table for reference before filtering by group.
farm

In [None]:
# filter keeps or drops whole groups: the rows of a Type are kept only if
# that Type's mean price exceeds 500.
farm.groupby("Type").filter(lambda df: df["Price"].mean() > 500)

# Pivot table

In [None]:
# Total Price for each (Type, "Is red?") combination: one row per Type,
# one column per "Is red?" value.
# The aggregation is named by string: passing the builtin `sum` is deprecated
# in recent pandas and emits a FutureWarning.
pt_farm = pd.pivot_table(
    data=farm,
    index="Type",
    columns="Is red?",
    values="Price",
    aggfunc="sum",
)
pt_farm

# Melt

In [None]:
# Move "Type" out of the index into a regular column so melt can use it as an id column.
pt_farm = pt_farm.reset_index()
pt_farm

In [None]:
# Unpivot back to long form: one row per combination of Type and former column label.
pt_farm.melt(id_vars=["Type"], value_vars=[False, True])