# Apply


In [1]:
import pandas as pd

## Primer


In [None]:
# Toy roster for the primer: one row per student with a group label and raw marks.
df = pd.DataFrame(
    data={
        "Name": ["John", "Rock", "Neil", "Tom"],
        "Group": ["A", "A", "B", "B"],
        "Marks": [78, 66, 55, 42],
    },
)
display(df)

In [None]:
# Create a new column "Marks (%)" which contains the marks in percentage
def make_percent(val):
    """Render ``val`` as text followed by a percent sign.

    The value is converted with ``str`` and is not rescaled, so ``78``
    becomes ``"78%"``.
    """
    return f"{val!s}%"


# Series.apply calls make_percent once for each value in the "Marks" column.
df["Marks (%)"] = df["Marks"].apply(make_percent)

display(df)

In [None]:
# Create a new column "Pass" which contains True if the student has passed the exam, False otherwise
# A student is considered to have passed if they scored 70 or more marks in group A, or 50 or more marks in group B.


def is_pass(row):
    """Decide whether one student row meets the pass mark for its group.

    ``row`` must support lookup by the keys "Group" and "Marks". Group "A"
    needs at least 70 marks; every other group value is held to 50.
    """
    group, marks = row["Group"], row["Marks"]
    pass_mark = 70 if group == "A" else 50
    return marks >= pass_mark


# axis=1 makes DataFrame.apply call is_pass once per row, passing the row as a Series.
df["Pass"] = df.apply(is_pass, axis=1)
display(df)

## Real Data


In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df_raw = pd.read_csv("./data/student_mat.csv")
df_raw.head()

In [None]:
# Print a summary of the columns: names, non-null counts and dtypes.
df_raw.info()

In [None]:
# For the purpose of this exercise, keep only the columns from 'school' through 'guardian'
# (label-based slices with .loc include both endpoints).
# .copy() makes df_stu an independent frame, so the column assignments in later
# cells modify df_stu only and do not raise SettingWithCopyWarning.
df_stu = df_raw.loc[:, "school":"guardian"].copy()
df_stu.head()

In [None]:
# Create a function that converts strings to upper case
def capitalizer(val):
    """Return ``val`` converted to upper case.

    Despite the name, this upper-cases the whole string (``str.upper``), not
    just the first letter. Values that are not strings -- notably the float
    NaN that pandas uses for missing entries in a text column -- are returned
    unchanged instead of raising ``AttributeError``.
    """
    if not isinstance(val, str):
        # Nothing to upper-case; pass missing/non-text values straight through.
        return val
    return val.upper()


# Upper-case both job columns element by element, overwriting them on df_stu.
df_stu["Mjob"] = df_stu["Mjob"].apply(capitalizer)
df_stu["Fjob"] = df_stu["Fjob"].apply(capitalizer)

df_stu.head()

In [None]:
# Create a function that returns a boolean value (True when the age is over 17) used to fill a new column called legal_drinker
def is_legan_drinker(x):
    """Return True when ``x`` is greater than 17, otherwise False."""
    # bool() keeps the result a plain Python bool, exactly as an explicit
    # True/False branch would, even if the comparison yields another type.
    return bool(x > 17)


# Run the check on every value in the "age" column and store the booleans in a new column.
df_stu["legal_drinker"] = df_stu["age"].apply(is_legan_drinker)
df_stu.head()

In [None]:
# Create a new column called category that will be calculated based on the age and sex of each student.
def categorize_by_age_and_sex(row):
    """Label a row as Underage/Adult combined with Female/Male.

    ``row`` must support lookup by the keys "sex" and "age". An age below 18
    is labelled "Underage", anything else "Adult"; a sex of "F" is labelled
    "Female", any other value "Male".
    """
    sex, age = row["sex"], row["age"]
    if age < 18:
        return "Underage Female" if sex == "F" else "Underage Male"
    return "Adult Female" if sex == "F" else "Adult Male"


# axis=1 passes each row to the function, which reads that row's "sex" and "age".
df_stu["category"] = df_stu.apply(categorize_by_age_and_sex, axis=1)

# Show the inputs next to the derived label for a quick visual check.
df_stu[["sex", "age", "category"]].head(10)

In [None]:
df_stu.head()

### Apply with groupby

In [12]:
# Group by category
df_stu_group = df_stu.groupby("category")

In [None]:
# Number of rows in each category group.
df_stu_group.size()

In [None]:
# Number of distinct values per column within each category group.
df_stu_group.nunique()

In [None]:
# Calculate the average age of the rows whose Fjob is TEACHER and of all other rows
def average_age_of_teacher(df):
    """Compare the mean age of rows with a teacher Fjob against all other rows.

    Args:
        df: DataFrame with an "Fjob" column and a numeric "age" column. The
            match is against the exact string "TEACHER", so "Fjob" must
            already be upper-cased.

    Returns:
        pd.Series with two entries, "Mean Age Teacher" and
        "Mean Age Not Teacher". An entry is NaN when no row falls on that
        side of the split.
    """
    filt_teacher = df["Fjob"] == "TEACHER"
    mean_age_teacher = df.loc[filt_teacher, "age"].mean()
    mean_age_not_teacher = df.loc[~filt_teacher, "age"].mean()
    return pd.Series(
        {
            "Mean Age Teacher": mean_age_teacher,
            "Mean Age Not Teacher": mean_age_not_teacher,
        }
    )

# For testing
# group_list = list(df_stu_group.groups.keys())
# df = df_stu_group.get_group(group_list[0])
# filt_teacher = df["Fjob"] == "TEACHER"
# mean_age_teacher = df.loc[filt_teacher, "age"].mean()
# mean_age_not_techer = df.loc[~filt_teacher, "age"].mean()
# pd.Series(
#     {"Mean Age Techer": mean_age_teacher, "Mean Age Not Teacher": mean_age_not_techer}
# )

# Real operation
# include_groups=False keeps the grouping column ("category") out of the frame handed to the function.
df_stu_group.apply(average_age_of_teacher, include_groups=False)
