# Modifying DataFrames


In [68]:
import pandas as pd

In [69]:
# Build the example product table from one record per product; each record's
# values line up positionally with the column labels below.
product_columns = ["Product ID", "Description", "Cost to Manufacture", "Price"]
product_rows = [
    (1, "3 inch screw", 0.5, 0.75),
    (2, "2 inch nail", 0.10, 0.25),
    (3, "hammer", 3.00, 5.50),
    (4, "screwdriver", 2.50, 3.00),
]
df = pd.DataFrame(product_rows, columns=product_columns)
df

Unnamed: 0,Product ID,Description,Cost to Manufacture,Price
0,1,3 inch screw,0.5,0.75
1,2,2 inch nail,0.1,0.25
2,3,hammer,3.0,5.5
3,4,screwdriver,2.5,3.0


### Adding Columns


In [70]:
# Assigning a list to a new column label adds that column; the list is laid
# out row by row, so it needs exactly one value per row of the DataFrame.
bulk_flags = ["Yes", "Yes", "No", "No"]
df["Sold in Bulk?"] = bulk_flags
df

Unnamed: 0,Product ID,Description,Cost to Manufacture,Price,Sold in Bulk?
0,1,3 inch screw,0.5,0.75,Yes
1,2,2 inch nail,0.1,0.25,Yes
2,3,hammer,3.0,5.5,No
3,4,screwdriver,2.5,3.0,No


In [71]:
# Assigning a single scalar broadcasts it: every row gets the same value.
taxed_label = "Yes"
df["Is taxed?"] = taxed_label
df

Unnamed: 0,Product ID,Description,Cost to Manufacture,Price,Sold in Bulk?,Is taxed?
0,1,3 inch screw,0.5,0.75,Yes,Yes
1,2,2 inch nail,0.1,0.25,Yes,Yes
2,3,hammer,3.0,5.5,No,Yes
3,4,screwdriver,2.5,3.0,No,Yes


In [72]:
# Arithmetic between two columns is element-wise, so each row's margin is
# that row's price minus that row's manufacturing cost.
unit_price = df["Price"]
unit_cost = df["Cost to Manufacture"]
df["Margin"] = unit_price - unit_cost
df

Unnamed: 0,Product ID,Description,Cost to Manufacture,Price,Sold in Bulk?,Is taxed?,Margin
0,1,3 inch screw,0.5,0.75,Yes,Yes,0.25
1,2,2 inch nail,0.1,0.25,Yes,Yes,0.15
2,3,hammer,3.0,5.5,No,Yes,2.5
3,4,screwdriver,2.5,3.0,No,Yes,0.5


### Column Operations


In [73]:
# Upper-case every description with the vectorized string accessor. Unlike
# .apply(str.upper), .str.upper() passes missing values through instead of
# raising a TypeError. Bracket access is used for the assignment so it matches
# the other cells and cannot be confused with setting a plain attribute.
df["Description"] = df["Description"].str.upper()
df

Unnamed: 0,Product ID,Description,Cost to Manufacture,Price,Sold in Bulk?,Is taxed?,Margin
0,1,3 INCH SCREW,0.5,0.75,Yes,Yes,0.25
1,2,2 INCH NAIL,0.1,0.25,Yes,Yes,0.15
2,3,HAMMER,3.0,5.5,No,Yes,2.5
3,4,SCREWDRIVER,2.5,3.0,No,Yes,0.5


### Applying a Lambda to a Column


In [74]:
# Load the employee records; the relative path is resolved against the
# current working directory, so employees.csv must be reachable from there.
df2 = pd.read_csv("employees.csv")

def get_last_name(x: str) -> str:
    """Return the last whitespace-separated word of a full name.

    Args:
        x: A full name such as "Lauren Durham".

    Returns:
        The final word of ``x``, or an empty string when ``x`` is empty or
        contains only whitespace.
    """
    # split() with no argument ignores leading/trailing/repeated whitespace,
    # so a name like "Brian Rojas " still yields "Rojas" rather than "".
    words = x.split()
    return words[-1] if words else ""

# Run get_last_name on each value of the "name" column and store the results
# as a new "last_name" column, then preview the first rows.
name_column = df2["name"]
df2["last_name"] = name_column.apply(get_last_name)

df2.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,last_name
0,10310,Lauren Durham,19,43,Durham
1,18656,Grace Sellers,17,40,Sellers
2,61254,Shirley Rasmussen,16,30,Rasmussen
3,16886,Brian Rojas,18,47,Rojas
4,89010,Samantha Mosley,11,38,Mosley


In [75]:
def total_earned(row, regular_hours=40, overtime_multiplier=1.5):
    """Compute pay for one employee row, including overtime.

    Args:
        row: A mapping-like object (for example a DataFrame row) providing
            "hours_worked" and "hourly_wage".
        regular_hours: Number of hours paid at the plain hourly wage.
        overtime_multiplier: Factor applied to the hourly wage for every hour
            worked beyond ``regular_hours``.

    Returns:
        ``hours_worked * hourly_wage`` when no overtime was worked; otherwise
        the regular pay for ``regular_hours`` plus the overtime pay.
    """
    hours = row["hours_worked"]
    wage = row["hourly_wage"]

    if hours <= regular_hours:
        return hours * wage

    # Only the hours beyond the regular threshold earn the overtime rate.
    overtime_hours = hours - regular_hours
    return regular_hours * wage + overtime_hours * (wage * overtime_multiplier)

# axis="columns" (the same as axis=1) passes total_earned one row at a time,
# so each employee's pay is computed from that row's own values.
earnings = df2.apply(total_earned, axis="columns")
df2["total_earned"] = earnings
df2.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,last_name,total_earned
0,10310,Lauren Durham,19,43,Durham,845.5
1,18656,Grace Sellers,17,40,Sellers,680.0
2,61254,Shirley Rasmussen,16,30,Rasmussen,480.0
3,16886,Brian Rojas,18,47,Rojas,909.0
4,89010,Samantha Mosley,11,38,Mosley,418.0


### Renaming Columns


In [76]:
# Assigning a list to .columns relabels every column at once, by position:
# the list must hold one name per existing column, in the existing order.
df2.columns = [
    "ID",
    "Name",
    "Hourly Wage",
    "Hours Worked",
    "Last Name",
    "Total Earned",
]
# Put the snake_case labels back so the next cell can refer to "id" and
# "total_earned" again.
df2.columns = ["id", "name", "hourly_wage", "hours_worked", "last_name", "total_earned"]
df2.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,last_name,total_earned
0,10310,Lauren Durham,19,43,Durham,845.5
1,18656,Grace Sellers,17,40,Sellers,680.0
2,61254,Shirley Rasmussen,16,30,Rasmussen,480.0
3,16886,Brian Rojas,18,47,Rojas,909.0
4,89010,Samantha Mosley,11,38,Mosley,418.0


In [77]:
# rename() changes only the labels listed in the mapping and leaves the other
# columns alone. Using inplace=True edits the original DataFrame instead of
# returning a renamed copy.
df2.rename(
    {"id": "Employee ID", "total_earned": "Earning"},
    axis="columns",
    inplace=True,
)
# Map the two labels back to their previous names.
df2.rename(
    {"Employee ID": "id", "Earning": "total_earned"},
    axis="columns",
    inplace=True,
)
df2.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,last_name,total_earned
0,10310,Lauren Durham,19,43,Durham,845.5
1,18656,Grace Sellers,17,40,Sellers,680.0
2,61254,Shirley Rasmussen,16,30,Rasmussen,480.0
3,16886,Brian Rojas,18,47,Rojas,909.0
4,89010,Samantha Mosley,11,38,Mosley,418.0
