In [1]:
import numpy as np
import pandas as pd

# employee records, one (first_name, last_name, id) tuple per row
employee_rows = [
    ("Jane", "Doe", 101),
    ("John", "Doe", 103),
    ("Max", "Dune", 143),
    ("Emily", "Smith", 118),
    ("Ashley", "Fox", 128),
]

# build the DataFrame, naming the columns in display order
df = pd.DataFrame(employee_rows, columns=["first_name", "last_name", "id"])

# show the DataFrame
df

Unnamed: 0,first_name,last_name,id
0,Jane,Doe,101
1,John,Doe,103
2,Max,Dune,143
3,Emily,Smith,118
4,Ashley,Fox,128


In [2]:
# new column from a scalar: the same value is broadcast to every row
df["department"] = "engineering"

# show the DataFrame
df

Unnamed: 0,first_name,last_name,id,department
0,Jane,Doe,101,engineering
1,John,Doe,103,engineering
2,Max,Dune,143,engineering
3,Emily,Smith,118,engineering
4,Ashley,Fox,128,engineering


In [3]:
# using calculation
# Draw one salary per row from a seeded generator so that re-running the
# notebook reproduces the same values. Bounds match the previous call:
# 40000 is inclusive, 55000 is exclusive.
rng = np.random.default_rng(seed=42)

# size follows the frame length instead of a hard-coded row count
df.loc[:, "salary"] = rng.integers(40000, 55000, size=len(df))

# display DataFrame
df

Unnamed: 0,first_name,last_name,id,department,salary
0,Jane,Doe,101,engineering,49260
1,John,Doe,103,engineering,42234
2,Max,Dune,143,engineering,49920
3,Emily,Smith,118,engineering,51708
4,Ashley,Fox,128,engineering,43761


In [4]:
# new column derived from existing ones: join first and last name with a space
full_name = df["first_name"] + ' ' + df["last_name"]
df["name"] = full_name

# show the DataFrame
df

Unnamed: 0,first_name,last_name,id,department,salary,name
0,Jane,Doe,101,engineering,49260,Jane Doe
1,John,Doe,103,engineering,42234,John Doe
2,Max,Dune,143,engineering,49920,Max Dune
3,Emily,Smith,118,engineering,51708,Emily Smith
4,Ashley,Fox,128,engineering,43761,Ashley Fox


In [6]:

# same full-name column, this time built with Series.str.cat
last_names = df["last_name"]
df.loc[:, "name"] = df["first_name"].str.cat(last_names, sep=' ')

# show the DataFrame
df

Unnamed: 0,first_name,last_name,id,department,salary,name
0,Jane,Doe,101,engineering,49260,Jane Doe
1,John,Doe,103,engineering,42234,John Doe
2,Max,Dune,143,engineering,49920,Max Dune
3,Emily,Smith,118,engineering,51708,Emily Smith
4,Ashley,Fox,128,engineering,43761,Ashley Fox


In [7]:
# remove the name column that currently sits at the end
df = df.drop(columns=["name"])

# rebuild the full name and insert it in front of all other columns
full_name = df["first_name"].str.cat(df["last_name"], sep=' ')
df.insert(loc=0, column="name", value=full_name)

# show the DataFrame
df

Unnamed: 0,name,first_name,last_name,id,department,salary
0,Jane Doe,Jane,Doe,101,engineering,49260
1,John Doe,John,Doe,103,engineering,42234
2,Max Dune,Max,Dune,143,engineering,49920
3,Emily Smith,Emily,Smith,118,engineering,51708
4,Ashley Fox,Ashley,Fox,128,engineering,43761


In [8]:
# initialize the column with all 0s
df.loc[:, "high_salary"] = 0

# Keep the 0 where salary <= 48000 and replace it with 1 everywhere else.
# `where` is applied to the flag column itself rather than to the whole
# DataFrame, so the other columns (including the text ones) are never masked
# and the result is a single Series that can be assigned directly.
df.loc[:, "high_salary"] = df["high_salary"].where(df["salary"] <= 48000, 1)

# display DataFrame
df

Unnamed: 0,name,first_name,last_name,id,department,salary,high_salary
0,Jane Doe,Jane,Doe,101,engineering,49260,1
1,John Doe,John,Doe,103,engineering,42234,0
2,Max Dune,Max,Dune,143,engineering,49920,1
3,Emily Smith,Emily,Smith,118,engineering,51708,1
4,Ashley Fox,Ashley,Fox,128,engineering,43761,0


In [9]:
# remove the high_salary column created in the previous step
df = df.drop(columns=["high_salary"])

# rebuild it in a single step: 0 where salary is at most 48000, otherwise 1
at_most_48k = df["salary"].le(48000)
df["high_salary"] = np.where(at_most_48k, 0, 1)

# show the DataFrame
df

Unnamed: 0,name,first_name,last_name,id,department,salary,high_salary
0,Jane Doe,Jane,Doe,101,engineering,49260,1
1,John Doe,John,Doe,103,engineering,42234,0
2,Max Dune,Max,Dune,143,engineering,49920,1
3,Emily Smith,Emily,Smith,118,engineering,51708,1
4,Ashley Fox,Ashley,Fox,128,engineering,43761,0


In [10]:
# create conditions list (np.select uses the first condition that matches)
conditions = [
    (df["salary"] > 50000),
    (df["salary"] <= 50000) & (df["salary"] > 45000),
    (df["salary"] <= 45000)
]

# create values list (one label per condition, in the same order)
values = ["high", "mid", "low"]

# create the column
# np.select falls back to the integer 0 when no condition matches, and newer
# NumPy releases refuse to combine that integer default with string labels.
# Passing a string default keeps the call valid; for the integer salaries here
# the three conditions cover every row, so the default is never actually used.
df.loc[:, "salary_cond"] = np.select(conditions, values, default="unknown")

# display DataFrame
df

Unnamed: 0,name,first_name,last_name,id,department,salary,high_salary,salary_cond
0,Jane Doe,Jane,Doe,101,engineering,49260,1,mid
1,John Doe,John,Doe,103,engineering,42234,0,low
2,Max Dune,Max,Dune,143,engineering,49920,1,mid
3,Emily Smith,Emily,Smith,118,engineering,51708,1,high
4,Ashley Fox,Ashley,Fox,128,engineering,43761,0,low


In [11]:
# drop the columns first
# errors="ignore" together with listing "salary_condition" lets this cell be
# re-run: after the first run "salary_cond" no longer exists and
# "salary_condition" does, which previously made the drop raise a KeyError.
df = df.drop(
    columns=["department", "high_salary", "salary_cond", "salary_condition"],
    errors="ignore",
)

# create the columns in a single assign call
df = df.assign(
    department="engineering",
    high_salary=np.where(df["salary"] <= 48000, 0, 1),
    # explicit string default: np.select's implicit integer 0 cannot be
    # combined with string labels on newer NumPy releases
    salary_condition=np.select(conditions, values, default="unknown"),
)

# display DataFrame
df

Unnamed: 0,name,first_name,last_name,id,salary,department,high_salary,salary_condition
0,Jane Doe,Jane,Doe,101,49260,engineering,1,mid
1,John Doe,John,Doe,103,42234,engineering,0,low
2,Max Dune,Max,Dune,143,49920,engineering,1,mid
3,Emily Smith,Emily,Smith,118,51708,engineering,1,high
4,Ashley Fox,Ashley,Fox,128,43761,engineering,0,low


In [12]:
# create a DataFrame with random integers in [0, 10)
# A seeded generator makes the values identical on every run of the notebook.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.integers(10, size=(4, 5)), columns=list("ABCDE"))

# display DataFrame
df

Unnamed: 0,A,B,C,D,E
0,7,8,8,1,1
1,7,7,0,2,7
2,1,9,2,1,1
3,2,4,0,4,0


In [13]:
# add a row-wise total of the A-E columns
# Selecting the source columns explicitly keeps "total" out of its own sum
# when this cell is re-run, and the built-in sum replaces apply(np.sum).
df["total"] = df[list("ABCDE")].sum(axis=1)

# display DataFrame
df

Unnamed: 0,A,B,C,D,E,total
0,7,8,8,1,1,25
1,7,7,0,2,7,23
2,1,9,2,1,1,14
3,2,4,0,4,0,10


In [14]:
# observed rates for each item, keyed by item label
rates_by_item = {
    "A": [11, 15, 12],
    "B": [5, 7, 4],
    "C": [24, 18, 22],
    "D": [42, 39, 27],
}

# one row per item; the "rates" column holds the whole list for that item
rates = pd.DataFrame(
    {
        "item": list(rates_by_item.keys()),
        "rates": list(rates_by_item.values()),
    }
)

# show the DataFrame
rates

Unnamed: 0,item,rates
0,A,"[11, 15, 12]"
1,B,"[5, 7, 4]"
2,C,"[24, 18, 22]"
3,D,"[42, 39, 27]"


In [15]:
# smallest value of each row's list, stored in a new min_rate column
smallest_rate = rates["rates"].map(lambda rate_list: pd.Series(rate_list).min())
rates["min_rate"] = smallest_rate

# show the DataFrame
rates

Unnamed: 0,item,rates,min_rate
0,A,"[11, 15, 12]",11
1,B,"[5, 7, 4]",4
2,C,"[24, 18, 22]",18
3,D,"[42, 39, 27]",27
