# Pandas: Advanced Topics

## Adding New Columns to a DataFrame

In [2]:
import pandas as pd

In [3]:
# Load the Titanic dataset from a CSV file located next to the notebook.
titanic = pd.read_csv("titanic.csv")

In [4]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [5]:
# Bracket assignment with a scalar creates a new column and broadcasts the
# value to every row.
titanic["Zeros"] = "Zero"

In [6]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros
0,0,3,male,22.0,1,0,7.25,S,,Zero
1,1,1,female,38.0,1,0,71.2833,C,C,Zero
2,1,3,female,26.0,0,0,7.925,S,,Zero
3,1,1,female,35.0,1,0,53.1,S,C,Zero
4,0,3,male,35.0,0,0,8.05,S,,Zero


In [7]:
# Pitfall: dot notation cannot create a column. This only sets an ordinary
# Python attribute named `Ones` on the DataFrame object; the table itself is
# unchanged (no "Ones" column appears in head()). Use titanic["Ones"] = 1 to
# add a real column.
titanic.Ones = 1

In [8]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros
0,0,3,male,22.0,1,0,7.25,S,,Zero
1,1,1,female,38.0,1,0,71.2833,C,C,Zero
2,1,3,female,26.0,0,0,7.925,S,,Zero
3,1,1,female,35.0,1,0,53.1,S,C,Zero
4,0,3,male,35.0,0,0,8.05,S,,Zero


In [9]:
# Returns the scalar stored as a plain attribute, not a column Series.
titanic.Ones

1

## Arithmetic Operations

In [10]:
import numpy as np

### Add/Sub/Mul/Div of Columns

In [11]:
titanic.sibsp + titanic.parch

0      1
1      1
2      0
3      1
4      0
      ..
886    0
887    0
888    3
889    0
890    0
Length: 891, dtype: int64

In [12]:
# Method form of the `+` operator; without fill_value it gives the same result.
titanic.sibsp.add(titanic.parch)

0      1
1      1
2      0
3      1
4      0
      ..
886    0
887    0
888    3
889    0
890    0
Length: 891, dtype: int64

In [13]:
# Total relatives aboard per passenger: siblings/spouses plus parents/children.
titanic["no_relat"] = titanic["sibsp"] + titanic["parch"]

In [14]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,no_relat
0,0,3,male,22.0,1,0,7.25,S,,Zero,1
1,1,1,female,38.0,1,0,71.2833,C,C,Zero,1
2,1,3,female,26.0,0,0,7.925,S,,Zero,0
3,1,1,female,35.0,1,0,53.1,S,C,Zero,1
4,0,3,male,35.0,0,0,8.05,S,,Zero,0


In [15]:
# index_col=0 uses the first CSV column (the names) as the row index.
sales = pd.read_csv("sales.csv", index_col = 0)

In [16]:
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,34,27,15,,33
Mike,45,9,74,87.0,12
Andi,17,33,54,8.0,29
Paul,87,67,27,45.0,7


In [17]:
sales.Mon + sales.Thu

Steven      NaN
Mike      132.0
Andi       25.0
Paul      132.0
dtype: float64

In [18]:
# fill_value=0 substitutes 0 for a value missing on one side before adding,
# so Steven's total is 34.0 instead of NaN.
sales.Mon.add(sales.Thu, fill_value=0)

Steven     34.0
Mike      132.0
Andi       25.0
Paul      132.0
dtype: float64

In [19]:
# A list is assigned positionally: one value per row, in the current row
# order (Steven, Mike, Andi, Paul).
sales["perc_Bonus"] = [0.12, 0.15, 0.10, 0.20]

In [20]:
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_Bonus
Steven,34,27,15,,33,0.12
Mike,45,9,74,87.0,12,0.15
Andi,17,33,54,8.0,29,0.1
Paul,87,67,27,45.0,7,0.2


In [21]:
sales.Thu * sales.perc_Bonus

Steven      NaN
Mike      13.05
Andi       0.80
Paul       9.00
dtype: float64

In [22]:
# fill_value=0 treats the missing Thursday figure as 0, so Steven's product
# is 0.00 instead of NaN.
sales.Thu.mul(sales.perc_Bonus, fill_value=0)

Steven     0.00
Mike      13.05
Andi       0.80
Paul       9.00
dtype: float64

In [23]:
# Weekly bonus = (sum of Mon..Fri sales) * personal bonus rate.
# The weekday columns are selected by label rather than with iloc[:, :-1]:
# the positional slice only excludes the *last* column, so once further
# columns are appended to `sales` it would silently add perc_Bonus to the sum.
# sum(axis=1) skips NaN, so a missing day counts as 0.
sales.loc[:, "Mon":"Fri"].sum(axis=1).mul(sales.perc_Bonus)

Steven    13.08
Mike      34.05
Andi      14.10
Paul      46.60
dtype: float64

In [24]:
# Store the weekly bonus: (sum of Mon..Fri sales) * personal bonus rate.
# Selecting the weekday columns by label keeps this cell idempotent. With
# iloc[:, :-1] a second run would find "Bonus" as the last column and
# therefore add perc_Bonus into the weekly total.
# sum(axis=1) skips NaN, so a missing day counts as 0.
sales["Bonus"] = sales.loc[:, "Mon":"Fri"].sum(axis=1).mul(sales.perc_Bonus)

In [25]:
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_Bonus,Bonus
Steven,34,27,15,,33,0.12,13.08
Mike,45,9,74,87.0,12,0.15,34.05
Andi,17,33,54,8.0,29,0.1,14.1
Paul,87,67,27,45.0,7,0.2,46.6


### Add/Sub/Mul/Div with Scalar Value

In [26]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,no_relat
0,0,3,male,22.0,1,0,7.25,S,,Zero,1
1,1,1,female,38.0,1,0,71.2833,C,C,Zero,1
2,1,3,female,26.0,0,0,7.925,S,,Zero,0
3,1,1,female,35.0,1,0,53.1,S,C,Zero,1
4,0,3,male,35.0,0,0,8.05,S,,Zero,0


In [27]:
1912 - titanic.age

0      1890.0
1      1874.0
2      1886.0
3      1877.0
4      1877.0
        ...  
886    1885.0
887    1893.0
888       NaN
889    1886.0
890    1880.0
Name: age, Length: 891, dtype: float64

In [28]:
# YoB = 1912 - age, using 1912 as the reference year.
# rsub is the reversed subtraction (other - self), so it replaces the
# subtract-then-negate chain with a single operation. NaN ages stay NaN.
titanic["YoB"] = titanic.age.rsub(1912)

In [29]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,no_relat,YoB
0,0,3,male,22.0,1,0,7.25,S,,Zero,1,1890.0
1,1,1,female,38.0,1,0,71.2833,C,C,Zero,1,1874.0
2,1,3,female,26.0,0,0,7.925,S,,Zero,0,1886.0
3,1,1,female,35.0,1,0,53.1,S,C,Zero,1,1877.0
4,0,3,male,35.0,0,0,8.05,S,,Zero,0,1877.0


In [30]:
fx_rate = 1.1

In [31]:
# Convert each fare by dividing through the scalar exchange rate.
titanic["EUR_fare"] = titanic["fare"] / fx_rate

In [32]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,no_relat,YoB,EUR_fare
0,0,3,male,22.0,1,0,7.25,S,,Zero,1,1890.0,6.590909
1,1,1,female,38.0,1,0,71.2833,C,C,Zero,1,1874.0,64.803
2,1,3,female,26.0,0,0,7.925,S,,Zero,0,1886.0,7.204545
3,1,1,female,35.0,1,0,53.1,S,C,Zero,1,1877.0,48.272727
4,0,3,male,35.0,0,0,8.05,S,,Zero,0,1877.0,7.318182


In [33]:
# Remove the columns that are no longer needed. Reassigning the result
# (instead of inplace=True) keeps the operation explicit and chainable.
# errors="ignore" makes the cell safe to re-run after the columns are gone;
# without it a second execution raises KeyError.
titanic = titanic.drop(
    columns=["sibsp", "parch", "deck", "YoB", "EUR_fare"],
    errors="ignore",
)

In [34]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked,Zeros,no_relat
0,0,3,male,22.0,7.25,S,Zero,1
1,1,1,female,38.0,71.2833,C,Zero,1
2,1,3,female,26.0,7.925,S,Zero,0
3,1,1,female,35.0,53.1,S,Zero,1
4,0,3,male,35.0,8.05,S,Zero,0


In [35]:
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_Bonus,Bonus
Steven,34,27,15,,33,0.12,13.08
Mike,45,9,74,87.0,12,0.15,34.05
Andi,17,33,54,8.0,29,0.1,14.1
Paul,87,67,27,45.0,7,0.2,46.6


In [36]:
fixed_costs = 5

In [37]:
# Subtract the fixed daily costs from every weekday figure.
# The weekday columns are picked by label so the result does not depend on
# how many derived columns have been appended to `sales`.
# fill_value=0 treats a missing figure as 0 before subtracting, which is why
# Steven's Thursday shows -5.0 rather than NaN.
sales.loc[:, "Mon":"Fri"].sub(fixed_costs, fill_value=0)

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,29,22,10,-5.0,28
Mike,40,4,69,82.0,7
Andi,12,28,49,3.0,24
Paul,82,62,22,40.0,2


In [38]:
perc_Bonus = 0.1

In [39]:
# Apply one flat bonus rate to every weekday figure.
# The weekday columns are picked by label so the result does not depend on
# how many derived columns have been appended to `sales`.
# fill_value=0 treats a missing figure as 0, giving 0.0 instead of NaN.
sales.loc[:, "Mon":"Fri"].mul(perc_Bonus, fill_value=0)

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,3.4,2.7,1.5,0.0,3.3
Mike,4.5,0.9,7.4,8.7,1.2
Andi,1.7,3.3,5.4,0.8,2.9
Paul,8.7,6.7,2.7,4.5,0.7


In [66]:
# Show only the raw weekday columns. A label slice (both ends inclusive)
# stays correct regardless of how many derived columns follow.
sales.loc[:, "Mon":"Fri"]

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,34,27,15,,33
Mike,45,9,74,87.0,12
Andi,17,33,54,8.0,29
Paul,87,67,27,45.0,7


## Manipulating Elements in a DataFrame

In [105]:
titanic = pd.read_csv("titanic.csv")

In [106]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [107]:
# Label-based assignment: set the single cell at row label 1, column "age".
titanic.loc[1, "age"] = 40
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [108]:
# Position-based assignment: row position 1, column position 3.
# Column position 3 is "age" in this frame (see the output below).
titanic.iloc[1, 3] = 41
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [109]:
# .loc label slices include BOTH endpoints, so rows 1, 2 and 3 are updated.
titanic.loc[1:3, "age"] = 42
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [110]:
# Overwrite the whole "age" column with its values multiplied by 12
# (presumably years -> months; confirm intent).
titanic.loc[:, "age"] = titanic.loc[:, "age"] * 12
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,264.0,1,0,7.25,S,
1,1,1,female,504.0,1,0,71.2833,C,C
2,1,3,female,504.0,0,0,7.925,S,
3,1,1,female,504.0,1,0,53.1,S,C
4,0,3,male,420.0,0,0,8.05,S,


In [111]:
# Divide the "age" column by 12, undoing the previous multiplication.
titanic.loc[:, "age"] = titanic.loc[:, "age"] / 12
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [112]:
# Row labels of all passengers younger than one year, obtained by applying
# the boolean mask directly to the index.
index_babies = titanic.index[titanic.age < 1]
index_babies

Int64Index([78, 305, 469, 644, 755, 803, 831], dtype='int64')

In [113]:
# Take an explicit, independent copy of the selected rows. Columns of
# titanic_babies are reassigned later on; copying here guarantees those
# writes affect only this subset, never `titanic`, and avoids pandas'
# SettingWithCopyWarning.
titanic_babies = titanic.loc[index_babies].copy()

In [114]:
titanic_babies

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


In [115]:
# Series.apply calls the lambda once per element; here it adds 1 to every age.
# The result is assigned back, overwriting the 'age' column of titanic_babies.
# Note: re-running this cell adds another 1 each time.
titanic_babies['age'] = titanic_babies['age'].apply(lambda x: x + 1)
titanic_babies

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.83,0,2,29.0,S,
305,1,1,male,1.92,1,2,151.55,S,C
469,1,3,female,1.75,2,1,19.2583,C,
644,1,3,female,1.75,2,1,19.2583,C,
755,1,2,male,1.67,1,1,14.5,S,
803,1,3,male,1.42,0,1,8.5167,C,
831,1,2,male,1.83,1,1,18.75,S,


In [116]:
def sum_one(i):
    """Return ``i`` increased by one.

    Intended as a per-element callback for ``Series.apply``.
    """
    incremented = i + 1
    return incremented

In [117]:
# Same pattern with a named function instead of a lambda: sum_one is called
# for each fare and the incremented values replace the 'fare' column.
# Note: re-running this cell adds another 1 each time.
titanic_babies['fare'] = titanic_babies['fare'].apply(sum_one)
titanic_babies

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.83,0,2,30.0,S,
305,1,1,male,1.92,1,2,152.55,S,C
469,1,3,female,1.75,2,1,20.2583,C,
644,1,3,female,1.75,2,1,20.2583,C,
755,1,2,male,1.67,1,1,15.5,S,
803,1,3,male,1.42,0,1,9.5167,C,
831,1,2,male,1.83,1,1,19.75,S,
