# **DataFrame Basics II**

### **Filtering DataFrames by one Condition**

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv('titanic.csv')

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [4]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   survived  891 non-null    int64  
 1   pclass    891 non-null    int64  
 2   sex       891 non-null    object 
 3   age       714 non-null    float64
 4   sibsp     891 non-null    int64  
 5   parch     891 non-null    int64  
 6   fare      891 non-null    float64
 7   embarked  889 non-null    object 
 8   deck      203 non-null    object 
dtypes: float64(2), int64(4), object(3)
memory usage: 62.8+ KB


In [5]:
titanic.dtypes

survived      int64
pclass        int64
sex          object
age         float64
sibsp         int64
parch         int64
fare        float64
embarked     object
deck         object
dtype: object

In [6]:
titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [7]:
titanic.sex == "male"

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [8]:
# Boolean indexing with []: the comparison yields a True/False Series and
# only the rows flagged True are kept (all columns are retained).
males1 = titanic[titanic.sex == "male"]

In [9]:
males1.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,


In [10]:
# Name the mask once so several selections can reuse it.
male_filter = titanic["sex"].eq("male")
# A boolean Series passed to .loc picks the rows marked True; ':' keeps every column.
males2 = titanic.loc[male_filter, :]

In [11]:
males2.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,


In [12]:
# .loc[rows, columns]: the boolean mask selects the rows, the list of labels
# selects the columns, both in a single step.
males3 = titanic.loc[male_filter, ["age", "fare"]]

In [13]:
males3.head()

Unnamed: 0,age,fare
0,22.0,7.25
4,35.0,8.05
5,,8.4583
6,54.0,51.8625
7,2.0,21.075


In [14]:
# Same pattern as for the male passengers: build a reusable boolean mask ...
female_filter = titanic.sex == "female"
# ... and keep only the rows where it is True.
females = titanic[female_filter]

In [15]:
females

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,
...,...,...,...,...,...,...,...,...,...
880,1,2,female,25.0,0,1,26.0000,S,
882,0,3,female,22.0,0,0,10.5167,S,
885,0,3,female,39.0,0,5,29.1250,Q,
887,1,1,female,19.0,0,0,30.0000,S,B


In [16]:
males2.dtypes

survived      int64
pclass        int64
sex          object
age         float64
sibsp         int64
parch         int64
fare        float64
embarked     object
deck         object
dtype: object

In [17]:
# Per-column flag (indexed by column name): True where the dtype is object,
# i.e. the text columns sex, embarked and deck.
object_filter = males2.dtypes.eq(object)
# Negate the flag to keep only the non-object columns, for every row.
males2_numbers = males2.loc[:, ~object_filter]

In [18]:
males2_numbers

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.2500
4,0,3,35.0,0,0,8.0500
5,0,3,,0,0,8.4583
6,0,1,54.0,0,0,51.8625
7,0,3,2.0,3,1,21.0750
...,...,...,...,...,...,...
883,0,2,28.0,0,0,10.5000
884,0,3,25.0,0,0,7.0500
886,0,2,27.0,0,0,13.0000
889,1,1,26.0,0,0,30.0000


In [19]:
# Row mask and column mask applied in one .loc call. object_filter was built
# from males2.dtypes; it lines up here only because males2 has exactly the
# same columns as titanic.
males = titanic.loc[male_filter, ~object_filter]

In [20]:
males.head()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.25
4,0,3,35.0,0,0,8.05
5,0,3,,0,0,8.4583
6,0,1,54.0,0,0,51.8625
7,0,3,2.0,3,1,21.075


### **Filtering DataFrames by many Conditions (AND)**

In [21]:
import pandas as pd

In [22]:
titanic = pd.read_csv('titanic.csv')

In [23]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [24]:
# Boolean mask: True for every male passenger.
males_filter = titanic['sex'].eq("male")
# Keep the flagged rows together with all of their columns.
males = titanic.loc[males_filter, :]
males.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,


In [25]:
# Strictly older than 14: passengers aged exactly 14 are NOT included.
# Rows with a missing age compare as False, so they are dropped as well
# (age has only 714 non-null values out of 891).
age_filter = titanic['age'] > 14
grown_ups = titanic.loc[age_filter]
grown_ups.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [26]:
# Element-wise AND of the two masks: a row is kept only when the passenger
# is male and older than 14.
grown_males = titanic.loc[age_filter & males_filter, :]
grown_males.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
6,0,1,male,54.0,0,0,51.8625,S,E
12,0,3,male,20.0,0,0,8.05,S,
13,0,3,male,39.0,1,5,31.275,S,


In [27]:
grown_males.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,414.0,414.0,414.0,414.0,414.0,414.0
mean,0.173913,2.309179,33.129227,0.268116,0.169082,26.533312
std,0.379493,0.829868,12.922177,0.54626,0.557068,47.15163
min,0.0,1.0,15.0,0.0,0.0,0.0
25%,0.0,2.0,23.0,0.0,0.0,7.8958
50%,0.0,3.0,30.0,0.0,0.0,10.5
75%,0.0,3.0,40.0,0.0,0.0,26.55
max,1.0,3.0,80.0,4.0,5.0,512.3292


### **Filtering DataFrames by many Conditions (OR)**

In [28]:
import pandas as pd

In [29]:
titanic = pd.read_csv('titanic.csv')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [30]:
females_filter = titanic.sex == 'female'
females_filter.head()

0    False
1     True
2     True
3     True
4    False
Name: sex, dtype: bool

In [31]:
# Strictly younger than 14. Rows with a missing age compare as False.
# NOTE(review): the AND section uses `age > 14` for grown-ups, so passengers
# aged exactly 14 fall into neither group - confirm whether `<=` was intended.
child_filter = titanic['age'] < 14
child_filter.head()

0    False
1    False
2    False
3    False
4    False
Name: age, dtype: bool

In [32]:
# Element-wise OR: a row is kept when at least one of the two masks is True
# (female of any age, or a child of either sex).
woman_or_child = titanic.loc[females_filter | child_filter]

In [33]:
woman_or_child.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,


In [34]:
woman_or_child.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,351.0,351.0,298.0,351.0,351.0,351.0
mean,0.723647,2.205128,25.039161,0.846154,0.723647,43.441786
std,0.447832,0.847232,15.314631,1.306787,1.008809,55.676194
min,0.0,1.0,0.42,0.0,0.0,6.75
25%,0.0,1.0,14.125,0.0,0.0,12.475
50%,1.0,2.0,24.0,0.0,0.0,24.15
75%,1.0,3.0,35.0,1.0,1.0,51.93125
max,1.0,3.0,63.0,8.0,6.0,512.3292


In [35]:
# Preview of the Olympic medals file. The frame is only displayed here, not
# bound to a name; it is loaded into `summer` further below.
pd.read_csv('summer.csv')

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


## **Advanced Filtering with between(), isin() and ~**

In [36]:
import pandas as pd

In [37]:
summer = pd.read_csv('summer.csv')

In [38]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [39]:
summer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31165 entries, 0 to 31164
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        31165 non-null  int64 
 1   City        31165 non-null  object
 2   Sport       31165 non-null  object
 3   Discipline  31165 non-null  object
 4   Athlete     31165 non-null  object
 5   Country     31161 non-null  object
 6   Gender      31165 non-null  object
 7   Event       31165 non-null  object
 8   Medal       31165 non-null  object
dtypes: int64(1), object(8)
memory usage: 2.1+ MB


In [40]:
games_1988 = summer.loc[summer.Year == 1988]

In [41]:
games_1988.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
18051,1988,Seoul,Aquatics,Diving,"MENA CAMPOS, Jesus",MEX,Men,10M Platform,Bronze
18052,1988,Seoul,Aquatics,Diving,"LOUGANIS, Gregory",USA,Men,10M Platform,Gold
18053,1988,Seoul,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Silver
18054,1988,Seoul,Aquatics,Diving,"WYLAND-WILLIAMS, Wendy Lian",USA,Women,10M Platform,Bronze
18055,1988,Seoul,Aquatics,Diving,"XU, Yan-Mei",CHN,Women,10M Platform,Gold


In [42]:
games_1988.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1546 entries, 18051 to 19596
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        1546 non-null   int64 
 1   City        1546 non-null   object
 2   Sport       1546 non-null   object
 3   Discipline  1546 non-null   object
 4   Athlete     1546 non-null   object
 5   Country     1546 non-null   object
 6   Gender      1546 non-null   object
 7   Event       1546 non-null   object
 8   Medal       1546 non-null   object
dtypes: int64(1), object(8)
memory usage: 120.8+ KB


In [43]:
games_since_1992 = summer.loc[summer.Year >= 1992]

In [44]:
games_since_1992

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
19597,1992,Barcelona,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Bronze
19598,1992,Barcelona,Aquatics,Diving,"SUN, Shuwei",CHN,Men,10M Platform,Gold
19599,1992,Barcelona,Aquatics,Diving,"DONIE, Scott R.",USA,Men,10M Platform,Silver
19600,1992,Barcelona,Aquatics,Diving,"CLARK, Mary Ellen",USA,Women,10M Platform,Bronze
19601,1992,Barcelona,Aquatics,Diving,"FU, Mingxia",CHN,Women,10M Platform,Gold
...,...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [45]:
games_since_1992.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
19597,1992,Barcelona,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Bronze
19598,1992,Barcelona,Aquatics,Diving,"SUN, Shuwei",CHN,Men,10M Platform,Gold
19599,1992,Barcelona,Aquatics,Diving,"DONIE, Scott R.",USA,Men,10M Platform,Silver
19600,1992,Barcelona,Aquatics,Diving,"CLARK, Mary Ellen",USA,Women,10M Platform,Bronze
19601,1992,Barcelona,Aquatics,Diving,"FU, Mingxia",CHN,Women,10M Platform,Gold


In [46]:
games_since_1992.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze
31164,2012,London,Wrestling,Wrestling Freestyle,"LIDBERG, Jimmy",SWE,Men,Wg 96 KG,Bronze


In [47]:
# between() builds the mask left <= Year <= right; inclusive = 'both' keeps
# both end points (1990 and 1999) inside the range.
games_90s = summer[summer.Year.between(left = 1990, right = 1999, inclusive = 'both')]

In [48]:
games_90s

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
19597,1992,Barcelona,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Bronze
19598,1992,Barcelona,Aquatics,Diving,"SUN, Shuwei",CHN,Men,10M Platform,Gold
19599,1992,Barcelona,Aquatics,Diving,"DONIE, Scott R.",USA,Men,10M Platform,Silver
19600,1992,Barcelona,Aquatics,Diving,"CLARK, Mary Ellen",USA,Women,10M Platform,Bronze
19601,1992,Barcelona,Aquatics,Diving,"FU, Mingxia",CHN,Women,10M Platform,Gold
...,...,...,...,...,...,...,...,...,...
23156,1996,Atlanta,Wrestling,Wrestling Gre-R,"OLEYNYK, Vyacheslav",UKR,Men,82 - 90KG (Light-Heavyweight),Gold
23157,1996,Atlanta,Wrestling,Wrestling Gre-R,"FAFINSKI, Jacek",POL,Men,82 - 90KG (Light-Heavyweight),Silver
23158,1996,Atlanta,Wrestling,Wrestling Gre-R,"LJUNGBERG, Mikael",SWE,Men,90 - 100KG (Heavyweight),Bronze
23159,1996,Atlanta,Wrestling,Wrestling Gre-R,"WRONSKI, Andrzej",POL,Men,90 - 100KG (Heavyweight),Gold


In [49]:
# isin() is True where Year equals any value in the list - a compact
# replacement for (Year == 1976) | (Year == 1988).
selected_games = summer.loc[summer.Year.isin([1976, 1988])]

In [50]:
selected_games

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
13900,1976,Montreal,Aquatics,Diving,"ALEINIK, Vladimir",URS,Men,10M Platform,Bronze
13901,1976,Montreal,Aquatics,Diving,"DIBIASI, Klaus",ITA,Men,10M Platform,Gold
13902,1976,Montreal,Aquatics,Diving,"LOUGANIS, Gregory",USA,Men,10M Platform,Silver
13903,1976,Montreal,Aquatics,Diving,"WILSON, Deborah Keplar",USA,Women,10M Platform,Bronze
13904,1976,Montreal,Aquatics,Diving,"VAYTSEKHOVSKAYA, Elena",URS,Women,10M Platform,Gold
...,...,...,...,...,...,...,...,...,...
19592,1988,Seoul,Wrestling,Wrestling Gre-R,"KOMCHEV, Atanas Slavov",BUL,Men,82 - 90KG (Light-Heavyweight),Gold
19593,1988,Seoul,Wrestling,Wrestling Gre-R,"KOSKELA, Harri Matias",FIN,Men,82 - 90KG (Light-Heavyweight),Silver
19594,1988,Seoul,Wrestling,Wrestling Gre-R,"KOSLOWSKI, Dennis Marvin",USA,Men,90 - 100KG (Heavyweight),Bronze
19595,1988,Seoul,Wrestling,Wrestling Gre-R,"WRONSKI, Andrzej",POL,Men,90 - 100KG (Heavyweight),Gold


In [51]:
# Despite their names, `discipline` and `country` are boolean masks, not values.
# Both are built from selected_games, so they align with its index.
discipline = selected_games.Discipline == 'Weightlifting'
country = selected_games.Country == 'BUL'
# Rows that satisfy both conditions: Bulgarian weightlifting medals in 1976 and 1988.
selected_games.loc[discipline & country]

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
15122,1976,Montreal,Weightlifting,Weightlifting,"NURIKIAN, Norair",BUL,Men,"- 56KG, Total (Bantamweight)",Gold
15129,1976,Montreal,Weightlifting,Weightlifting,"TODOROV, Georghi",BUL,Men,"56 - 60KG, Total (Featherweight)",Silver
15134,1976,Montreal,Weightlifting,Weightlifting,"MITKOV, Yordan",BUL,Men,"67.5 - 75KG, Total (Middleweight)",Gold
15138,1976,Montreal,Weightlifting,Weightlifting,"STOITCHEV, Trendafil",BUL,Men,"75 - 82.5KG, Total (Light-Heavyweight)",Silver
15139,1976,Montreal,Weightlifting,Weightlifting,"SHOPOV, Atanas",BUL,Men,"82.5 - 90KG, Total (Middle-Heavyweight)",Bronze
15144,1976,Montreal,Weightlifting,Weightlifting,"SEMERDJIEV, Krastio",BUL,Men,"91 - 110KG, Total (Heavyweight)",Silver
19508,1988,Seoul,Weightlifting,Weightlifting,"MARINOV, Sevdalin",BUL,Men,"- 52KG, Total (Flyweight)",Gold
19521,1988,Seoul,Weightlifting,Weightlifting,"TOPUROV, Stefan",BUL,Men,"56 - 60KG, Total (Featherweight)",Silver
19525,1988,Seoul,Weightlifting,Weightlifting,"VARBANOV, Aleksander",BUL,Men,"67.5 - 75KG, Total (Middleweight)",Bronze
19526,1988,Seoul,Weightlifting,Weightlifting,"GIDIKOV, Borislav",BUL,Men,"67.5 - 75KG, Total (Middleweight)",Gold


## **any()** and **all()**

In [52]:
import pandas as pd

In [53]:
titanic = pd.read_csv('titanic.csv')

In [54]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [55]:
(titanic.sex == 'male').any()

True

In [56]:
(titanic.sex == 'male').all()

False

In [57]:
(titanic.age == 80.0).any()

True

In [58]:
titanic.loc[titanic.age == 80.0]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
630,1,1,male,80.0,0,0,30.0,S,A


In [59]:
# Small numeric sample containing a zero: used below to show that any() is
# True (some value is non-zero) while all() is False (one value is zero).
pd_series = pd.Series(data=[1, 5, -5, 0, -1, 0.5])

In [60]:
pd_series.any()

True

In [61]:
pd_series.all()

False

In [62]:
# Distinct fare values, in order of first appearance (not sorted yet).
fares = titanic.fare.unique()

In [63]:
# unique() returned an array; its sort() orders the values in place and
# returns nothing, which is why this cell shows no output.
# NOTE(review): this cell is order-dependent - once `fares` has been wrapped
# in a Series, re-running it would fail because a Series has no sort() method.
fares.sort()

In [64]:
# Wrap the sorted array in a Series (the name `fares` is rebound to a new type).
fares = pd.Series(fares)

In [65]:
fares.head()

0    0.0000
1    4.0125
2    5.0000
3    6.2375
4    6.4375
dtype: float64

In [66]:
# all() is True only if every value is truthy. The sorted unique fares start
# at 0.0, so at least one fare is zero and the result is False.
titanic.fare.all()

False

## **Removing columns**

In [67]:
import pandas as pd

In [68]:
summer = pd.read_csv('summer.csv')

In [69]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [70]:
# drop() returns a new DataFrame without 'Sport'. The result is only
# displayed here, so `summer` itself still has the column afterwards.
summer.drop(columns = 'Sport')

Unnamed: 0,Year,City,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [71]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [72]:
summer.drop(columns = ['Sport', 'Discipline'])

Unnamed: 0,Year,City,Athlete,Country,Gender,Event,Medal
0,1896,Athens,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...
31160,2012,London,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [73]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [74]:
# labels + axis is an alternative spelling of drop(columns = ...).
# axis = 1 and axis = "columns" are interchangeable, e.g.
#   summer.drop(labels = 'Event', axis = "columns")
summer.drop(labels = 'Event', axis = 1)

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,Silver
...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Bronze


In [75]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [76]:
# Unlike drop(), del removes the column from `summer` itself and returns nothing.
# Re-running this cell raises KeyError because 'Sport' is already gone;
# reload the CSV to get the column back.
del summer['Sport']

In [77]:
summer.head()

Unnamed: 0,Year,City,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [78]:
summer = pd.read_csv('summer.csv')

In [79]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [80]:
# Alternative to dropping: list the columns to keep ('Sport' and 'Discipline'
# are left out). This returns a new frame; `summer` is not modified.
summer.loc[ :, ['Year', 'City', 'Athlete', 'Country', 'Gender', 'Event', 'Medal']]

Unnamed: 0,Year,City,Athlete,Country,Gender,Event,Medal
0,1896,Athens,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...
31160,2012,London,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


## **Removing rows**

In [81]:
import pandas as pd

In [82]:
# Use the Athlete column as the row index. Athlete names repeat (one row per
# medal, e.g. 'HAJOS, Alfred' appears twice), so this index is not unique.
summer = pd.read_csv('summer.csv',index_col = 'Athlete')

In [83]:
summer.head(10)

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,1200M Freestyle,Gold
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze
"NEUMANN, Paul",1896,Athens,Aquatics,Swimming,AUT,Men,400M Freestyle,Gold


In [84]:
# drop(index = label) removes EVERY row carrying that label - both
# 'HAJOS, Alfred' rows disappear - and returns a new frame; `summer` is unchanged.
summer.drop(index = 'HAJOS, Alfred').head(10)

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze
"NEUMANN, Paul",1896,Athens,Aquatics,Swimming,AUT,Men,400M Freestyle,Gold
"PEPANOS, Antonios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Silver
"LANE, Francis",1896,Athens,Athletics,Athletics,USA,Men,100M,Bronze


In [85]:
summer.drop(index = ['HAJOS, Alfred', 'DRIVAS, Dimitrios']).head(10)

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze
"NEUMANN, Paul",1896,Athens,Aquatics,Swimming,AUT,Men,400M Freestyle,Gold
"PEPANOS, Antonios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Silver
"LANE, Francis",1896,Athens,Athletics,Athletics,USA,Men,100M,Bronze
"SZOKOLYI, Alajos",1896,Athens,Athletics,Athletics,HUN,Men,100M,Bronze


In [86]:
summer.drop(labels = "HERSCHMANN, Otto", axis = 0).head(10)

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,1200M Freestyle,Gold
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze
"NEUMANN, Paul",1896,Athens,Aquatics,Swimming,AUT,Men,400M Freestyle,Gold
"PEPANOS, Antonios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Silver


In [87]:
summer.loc[summer.Year == 1996]

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"XIAO, Hailiang",1996,Atlanta,Aquatics,Diving,CHN,Men,10M Platform,Bronze
"SAUTIN, Dmitry",1996,Atlanta,Aquatics,Diving,RUS,Men,10M Platform,Gold
"HEMPEL, Jan",1996,Atlanta,Aquatics,Diving,GER,Men,10M Platform,Silver
"CLARK, Mary Ellen",1996,Atlanta,Aquatics,Diving,USA,Women,10M Platform,Bronze
"FU, Mingxia",1996,Atlanta,Aquatics,Diving,CHN,Women,10M Platform,Gold
...,...,...,...,...,...,...,...,...
"OLEYNYK, Vyacheslav",1996,Atlanta,Wrestling,Wrestling Gre-R,UKR,Men,82 - 90KG (Light-Heavyweight),Gold
"FAFINSKI, Jacek",1996,Atlanta,Wrestling,Wrestling Gre-R,POL,Men,82 - 90KG (Light-Heavyweight),Silver
"LJUNGBERG, Mikael",1996,Atlanta,Wrestling,Wrestling Gre-R,SWE,Men,90 - 100KG (Heavyweight),Bronze
"WRONSKI, Andrzej",1996,Atlanta,Wrestling,Wrestling Gre-R,POL,Men,90 - 100KG (Heavyweight),Gold


In [88]:
# Two row masks on the Athlete-indexed frame.
filter1 = summer['Year'].eq(1996)
filter2 = summer['Sport'].eq('Aquatics')
# "Remove" rows by keeping the complement: everything that is neither from
# the 1996 games nor an Aquatics medal.
summer.loc[~(filter1 | filter2), :]

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"LANE, Francis",1896,Athens,Athletics,Athletics,USA,Men,100M,Bronze
"SZOKOLYI, Alajos",1896,Athens,Athletics,Athletics,HUN,Men,100M,Bronze
"BURKE, Thomas",1896,Athens,Athletics,Athletics,USA,Men,100M,Gold
"HOFMANN, Fritz",1896,Athens,Athletics,Athletics,GER,Men,100M,Silver
"CURTIS, Thomas",1896,Athens,Athletics,Athletics,USA,Men,110M Hurdles,Gold
...,...,...,...,...,...,...,...,...
"JANIKOWSKI, Damian",2012,London,Wrestling,Wrestling Freestyle,POL,Men,Wg 84 KG,Bronze
"REZAEI, Ghasem Gholamreza",2012,London,Wrestling,Wrestling Freestyle,IRI,Men,Wg 96 KG,Gold
"TOTROV, Rustam",2012,London,Wrestling,Wrestling Freestyle,RUS,Men,Wg 96 KG,Silver
"ALEKSANYAN, Artur",2012,London,Wrestling,Wrestling Freestyle,ARM,Men,Wg 96 KG,Bronze


## **Adding new Columns to a DataFrame**

In [89]:
import pandas as pd

In [90]:
titanic = pd.read_csv('titanic.csv')

In [91]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [92]:
# Looking up a column that does not exist raises KeyError. The error is the
# point of this cell, so catch it and print the message: the failure is
# still visible, but "Restart & Run All" is no longer interrupted here.
try:
    titanic['Zeros']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Zeros'

In [93]:
# Assigning a scalar to a new column label creates the column and repeats
# the value in every row (stored as int64).
titanic['Zeros'] = 0

In [94]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros
0,0,3,male,22.0,1,0,7.25,S,,0
1,1,1,female,38.0,1,0,71.2833,C,C,0
2,1,3,female,26.0,0,0,7.925,S,,0
3,1,1,female,35.0,1,0,53.1,S,C,0
4,0,3,male,35.0,0,0,8.05,S,,0


In [95]:
# The quoted '0' is a string: the column prints just like Zeros but its
# dtype is object, not int64.
titanic['Zeros_2'] = '0'

In [96]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,Zeros_2
0,0,3,male,22.0,1,0,7.25,S,,0,0
1,1,1,female,38.0,1,0,71.2833,C,C,0,0
2,1,3,female,26.0,0,0,7.925,S,,0,0
3,1,1,female,35.0,1,0,53.1,S,C,0,0
4,0,3,male,35.0,0,0,8.05,S,,0,0


In [97]:
titanic.dtypes

survived      int64
pclass        int64
sex          object
age         float64
sibsp         int64
parch         int64
fare        float64
embarked     object
deck         object
Zeros         int64
Zeros_2      object
dtype: object

In [98]:
# Attribute access on a column that does not exist raises AttributeError.
# Catch and print it so the error is shown without stopping "Restart & Run All".
try:
    titanic.Ones
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'DataFrame' object has no attribute 'Ones'

In [99]:
# Pitfall: attribute assignment does NOT add a column. It only sets a plain
# attribute on the DataFrame object - head() still shows no 'Ones' column and
# titanic.Ones simply returns the scalar 1. Use titanic['Ones'] = 1 to
# create a column.
titanic.Ones = 1

In [100]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros,Zeros_2
0,0,3,male,22.0,1,0,7.25,S,,0,0
1,1,1,female,38.0,1,0,71.2833,C,C,0,0
2,1,3,female,26.0,0,0,7.925,S,,0,0
3,1,1,female,35.0,1,0,53.1,S,C,0,0
4,0,3,male,35.0,0,0,8.05,S,,0,0


In [101]:
titanic.Ones

1

## **Creating Columns based on other Columns**

In [102]:
import pandas as pd

In [103]:
titanic = pd.read_csv('titanic.csv')

In [104]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [105]:
1912 - titanic.age

0      1890.0
1      1874.0
2      1886.0
3      1877.0
4      1877.0
        ...  
886    1885.0
887    1893.0
888       NaN
889    1886.0
890    1880.0
Name: age, Length: 891, dtype: float64

In [106]:
titanic['YoB'] = 1912 - titanic.age

In [107]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,YoB
0,0,3,male,22.0,1,0,7.25,S,,1890.0
1,1,1,female,38.0,1,0,71.2833,C,C,1874.0
2,1,3,female,26.0,0,0,7.925,S,,1886.0
3,1,1,female,35.0,1,0,53.1,S,C,1877.0
4,0,3,male,35.0,0,0,8.05,S,,1877.0


In [108]:
titanic['relatives'] = titanic['sibsp'] + titanic['parch']

In [109]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,YoB,relatives
0,0,3,male,22.0,1,0,7.25,S,,1890.0,1
1,1,1,female,38.0,1,0,71.2833,C,C,1874.0,1
2,1,3,female,26.0,0,0,7.925,S,,1886.0,0
3,1,1,female,35.0,1,0,53.1,S,C,1877.0,1
4,0,3,male,35.0,0,0,8.05,S,,1877.0,0


In [110]:
# 'relatives' now carries sibsp + parch, so the two source columns are removed.
# inplace = True modifies titanic itself and returns None; re-running this cell
# fails because the columns are already gone.
titanic.drop(columns = ['sibsp', 'parch'], inplace = True)

In [111]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked,deck,YoB,relatives
0,0,3,male,22.0,7.25,S,,1890.0,1
1,1,1,female,38.0,71.2833,C,C,1874.0,1
2,1,3,female,26.0,7.925,S,,1886.0,0
3,1,1,female,35.0,53.1,S,C,1877.0,1
4,0,3,male,35.0,8.05,S,,1877.0,0


In [112]:
inflation_factor = 10

In [113]:
titanic['ia_fare'] = titanic.fare * inflation_factor

In [114]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked,deck,YoB,relatives,ia_fare
0,0,3,male,22.0,7.25,S,,1890.0,1,72.5
1,1,1,female,38.0,71.2833,C,C,1874.0,1,712.833
2,1,3,female,26.0,7.925,S,,1886.0,0,79.25
3,1,1,female,35.0,53.1,S,C,1877.0,1,531.0
4,0,3,male,35.0,8.05,S,,1877.0,0,80.5


## **Adding Columns with insert()**

In [115]:
import pandas as pd

In [116]:
titanic = pd.read_csv('titanic.csv')

In [117]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [118]:
relatives = titanic['sibsp'] + titanic['parch']

In [119]:
relatives.head()

0    1
1    1
2    0
3    1
4    0
dtype: int64

In [120]:
# Put the combined count at column position 6, i.e. directly after 'parch'.
titanic.insert(loc = 6, column = 'relatives', value = relatives)

In [121]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,relatives,fare,embarked,deck
0,0,3,male,22.0,1,0,1,7.25,S,
1,1,1,female,38.0,1,0,1,71.2833,C,C
2,1,3,female,26.0,0,0,0,7.925,S,
3,1,1,female,35.0,1,0,1,53.1,S,C
4,0,3,male,35.0,0,0,0,8.05,S,


## **Creating DataFrames from Scratch with pd.DataFrame()**

In [122]:
import pandas as pd

In [123]:
# Player names; used both as a regular column and as row labels further down.
player = [
    "Lionel Messi",
    "Cristiano Ronaldo",
    "Neymar Junior",
    "Kylian Mbappe",
    "Manuel Neuer",
]

In [124]:
nationality = ["Argentina", "Portugal", "Brasil", "France", "Germany"]

In [125]:
club = ["FC Barcelona", "Juventus FC", "Paris SG", "Paris SG", "FC Bayern" ]

In [126]:
world_champion = [False, False, False, True, True]

In [127]:
height = [1.70, 1.87, 1.75, 1.78, 1.93]

In [128]:
goals = [45, 44, 28, 21, 0]

In [129]:
# Column label -> list of values; each list holds one entry per player, in the same order.
dic = {
    "Player": player,
    "Nationality": nationality,
    "Club": club,
    "World_Champion": world_champion,
    "Height": height,
    "Goals_2018": goals,
}

In [130]:
dic

{'Player': ['Lionel Messi',
  'Cristiano Ronaldo',
  'Neymar Junior',
  'Kylian Mbappe',
  'Manuel Neuer'],
 'Nationality': ['Argentina', 'Portugal', 'Brasil', 'France', 'Germany'],
 'Club': ['FC Barcelona', 'Juventus FC', 'Paris SG', 'Paris SG', 'FC Bayern'],
 'World_Champion': [False, False, False, True, True],
 'Height': [1.7, 1.87, 1.75, 1.78, 1.93],
 'Goals_2018': [45, 44, 28, 21, 0]}

In [131]:
df = pd.DataFrame(data = dic)

In [132]:
df

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,False,1.7,45
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
2,Neymar Junior,Brasil,Paris SG,False,1.75,28
3,Kylian Mbappe,France,Paris SG,True,1.78,21
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [133]:
players = df.set_index('Player')

In [134]:
players

Unnamed: 0_level_0,Nationality,Club,World_Champion,Height,Goals_2018
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel Messi,Argentina,FC Barcelona,False,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [135]:
# Transpose the per-column lists into one tuple per player (row-wise records).
raw_data = [*zip(nationality, club, world_champion, height, goals)]

In [136]:
raw_data

[('Argentina', 'FC Barcelona', False, 1.7, 45),
 ('Portugal', 'Juventus FC', False, 1.87, 44),
 ('Brasil', 'Paris SG', False, 1.75, 28),
 ('France', 'Paris SG', True, 1.78, 21),
 ('Germany', 'FC Bayern', True, 1.93, 0)]

In [137]:
messi, ronaldo, neymar, mbappe, neuer = raw_data

In [138]:
messi

('Argentina', 'FC Barcelona', False, 1.7, 45)

In [139]:
headers = ["Nationality", "Club", "World_Champion", "Height", "Goals_2018"]

In [140]:
df_1 = pd.DataFrame(data = raw_data, index = player, columns = headers)

In [141]:
df_1

Unnamed: 0,Nationality,Club,World_Champion,Height,Goals_2018
Lionel Messi,Argentina,FC Barcelona,False,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [142]:
# Same frame as df_1, built from the individually unpacked row tuples.
df_2 = pd.DataFrame(
    data = [messi, ronaldo, neymar, mbappe, neuer],
    columns = headers,
    index = player,
)

In [143]:
df_2

Unnamed: 0,Nationality,Club,World_Champion,Height,Goals_2018
Lionel Messi,Argentina,FC Barcelona,False,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [144]:
# Start from a single named Series and promote it to a one-column DataFrame;
# the Series name becomes the column label.
df_3 = (
    pd.Series(data = nationality, index = player, name = 'Nationality')
    .to_frame()
)

In [145]:
df_3

Unnamed: 0,Nationality
Lionel Messi,Argentina
Cristiano Ronaldo,Portugal
Neymar Junior,Brasil
Kylian Mbappe,France
Manuel Neuer,Germany


In [146]:
df_3['Club'] = club

In [147]:
df_3

Unnamed: 0,Nationality,Club
Lionel Messi,Argentina,FC Barcelona
Cristiano Ronaldo,Portugal,Juventus FC
Neymar Junior,Brasil,Paris SG
Kylian Mbappe,France,Paris SG
Manuel Neuer,Germany,FC Bayern


## **Adding new Rows (hands-on approach)**

In [148]:
# `players` comes from the previous section (df.set_index('Player')).
# Move 'Player' back into a regular column so the frame has a 0..n-1 integer index
# and new rows can be added by integer label. inplace = True mutates players directly.
players.reset_index(inplace = True)

In [149]:
players

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,False,1.7,45
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
2,Neymar Junior,Brasil,Paris SG,False,1.75,28
3,Kylian Mbappe,France,Paris SG,True,1.78,21
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [150]:
# Append one row by assigning to a label (5) that does not exist yet (setting with enlargement).
# The values must follow the column order: Player, Nationality, Club, World_Champion, Height, Goals_2018.
# World_Champion is the boolean False, not the string 'False': a non-empty string is truthy and
# would mix str values into a boolean column, breaking later boolean filtering.
players.loc[5, :] = ['Berbatov', 'Bulgaria', 'Man Utd', False, 1.88, 0]

In [151]:
players

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,False,1.7,45.0
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44.0
2,Neymar Junior,Brasil,Paris SG,False,1.75,28.0
3,Kylian Mbappe,France,Paris SG,True,1.78,21.0
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0.0
5,Berbatov,Bulgaria,Man Utd,False,1.88,0.0


In [152]:
# Collect the additional rows in their own DataFrame, reusing players' column labels
# so both frames line up column-by-column when concatenated.
# World_Champion uses real booleans (the string 'False' would be truthy), and the club
# name has no leading space so it compares equal to the existing 'FC Barcelona' value.
new = pd.DataFrame(data = [['Stoichkov', 'Bulgaria', 'FC Barcelona', False, 1.80, 0],
                           ['van Basten', 'Netherlands', 'FC Milan', False, 1.90, 0]],
                   columns = players.columns)

In [153]:
new

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Stoichkov,Bulgaria,FC Barcelona,False,1.8,0
1,van Basten,Netherlands,FC Milan,False,1.9,0


In [154]:
# Stack the new rows underneath players. ignore_index = True discards both original
# indexes and renumbers the result 0..n-1, so new's own labels 0 and 1 do not clash
# with the existing rows.
mixed_df = pd.concat([players, new], ignore_index = True)

In [155]:
mixed_df

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,False,1.7,45.0
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44.0
2,Neymar Junior,Brasil,Paris SG,False,1.75,28.0
3,Kylian Mbappe,France,Paris SG,True,1.78,21.0
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0.0
5,Berbatov,Bulgaria,Man Utd,False,1.88,0.0
6,Stoichkov,Bulgaria,FC Barcelona,False,1.8,0.0
7,van Basten,Netherlands,FC Milan,False,1.9,0.0


In [156]:
mixed_df.set_index('Player', inplace = True)

In [157]:
mixed_df

Unnamed: 0_level_0,Nationality,Club,World_Champion,Height,Goals_2018
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel Messi,Argentina,FC Barcelona,False,1.7,45.0
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44.0
Neymar Junior,Brasil,Paris SG,False,1.75,28.0
Kylian Mbappe,France,Paris SG,True,1.78,21.0
Manuel Neuer,Germany,FC Bayern,True,1.93,0.0
Berbatov,Bulgaria,Man Utd,False,1.88,0.0
Stoichkov,Bulgaria,FC Barcelona,False,1.8,0.0
van Basten,Netherlands,FC Milan,False,1.9,0.0
