## DataFrame Basics II

### Filtering DataFrames with one Condition

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv("titanic.csv")

In [3]:
titanic.head(10)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [4]:
titanic.sex.head(10)

0      male
1    female
2    female
3    female
4      male
5      male
6      male
7      male
8    female
9    female
Name: sex, dtype: object

In [5]:
titanic.sex == "male"

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [6]:
titanic[titanic.sex == "male"]["fare"]

0       7.2500
4       8.0500
5       8.4583
6      51.8625
7      21.0750
        ...   
883    10.5000
884     7.0500
886    13.0000
889    30.0000
890     7.7500
Name: fare, Length: 577, dtype: float64

In [7]:
titanic.loc[titanic.sex == "male", "fare"]

0       7.2500
4       8.0500
5       8.4583
6      51.8625
7      21.0750
        ...   
883    10.5000
884     7.0500
886    13.0000
889    30.0000
890     7.7500
Name: fare, Length: 577, dtype: float64

In [8]:
mask1 = titanic.sex == "male"
mask1

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [9]:
titanic_male = titanic.loc[mask1]

In [10]:
titanic_male.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,


In [11]:
titanic.dtypes  # one dtype per column; the next cell compares these against object to build a column mask

survived      int64
pclass        int64
sex          object
age         float64
sibsp         int64
parch         int64
fare        float64
embarked     object
deck         object
dtype: object

In [12]:
mask2 = titanic.dtypes == object
mask2

survived    False
pclass      False
sex          True
age         False
sibsp       False
parch       False
fare        False
embarked     True
deck         True
dtype: bool

In [13]:
titanic.loc[:, ~mask2]  # all rows; only the columns whose dtype is not object (~ inverts mask2)

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
0,0,3,22.0,1,0,7.2500
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.9250
3,1,1,35.0,1,0,53.1000
4,0,3,35.0,0,0,8.0500
...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000
887,1,1,19.0,0,0,30.0000
888,0,3,,1,2,23.4500
889,1,1,26.0,0,0,30.0000


### Filtering DataFrames with many Conditions (AND)

In [14]:
import pandas as pd

In [15]:
titanic = pd.read_csv("titanic.csv")

In [16]:
titanic.head(10)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [17]:
mask1 = titanic.sex == "male"
mask1.head()

0     True
1    False
2    False
3    False
4     True
Name: sex, dtype: bool

In [18]:
# True where age is above 14. A missing age (NaN) compares as False,
# so passengers without a recorded age are never flagged by this mask.
mask2 = titanic.age > 14
mask2.head()

0    True
1    True
2    True
3    True
4    True
Name: age, dtype: bool

In [19]:
(mask1 & mask2).head()

0     True
1    False
2    False
3    False
4     True
dtype: bool

In [20]:
male_adult = titanic.loc[mask1 & mask2, ["survived", "pclass", "sex", "age"]]
male_adult.head(20)

Unnamed: 0,survived,pclass,sex,age
0,0,3,male,22.0
4,0,3,male,35.0
6,0,1,male,54.0
12,0,3,male,20.0
13,0,3,male,39.0
20,0,2,male,35.0
21,1,2,male,34.0
23,1,1,male,28.0
27,0,1,male,19.0
30,0,1,male,40.0


In [21]:
male_adult.info()

<class 'pandas.core.frame.DataFrame'>
Index: 414 entries, 0 to 890
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   survived  414 non-null    int64  
 1   pclass    414 non-null    int64  
 2   sex       414 non-null    object 
 3   age       414 non-null    float64
dtypes: float64(1), int64(2), object(1)
memory usage: 16.2+ KB


In [22]:
male_adult.describe()


Unnamed: 0,survived,pclass,age
count,414.0,414.0,414.0
mean,0.173913,2.309179,33.129227
std,0.379493,0.829868,12.922177
min,0.0,1.0,15.0
25%,0.0,2.0,23.0
50%,0.0,3.0,30.0
75%,0.0,3.0,40.0
max,1.0,3.0,80.0


In [23]:
titanic.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,38.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


### Filtering DataFrames with many Conditions (OR)

In [24]:
import pandas as pd

In [25]:
titanic = pd.read_csv("titanic.csv")

In [26]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [27]:
mask1 = titanic.sex == "female"
mask1.head(20)

0     False
1      True
2      True
3      True
4     False
5     False
6     False
7     False
8      True
9      True
10     True
11     True
12    False
13    False
14     True
15     True
16    False
17    False
18     True
19     True
Name: sex, dtype: bool

In [28]:
# True where age is below 14. A missing age (NaN) compares as False here;
# such rows can still be selected through mask1 in the OR combination below.
mask2 = titanic.age < 14
mask2.head(20)

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7      True
8     False
9     False
10     True
11    False
12    False
13    False
14    False
15    False
16     True
17    False
18    False
19    False
Name: age, dtype: bool

In [29]:
(mask1 | mask2).head(11)

0     False
1      True
2      True
3      True
4     False
5     False
6     False
7      True
8      True
9      True
10     True
dtype: bool

In [30]:
titanic.loc[mask1 | mask2]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
7,0,3,male,2.0,3,1,21.0750,S,
8,1,3,female,27.0,0,2,11.1333,S,
...,...,...,...,...,...,...,...,...,...
880,1,2,female,25.0,0,1,26.0000,S,
882,0,3,female,22.0,0,0,10.5167,S,
885,0,3,female,39.0,0,5,29.1250,Q,
887,1,1,female,19.0,0,0,30.0000,S,B


In [31]:
wom_or_chi = titanic.loc[mask1 | mask2, ["survived", "pclass", "sex", "age"]]

In [32]:
wom_or_chi.head()

Unnamed: 0,survived,pclass,sex,age
1,1,1,female,38.0
2,1,3,female,26.0
3,1,1,female,35.0
7,0,3,male,2.0
8,1,3,female,27.0


In [33]:
wom_or_chi.info()

<class 'pandas.core.frame.DataFrame'>
Index: 351 entries, 1 to 888
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   survived  351 non-null    int64  
 1   pclass    351 non-null    int64  
 2   sex       351 non-null    object 
 3   age       298 non-null    float64
dtypes: float64(1), int64(2), object(1)
memory usage: 13.7+ KB


In [34]:
wom_or_chi.describe()

Unnamed: 0,survived,pclass,age
count,351.0,351.0,298.0
mean,0.723647,2.205128,25.039161
std,0.447832,0.847232,15.314631
min,0.0,1.0,0.42
25%,0.0,1.0,14.125
50%,1.0,2.0,24.0
75%,1.0,3.0,35.0
max,1.0,3.0,63.0


In [None]:
titanic.describe()

### Advanced Filtering with between(), isin() and ~

In [35]:
import pandas as pd

In [36]:
summer = pd.read_csv("summer.csv")

In [37]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [38]:
og_1988 = summer.loc[summer.Year == 1988]

In [39]:
og_1988.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
18051,1988,Seoul,Aquatics,Diving,"MENA CAMPOS, Jesus",MEX,Men,10M Platform,Bronze
18052,1988,Seoul,Aquatics,Diving,"LOUGANIS, Gregory",USA,Men,10M Platform,Gold
18053,1988,Seoul,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Silver
18054,1988,Seoul,Aquatics,Diving,"WYLAND-WILLIAMS, Wendy Lian",USA,Women,10M Platform,Bronze
18055,1988,Seoul,Aquatics,Diving,"XU, Yan-Mei",CHN,Women,10M Platform,Gold


In [40]:
og_1988.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
19592,1988,Seoul,Wrestling,Wrestling Gre-R,"KOMCHEV, Atanas Slavov",BUL,Men,82 - 90KG (Light-Heavyweight),Gold
19593,1988,Seoul,Wrestling,Wrestling Gre-R,"KOSKELA, Harri Matias",FIN,Men,82 - 90KG (Light-Heavyweight),Silver
19594,1988,Seoul,Wrestling,Wrestling Gre-R,"KOSLOWSKI, Dennis Marvin",USA,Men,90 - 100KG (Heavyweight),Bronze
19595,1988,Seoul,Wrestling,Wrestling Gre-R,"WRONSKI, Andrzej",POL,Men,90 - 100KG (Heavyweight),Gold
19596,1988,Seoul,Wrestling,Wrestling Gre-R,"HIMMEL, Gerhard",FRG,Men,90 - 100KG (Heavyweight),Silver


In [41]:
og_1988.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1546 entries, 18051 to 19596
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Year        1546 non-null   int64 
 1   City        1546 non-null   object
 2   Sport       1546 non-null   object
 3   Discipline  1546 non-null   object
 4   Athlete     1546 non-null   object
 5   Country     1546 non-null   object
 6   Gender      1546 non-null   object
 7   Event       1546 non-null   object
 8   Medal       1546 non-null   object
dtypes: int64(1), object(8)
memory usage: 120.8+ KB


In [42]:
og_since1992 = summer.loc[summer.Year >= 1992]

In [43]:
og_since1992.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
19597,1992,Barcelona,Aquatics,Diving,"XIONG, Ni",CHN,Men,10M Platform,Bronze
19598,1992,Barcelona,Aquatics,Diving,"SUN, Shuwei",CHN,Men,10M Platform,Gold
19599,1992,Barcelona,Aquatics,Diving,"DONIE, Scott R.",USA,Men,10M Platform,Silver
19600,1992,Barcelona,Aquatics,Diving,"CLARK, Mary Ellen",USA,Women,10M Platform,Bronze
19601,1992,Barcelona,Aquatics,Diving,"FU, Mingxia",CHN,Women,10M Platform,Gold


In [44]:
og_since1992.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
31160,2012,London,Wrestling,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze
31164,2012,London,Wrestling,Wrestling Freestyle,"LIDBERG, Jimmy",SWE,Men,Wg 96 KG,Bronze


In [49]:
# Tip: summer.Year.unique() lists the years present in the data.
# between() includes both endpoints by default, so this flags rows with 1960 <= Year <= 1969.
summer.Year.between(1960, 1969).head()


0    False
1    False
2    False
3    False
4    False
Name: Year, dtype: bool

In [None]:
# Older spelling, kept for reference only: pandas before 1.3 took a boolean here; current versions expect a string.
# og_60s = summer.loc[summer.Year.between(1960, 1969, inclusive=True)]

In [50]:
og_60s = summer.loc[summer.Year.between(1960, 1969, inclusive="both")]  # "both" keeps rows equal to either endpoint (1960 or 1969)

In [51]:
og_60s.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
9792,1960,Rome,Aquatics,Diving,"PHELPS, Brian Eric",GBR,Men,10M Platform,Bronze
9793,1960,Rome,Aquatics,Diving,"WEBSTER, Robert David",USA,Men,10M Platform,Gold
9794,1960,Rome,Aquatics,Diving,"TOBIAN, Gary Milburn",USA,Men,10M Platform,Silver
9795,1960,Rome,Aquatics,Diving,"KRUTOVA, Ninel",URS,Women,10M Platform,Bronze
9796,1960,Rome,Aquatics,Diving,"KRÄMER-ENGEL-GULBIN, Ingrid",EUA,Women,10M Platform,Gold


In [52]:
og_60s.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
12710,1968,Mexico,Wrestling,Wrestling Gre-R,"METZ, Lothar",GDR,Men,78 - 87KG (Middleweight),Gold
12711,1968,Mexico,Wrestling,Wrestling Gre-R,"OLEINIK, Valentin",URS,Men,78 - 87KG (Middleweight),Silver
12712,1968,Mexico,Wrestling,Wrestling Gre-R,"MARTINESCU, Nicolae",ROU,Men,87 - 97KG (Light-Heavyweight),Bronze
12713,1968,Mexico,Wrestling,Wrestling Gre-R,"RADEV, Boyan Aleksandrov",BUL,Men,87 - 97KG (Light-Heavyweight),Gold
12714,1968,Mexico,Wrestling,Wrestling Gre-R,"YAKOVENKO, Nikolai",URS,Men,87 - 97KG (Light-Heavyweight),Silver


In [53]:
my_favourite_games = [1972, 1996]

In [54]:
summer.Year.isin(my_favourite_games).head()

0    False
1    False
2    False
3    False
4    False
Name: Year, dtype: bool

In [55]:
og_72_96 = summer.loc[summer.Year.isin(my_favourite_games)]

In [56]:
og_72_96.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
12715,1972,Munich,Aquatics,Diving,"CAGNOTTO, Giorgio Franco",ITA,Men,10M Platform,Bronze
12716,1972,Munich,Aquatics,Diving,"DIBIASI, Klaus",ITA,Men,10M Platform,Gold
12717,1972,Munich,Aquatics,Diving,"RYDZE, Richard Anthony",USA,Men,10M Platform,Silver
12718,1972,Munich,Aquatics,Diving,"JANICKE, Marina",GDR,Women,10M Platform,Bronze
12719,1972,Munich,Aquatics,Diving,"KNAPE-LINDBERGH, Ulrika",SWE,Women,10M Platform,Gold


In [57]:
og_72_96.tail()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
23156,1996,Atlanta,Wrestling,Wrestling Gre-R,"OLEYNYK, Vyacheslav",UKR,Men,82 - 90KG (Light-Heavyweight),Gold
23157,1996,Atlanta,Wrestling,Wrestling Gre-R,"FAFINSKI, Jacek",POL,Men,82 - 90KG (Light-Heavyweight),Silver
23158,1996,Atlanta,Wrestling,Wrestling Gre-R,"LJUNGBERG, Mikael",SWE,Men,90 - 100KG (Heavyweight),Bronze
23159,1996,Atlanta,Wrestling,Wrestling Gre-R,"WRONSKI, Andrzej",POL,Men,90 - 100KG (Heavyweight),Gold
23160,1996,Atlanta,Wrestling,Wrestling Gre-R,"LISHTVAN, Sergey",BLR,Men,90 - 100KG (Heavyweight),Silver


In [58]:
og_not_72_96 = summer.loc[~summer.Year.isin(my_favourite_games)]

In [59]:
og_not_72_96.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [60]:
og_not_72_96.Year.unique()

array([1896, 1900, 1904, 1908, 1912, 1920, 1924, 1928, 1932, 1936, 1948,
       1952, 1956, 1960, 1964, 1968, 1976, 1980, 1984, 1988, 1992, 2000,
       2004, 2008, 2012])

### any() and all()

In [61]:
import pandas as pd

In [62]:
titanic = pd.read_csv("titanic.csv")

In [69]:
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
886,0,2,male,27.0,0,0,13.0,S,
887,1,1,female,19.0,0,0,30.0,S,B
888,0,3,female,,1,2,23.45,S,
889,1,1,male,26.0,0,0,30.0,C,C
890,0,3,male,32.0,0,0,7.75,Q,


In [64]:
titanic.sex == "male"

0       True
1      False
2      False
3      False
4       True
       ...  
886     True
887    False
888    False
889     True
890     True
Name: sex, Length: 891, dtype: bool

In [65]:
(titanic.sex == "male").any()
# The .any() method is used to check if any value in the boolean Series is True.
# If at least one value is True, .any() will return True. If all values are False, it will return False.

np.True_

In [66]:
(titanic.sex == "male").all()

np.False_

In [None]:
(titanic.age == 80.0).any()

np.True_

In [75]:
# On numeric data, Series.any() reports whether at least one value is non-zero (truthy).
# Only the last expression of a cell is echoed, so the output below belongs to the second line.
pd.Series([0, 0, 0, 0, 0]).any()   # all zeros; this result is computed but not displayed
pd.Series([-1, 0, 0, 0, 0]).any()  # one non-zero value (-1) is enough for True

np.True_

In [71]:
# Checks whether every value in the fare column is non-zero (truthy);
# a single fare of 0 is enough to make the result False.
titanic.fare.all()

np.False_

### Removing Columns

In [76]:
import pandas as pd

In [77]:
summer = pd.read_csv("summer.csv")

In [78]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [79]:
summer.drop(columns = "Sport")

Unnamed: 0,Year,City,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...
31160,2012,London,Wrestling Freestyle,"JANIKOWSKI, Damian",POL,Men,Wg 84 KG,Bronze
31161,2012,London,Wrestling Freestyle,"REZAEI, Ghasem Gholamreza",IRI,Men,Wg 96 KG,Gold
31162,2012,London,Wrestling Freestyle,"TOTROV, Rustam",RUS,Men,Wg 96 KG,Silver
31163,2012,London,Wrestling Freestyle,"ALEKSANYAN, Artur",ARM,Men,Wg 96 KG,Bronze


In [81]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [None]:
summer.drop(columns = ["Sport", "Discipline"], inplace=True)
summer.head()

Unnamed: 0,Year,City,Athlete,Country,Gender,Event,Medal
0,1896,Athens,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [85]:
summer.drop(labels = "Event", axis = "columns", inplace= True)

In [86]:
#del summer["Event"]

In [87]:
summer.head()

Unnamed: 0,Year,City,Athlete,Country,Gender,Medal
0,1896,Athens,"HAJOS, Alfred",HUN,Men,Gold
1,1896,Athens,"HERSCHMANN, Otto",AUT,Men,Silver
2,1896,Athens,"DRIVAS, Dimitrios",GRE,Men,Bronze
3,1896,Athens,"MALOKINIS, Ioannis",GRE,Men,Gold
4,1896,Athens,"CHASAPIS, Spiridon",GRE,Men,Silver


In [88]:
summer = pd.read_csv("summer.csv")

In [89]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [90]:
summer = summer.loc[:,["Year", "City", "Athlete", "Country", "Gender", "Medal"]]

In [91]:
summer.head()

Unnamed: 0,Year,City,Athlete,Country,Gender,Medal
0,1896,Athens,"HAJOS, Alfred",HUN,Men,Gold
1,1896,Athens,"HERSCHMANN, Otto",AUT,Men,Silver
2,1896,Athens,"DRIVAS, Dimitrios",GRE,Men,Bronze
3,1896,Athens,"MALOKINIS, Ioannis",GRE,Men,Gold
4,1896,Athens,"CHASAPIS, Spiridon",GRE,Men,Silver


### Removing Rows

In [92]:
import pandas as pd

In [93]:
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [94]:
summer.head(10)

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,1200M Freestyle,Gold
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze
"NEUMANN, Paul",1896,Athens,Aquatics,Swimming,AUT,Men,400M Freestyle,Gold


In [95]:
summer.drop(index = "HAJOS, Alfred")

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
...,...,...,...,...,...,...,...,...
"JANIKOWSKI, Damian",2012,London,Wrestling,Wrestling Freestyle,POL,Men,Wg 84 KG,Bronze
"REZAEI, Ghasem Gholamreza",2012,London,Wrestling,Wrestling Freestyle,IRI,Men,Wg 96 KG,Gold
"TOTROV, Rustam",2012,London,Wrestling,Wrestling Freestyle,RUS,Men,Wg 96 KG,Silver
"ALEKSANYAN, Artur",2012,London,Wrestling,Wrestling Freestyle,ARM,Men,Wg 96 KG,Bronze


In [96]:
summer.drop(index = ["HAJOS, Alfred","HERSCHMANN, Otto"], inplace = True)

In [97]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver


In [98]:
summer.drop(labels = "DRIVAS, Dimitrios", axis = 0,  inplace = True)

In [99]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Bronze
"ANDREOU, Joannis",1896,Athens,Aquatics,Swimming,GRE,Men,1200M Freestyle,Silver
"CHOROPHAS, Efstathios",1896,Athens,Aquatics,Swimming,GRE,Men,400M Freestyle,Bronze


In [100]:
summer = summer.loc[summer.Year == 1996]

In [101]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"XIAO, Hailiang",1996,Atlanta,Aquatics,Diving,CHN,Men,10M Platform,Bronze
"SAUTIN, Dmitry",1996,Atlanta,Aquatics,Diving,RUS,Men,10M Platform,Gold
"HEMPEL, Jan",1996,Atlanta,Aquatics,Diving,GER,Men,10M Platform,Silver
"CLARK, Mary Ellen",1996,Atlanta,Aquatics,Diving,USA,Women,10M Platform,Bronze
"FU, Mingxia",1996,Atlanta,Aquatics,Diving,CHN,Women,10M Platform,Gold


In [102]:
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [103]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [104]:
mask1 = summer.Year == 1996
mask2 = summer.Sport == "Aquatics"

In [105]:
# Keep only the rows that match neither mask. By De Morgan, ~(mask1 | mask2) equals
# ~mask1 & ~mask2: the year is not 1996 AND the sport is not "Aquatics".
summer = summer.loc[~(mask1 | mask2)]

In [106]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"LANE, Francis",1896,Athens,Athletics,Athletics,USA,Men,100M,Bronze
"SZOKOLYI, Alajos",1896,Athens,Athletics,Athletics,HUN,Men,100M,Bronze
"BURKE, Thomas",1896,Athens,Athletics,Athletics,USA,Men,100M,Gold
"HOFMANN, Fritz",1896,Athens,Athletics,Athletics,GER,Men,100M,Silver
"CURTIS, Thomas",1896,Athens,Athletics,Athletics,USA,Men,110M Hurdles,Gold


In [107]:
(summer.Year == 1996).value_counts()

Year
False    25398
Name: count, dtype: int64

In [108]:
1996 in summer.Year.values

False

In [None]:
"Aquatics" in summer.Sport.values
summer.Sport.isin(["Aquatics"]).any()

False

In [113]:
(summer.Sport == "Aquatics").any()

np.False_

### Adding new Columns to a DataFrame

In [114]:
import pandas as pd

In [115]:
titanic = pd.read_csv("titanic.csv")

In [116]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [117]:
titanic["Zeros"] = "Zero"

In [118]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Zeros
0,0,3,male,22.0,1,0,7.25,S,,Zero
1,1,1,female,38.0,1,0,71.2833,C,C,Zero
2,1,3,female,26.0,0,0,7.925,S,,Zero
3,1,1,female,35.0,1,0,53.1,S,C,Zero
4,0,3,male,35.0,0,0,8.05,S,,Zero


In [119]:
titanic.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'deck', 'Zeros'],
      dtype='object')

In [120]:
import pandas as pd

# Toy stand-in for the Titanic data: four passengers, three columns.
titanic = pd.DataFrame(
    data={
        'Name': ['John', 'Alice', 'Bob', 'Diana'],
        'Age': [22, 30, 25, 35],
        'Survived': [1, 0, 1, 0],
    }
)

# Column labels before the new column exists.
print("Before adding 'Ones' column:", titanic.columns, sep="\n")

# Assigning a scalar to a new label broadcasts that value to every row.
titanic['Ones'] = 1

# Column labels again, to confirm 'Ones' was appended at the end.
print("\nAfter adding 'Ones' column:", titanic.columns, sep="\n")

# Finally, the complete frame.
print("\nThe DataFrame:", titanic, sep="\n")

Before adding 'Ones' column:
Index(['Name', 'Age', 'Survived'], dtype='object')

After adding 'Ones' column:
Index(['Name', 'Age', 'Survived', 'Ones'], dtype='object')

The DataFrame:
    Name  Age  Survived  Ones
0   John   22         1     1
1  Alice   30         0     1
2    Bob   25         1     1
3  Diana   35         0     1


### Creating Columns based on other Columns

In [121]:
import pandas as pd

In [122]:
titanic = pd.read_csv("titanic.csv")

In [123]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [124]:
1912 - titanic.age

0      1890.0
1      1874.0
2      1886.0
3      1877.0
4      1877.0
        ...  
886    1885.0
887    1893.0
888       NaN
889    1886.0
890    1880.0
Name: age, Length: 891, dtype: float64

In [125]:
titanic["YoB"] = 1912 - titanic.age

In [126]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,YoB
0,0,3,male,22.0,1,0,7.25,S,,1890.0
1,1,1,female,38.0,1,0,71.2833,C,C,1874.0
2,1,3,female,26.0,0,0,7.925,S,,1886.0
3,1,1,female,35.0,1,0,53.1,S,C,1877.0
4,0,3,male,35.0,0,0,8.05,S,,1877.0


In [127]:
titanic.sibsp + titanic.parch

0      1
1      1
2      0
3      1
4      0
      ..
886    0
887    0
888    3
889    0
890    0
Length: 891, dtype: int64

In [128]:
titanic["relatives"] = titanic.sibsp + titanic.parch

In [129]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,YoB,relatives
0,0,3,male,22.0,1,0,7.25,S,,1890.0,1
1,1,1,female,38.0,1,0,71.2833,C,C,1874.0,1
2,1,3,female,26.0,0,0,7.925,S,,1886.0,0
3,1,1,female,35.0,1,0,53.1,S,C,1877.0,1
4,0,3,male,35.0,0,0,8.05,S,,1877.0,0


In [130]:
titanic.drop(columns = ["sibsp", "parch"], inplace = True)

In [131]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked,deck,YoB,relatives
0,0,3,male,22.0,7.25,S,,1890.0,1
1,1,1,female,38.0,71.2833,C,C,1874.0,1
2,1,3,female,26.0,7.925,S,,1886.0,0
3,1,1,female,35.0,53.1,S,C,1877.0,1
4,0,3,male,35.0,8.05,S,,1877.0,0


In [None]:
inflation_factor = 10

In [132]:
# Preview the fares scaled by inflation_factor (defined in the cell above) instead of a hard-coded 10.
titanic.fare * inflation_factor

0       72.500
1      712.833
2       79.250
3      531.000
4       80.500
        ...   
886    130.000
887    300.000
888    234.500
889    300.000
890     77.500
Name: fare, Length: 891, dtype: float64

In [133]:
# Overwrite the fare column with the inflated values, using the named inflation_factor
# rather than a literal 10. Note: every re-run of this cell scales the already-scaled fares again.
titanic["fare"] = titanic["fare"] * inflation_factor

In [134]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,fare,embarked,deck,YoB,relatives
0,0,3,male,22.0,72.5,S,,1890.0,1
1,1,1,female,38.0,712.833,C,C,1874.0,1
2,1,3,female,26.0,79.25,S,,1886.0,0
3,1,1,female,35.0,531.0,S,C,1877.0,1
4,0,3,male,35.0,80.5,S,,1877.0,0


### Adding Columns with insert()

In [135]:
import pandas as pd

In [136]:
titanic = pd.read_csv("titanic.csv")

In [137]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [138]:
titanic["Test"] = "Test"

In [139]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,Test
0,0,3,male,22.0,1,0,7.25,S,,Test
1,1,1,female,38.0,1,0,71.2833,C,C,Test
2,1,3,female,26.0,0,0,7.925,S,,Test
3,1,1,female,35.0,1,0,53.1,S,C,Test
4,0,3,male,35.0,0,0,8.05,S,,Test


In [140]:
relatives = titanic.sibsp + titanic.parch
relatives.head()

0    1
1    1
2    0
3    1
4    0
dtype: int64

In [141]:
# Place the new column at position 6 (0-based), i.e. directly before "fare".
# insert() modifies titanic in place; running this cell a second time fails because
# pandas refuses to insert a column label that already exists (unless allow_duplicates=True).
titanic.insert(loc = 6, column = "relatives", value = relatives)

In [142]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,relatives,fare,embarked,deck,Test
0,0,3,male,22.0,1,0,1,7.25,S,,Test
1,1,1,female,38.0,1,0,1,71.2833,C,C,Test
2,1,3,female,26.0,0,0,0,7.925,S,,Test
3,1,1,female,35.0,1,0,1,53.1,S,C,Test
4,0,3,male,35.0,0,0,0,8.05,S,,Test


### Creating DataFrames from Scratch with pd.DataFrame()

In [149]:
import pandas as pd

#### Having Columns in place

In [150]:
# Player names; later used both as a column and as row labels.
player = [
    "Lionel Messi",
    "Cristiano Ronaldo",
    "Neymar Junior",
    "Kylian Mbappe",
    "Manuel Neuer",
]

In [151]:
# One nationality per player, in the same order as the player list.
nationality = [
    "Argentina",
    "Portugal",
    "Brasil",
    "France",
    "Germany",
]

In [152]:
# One club per player, in the same order as the player list.
club = [
    "FC Barcelona",
    "Juventus FC",
    "Paris SG",
    "Paris SG",
    "FC Bayern",
]

In [153]:
# One boolean flag per player, in the same order as the player list.
world_champion = [
    True,
    False,
    False,
    True,
    True,
]

In [154]:
# One height value per player, in the same order as the player list.
height = [
    1.70,
    1.87,
    1.75,
    1.78,
    1.93,
]

In [155]:
# One goal count per player, in the same order as the player list.
goals = [
    45,
    44,
    28,
    21,
    0,
]

In [156]:
# Column-oriented layout: each key is a column label and each value is the
# list holding that column's entries.
dic = {
    "Player": player,
    "Nationality": nationality,
    "Club": club,
    "World_Champion": world_champion,
    "Height": height,
    "Goals_2018": goals,
}

In [158]:
dic

{'Player': ['Lionel Messi',
  'Cristiano Ronaldo',
  'Neymar Junior',
  'Kylian Mbappe',
  'Manuel Neuer'],
 'Nationality': ['Argentina', 'Portugal', 'Brasil', 'France', 'Germany'],
 'Club': ['FC Barcelona', 'Juventus FC', 'Paris SG', 'Paris SG', 'FC Bayern'],
 'World_Champion': [True, False, False, True, True],
 'Height': [1.7, 1.87, 1.75, 1.78, 1.93],
 'Goals_2018': [45, 44, 28, 21, 0]}

In [159]:
# Build the DataFrame from the dict of columns; dict keys become the column
# labels and a default integer index is generated.
df = pd.DataFrame(dic)

In [160]:
df

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,True,1.7,45
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
2,Neymar Junior,Brasil,Paris SG,False,1.75,28
3,Kylian Mbappe,France,Paris SG,True,1.78,21
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [161]:
# Use the "Player" column as row labels; set_index returns a new DataFrame,
# so df itself keeps its integer index.
players = df.set_index(keys="Player")

In [162]:
players

Unnamed: 0_level_0,Nationality,Club,World_Champion,Height,Goals_2018
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel Messi,Argentina,FC Barcelona,True,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


#### Having Rows in place

In [163]:
list(zip(nationality, club, world_champion, height, goals))

[('Argentina', 'FC Barcelona', True, 1.7, 45),
 ('Portugal', 'Juventus FC', False, 1.87, 44),
 ('Brasil', 'Paris SG', False, 1.75, 28),
 ('France', 'Paris SG', True, 1.78, 21),
 ('Germany', 'FC Bayern', True, 1.93, 0)]

In [166]:
# Transpose the column lists into one tuple per player (row-oriented data).
zipped = [*zip(nationality, club, world_champion, height, goals)]

In [167]:
# Unpack the five row tuples into one variable per player.
(
    messi,
    ronaldo,
    neymar,
    mbappe,
    neuer,
) = zipped

In [168]:
messi

('Argentina', 'FC Barcelona', True, 1.7, 45)

In [169]:
ronaldo

('Portugal', 'Juventus FC', False, 1.87, 44)

In [170]:
# Row-oriented construction: each element of `data` is one row, `index`
# supplies the row labels and `columns` the column labels.
# The existing `player` list is reused for the index instead of retyping the
# five names, so the labels cannot drift from the list defined earlier.
# NOTE: this rebinds `df`, replacing the frame built from `dic` above.
df = pd.DataFrame(
    data=[messi, ronaldo, neymar, mbappe, neuer],
    index=player,
    columns=["Nationality", "Club", "World_Champion", "Height", "Goals_2018"],
)

In [171]:
df

Unnamed: 0,Nationality,Club,World_Champion,Height,Goals_2018
Lionel Messi,Argentina,FC Barcelona,True,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [172]:
# Single-column DataFrame: player names as row labels, "Nationality" as the
# only column.
df2 = pd.DataFrame(data={"Nationality": nationality}, index=player)

In [173]:
df2

Unnamed: 0,Nationality
Lionel Messi,Argentina
Cristiano Ronaldo,Portugal
Neymar Junior,Brasil
Kylian Mbappe,France
Manuel Neuer,Germany


In [174]:
# Add a second column from a plain list; list values are assigned to the rows
# by position, so the list must have one entry per row.
df2["Club"] = club

In [175]:
df2

Unnamed: 0,Nationality,Club
Lionel Messi,Argentina,FC Barcelona
Cristiano Ronaldo,Portugal,Juventus FC
Neymar Junior,Brasil,Paris SG
Kylian Mbappe,France,Paris SG
Manuel Neuer,Germany,FC Bayern


### Adding new Rows (hands-on approach)

#### Adding one Row

In [176]:
players

Unnamed: 0_level_0,Nationality,Club,World_Champion,Height,Goals_2018
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel Messi,Argentina,FC Barcelona,True,1.7,45
Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
Neymar Junior,Brasil,Paris SG,False,1.75,28
Kylian Mbappe,France,Paris SG,True,1.78,21
Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [177]:
# Move the "Player" index back into a regular column and restore a default
# integer index. The result is rebound rather than using inplace=True, which
# pandas discourages and which offers no performance benefit.
# NOTE: run once - a second run would move the integer index into a new
# "index" column.
players = players.reset_index()

In [178]:
players

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,True,1.7,45
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44
2,Neymar Junior,Brasil,Paris SG,False,1.75,28
3,Kylian Mbappe,France,Paris SG,True,1.78,21
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0


In [179]:
# Setting a row at a label that does not exist yet (5) enlarges the DataFrame
# by one row; the list supplies one value per column, in column order.
# NOTE(review): in the output of the next cell Goals_2018 is displayed as
# floats (45.0, 44.0, ...) whereas it was displayed as integers before this
# assignment, i.e. the enlargement changed that column's dtype.
players.loc[5, :] = ["Sergio Ramos", "Spain", "Real Madrid", True, 1.84 ,5]

In [180]:
players

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,True,1.7,45.0
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44.0
2,Neymar Junior,Brasil,Paris SG,False,1.75,28.0
3,Kylian Mbappe,France,Paris SG,True,1.78,21.0
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0.0
5,Sergio Ramos,Spain,Real Madrid,True,1.84,5.0


#### Adding many Rows

In [181]:
# Collect the additional players in their own DataFrame, one inner list per
# row, reusing the column labels of `players` so both frames line up.
new = pd.DataFrame(
    [
        ["Mohamed Salah", "Egypt", "FC Liverpool", False, 1.75, 44],
        ["Luis Suarez", "Uruguay", "FC Barcelona", False, 1.82, 31],
    ],
    columns=players.columns,
)

In [182]:
new

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Mohamed Salah,Egypt,FC Liverpool,False,1.75,44
1,Luis Suarez,Uruguay,FC Barcelona,False,1.82,31


In [None]:
# Old approach, kept for reference only: DataFrame.append was deprecated in
# pandas 1.4 and removed in pandas 2.0 - use pd.concat instead.
# players = players.append(new, ignore_index= True)

In [183]:
# Current approach: stack the new rows underneath the existing ones;
# ignore_index=True discards both original indexes and renumbers from 0.
players = pd.concat(objs=[players, new], ignore_index=True)

In [184]:
players

Unnamed: 0,Player,Nationality,Club,World_Champion,Height,Goals_2018
0,Lionel Messi,Argentina,FC Barcelona,True,1.7,45.0
1,Cristiano Ronaldo,Portugal,Juventus FC,False,1.87,44.0
2,Neymar Junior,Brasil,Paris SG,False,1.75,28.0
3,Kylian Mbappe,France,Paris SG,True,1.78,21.0
4,Manuel Neuer,Germany,FC Bayern,True,1.93,0.0
5,Sergio Ramos,Spain,Real Madrid,True,1.84,5.0
6,Mohamed Salah,Egypt,FC Liverpool,False,1.75,44.0
7,Luis Suarez,Uruguay,FC Barcelona,False,1.82,31.0
