# Part 1: Pandas - from Zero to Hero

## DataFrame Basics III

### Sorting DataFrames (Version 1.0 Update)

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic

In [None]:
titanic.age.sort_values()

In [None]:
titanic.sort_values(by = "age")

In [None]:
titanic

In [None]:
titanic.sort_values(by = "age", ascending = False, inplace = True)

In [None]:
titanic

In [None]:
titanic.sort_values(by = ["pclass", "sex", "age"], ascending = [True, True, False], inplace= True)

In [None]:
pd.options.display.max_rows = 900

In [None]:
titanic

In [None]:
titanic.sort_index(ascending = True, inplace = True)

In [None]:
titanic

In [None]:
titanic.sort_values(by = "age").reset_index(drop = True)

In [None]:
titanic.sort_values(by = "age", ignore_index = True)

### Ranking DataFrames with rank()

In [1]:
import pandas as pd

In [9]:
sales = pd.Series([15, 32, 45, 21, 55, 15, 0],  index = ["Mo", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

In [None]:
sales = pd.Series([15, 32, 45, 15, 55, 15, 0],  index = ["Mo", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

In [3]:
sales

Mo     15
Tue    32
Wed    45
Thu    21
Fri    55
Sat    15
Sun     0
dtype: int64

In [10]:
sales.sort_values(ascending = False)

Fri    55
Wed    45
Tue    32
Thu    21
Sat    15
Mo     15
Sun     0
dtype: int64

In [11]:
sales.rank(ascending=False, method = "min").sort_values(ascending = True)

Fri    1.0
Wed    2.0
Tue    3.0
Thu    4.0
Mo     5.0
Sat    5.0
Sun    7.0
dtype: float64

In [16]:
sales.rank(ascending = True, method="min")

Mo     2.0
Tue    5.0
Wed    6.0
Thu    4.0
Fri    7.0
Sat    2.0
Sun    1.0
dtype: float64

In [None]:
sales.rank(ascending=False, method = "min", pct=True).sort_values()

In [17]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.fare.rank(ascending = False)

In [None]:
titanic["fare_rank"] = titanic.fare.rank(ascending = False, method="min")

In [None]:
titanic.head()

In [None]:
titanic.sort_values("fare", ascending= False)

In [None]:
titanic.drop(columns = "fare_rank", inplace= True)

In [21]:
titanic[['survived','pclass']].corr()

Unnamed: 0,survived,pclass
survived,1.0,-0.338481
pclass,-0.338481,1.0


### nunique(), nlargest() and nsmallest() with DataFrames

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.tail()

In [None]:
titanic.age.unique()

In [None]:
titanic.nunique(axis = 1, dropna=False)

In [None]:
titanic.nunique(dropna = False)

In [None]:
titanic.nlargest(n = 5, columns = "fare")

In [None]:
titanic.sort_values("fare", ascending = False).head(5)

In [None]:
titanic.nsmallest(n = 1, columns = "age")

In [None]:
titanic.loc[titanic.age.idxmin()]

### Summary Statistics and Accumulations

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.count(axis = "columns")

In [None]:
titanic.count(axis = 1)

In [None]:
titanic.mean(axis = 1)

In [None]:
titanic.sum(axis = 0)

In [None]:
titanic.head()

In [None]:
titanic.fare.cumsum(axis = 0)

In [None]:
titanic.corr()

In [None]:
titanic.survived.corr(titanic.pclass)

### The agg() method

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.mean()

In [None]:
titanic.agg("mean")

In [None]:
titanic.agg(["mean", "std"])

In [None]:
titanic.agg(["mean", "std", "min", "max", "median"])

In [None]:
titanic.agg({"survived": "mean", "age":["min", "max"]})

### apply(), map() and applymap()

In [1]:
import pandas as pd

In [2]:
sales = pd.read_csv("sales.csv", index_col = 0)

In [48]:
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,34,27,15,,33
Mike,45,9,74,87.0,12
Andi,17,33,54,8.0,29
Paul,87,67,27,45.0,7


In [49]:
sales1 = sales.copy()

In [52]:
sales1.reset_index(inplace=True)

In [53]:
sales1

Unnamed: 0,index,Mon,Tue,Wed,Thu,Fri
0,Steven,34,27,15,,33
1,Mike,45,9,74,87.0,12
2,Andi,17,33,54,8.0,29
3,Paul,87,67,27,45.0,7


In [63]:
sales1[['Mon','Tue']][2:3]

Unnamed: 0,Mon,Tue
2,17,33


In [68]:
sales[['Mon','Tue']]['Mike':'Andi']

Unnamed: 0,Mon,Tue
Mike,45,9
Andi,17,33


In [4]:
sales.min(axis = 0) 

Mon    17.0
Tue     9.0
Wed    15.0
Thu     8.0
Fri     7.0
dtype: float64

In [20]:
sales.loc['Andi',:].max()

54.0

In [None]:
sales.min(axis = 1)

In [45]:
def range(series):
    return series.max() - series.min()

In [46]:
sales.apply(lambda x: x.max() - x.min(), axis = 0)

Mon    70.0
Tue    58.0
Wed    59.0
Thu    79.0
Fri    26.0
dtype: float64

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
summer.Athlete.apply(lambda x: x[0])

In [None]:
summer.Athlete.map(lambda x: x[0])

In [None]:
summer.iloc[:,1:3].applymap(lambda x: x[0])

In [None]:
sales.applymap(lambda x: 0.4*x-5)

In [None]:
sales*0.4-5

### Hierarchical Indexing (MultiIndex) Intro

In [69]:
import pandas as pd

In [70]:
titanic = pd.read_csv("titanic.csv")

In [71]:
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.0750,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [72]:
titanic = titanic.iloc[:50, :]

In [73]:
titanic.set_index(["pclass", "sex"], inplace = True)

In [74]:
titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,male,0,22.0,1,0,7.25,S,
1,female,1,38.0,1,0,71.2833,C,C
3,female,1,26.0,0,0,7.925,S,
1,female,1,35.0,1,0,53.1,S,C
3,male,0,35.0,0,0,8.05,S,
3,male,0,,0,0,8.4583,Q,
1,male,0,54.0,0,0,51.8625,S,E
3,male,0,2.0,3,1,21.075,S,
3,female,1,27.0,0,2,11.1333,S,
2,female,1,14.0,1,0,30.0708,C,


In [75]:
titanic.sort_index(ascending = [True, True], inplace = True )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [76]:
titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [77]:
titanic.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,1,1,38.0,1,0,71.2833,C,C
female,1,1,35.0,1,0,53.1,S,C
female,1,1,58.0,0,0,26.55,S,C
female,1,1,,1,0,146.5208,C,B
male,1,0,54.0,0,0,51.8625,S,E
male,1,1,28.0,0,0,35.5,S,A
male,1,0,19.0,3,2,263.0,S,C
male,1,0,40.0,0,0,27.7208,C,
male,1,0,28.0,1,0,82.1708,C,
male,1,0,42.0,1,0,52.0,S,


In [78]:
titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [None]:
titanic.reset_index(inplace = True)

In [None]:
titanic

### Hierarchical Indexing (MultiIndex) Part 2

In [None]:
import pandas as pd

In [80]:
titanic = pd.read_csv("titanic.csv")

In [81]:
titanic = titanic.iloc[:50,]

In [82]:
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,
5,0,3,male,,0,0,8.4583,Q,
6,0,1,male,54.0,0,0,51.8625,S,E
7,0,3,male,2.0,3,1,21.075,S,
8,1,3,female,27.0,0,2,11.1333,S,
9,1,2,female,14.0,1,0,30.0708,C,


In [83]:
titanic = titanic.set_index(["pclass", "sex"]).sort_index(ascending = True)

In [84]:
titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [85]:
titanic.loc[1]

Unnamed: 0_level_0,survived,age,sibsp,parch,fare,embarked,deck
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
female,1,38.0,1,0,71.2833,C,C
female,1,35.0,1,0,53.1,S,C
female,1,58.0,0,0,26.55,S,C
female,1,,1,0,146.5208,C,B
male,0,54.0,0,0,51.8625,S,E
male,1,28.0,0,0,35.5,S,A
male,0,19.0,3,2,263.0,S,C
male,0,40.0,0,0,27.7208,C,
male,0,28.0,1,0,82.1708,C,
male,0,42.0,1,0,52.0,S,


In [86]:
titanic.loc[[1,2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [87]:
titanic.loc[:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [88]:
titanic.loc[1, "female"]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B


In [89]:
titanic.loc[1, "female", "age"]

IndexingError: Too many indexers

In [None]:
titanic.loc[(1,"female")]

In [None]:
titanic.loc[(1,"female"), "age"]

In [None]:
titanic.loc[([1,2],"female"), ["age", "fare"]]

In [None]:
titanic.loc[([1, 2],"female"), :]

In [90]:
titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,


In [93]:
titanic.loc[[1,'female'],'age']

pclass  sex   
1       female    38.0
        female    35.0
        female    58.0
        female     NaN
        male      54.0
        male      28.0
        male      19.0
        male      40.0
        male      28.0
        male      42.0
Name: age, dtype: float64

In [98]:
titanic[['age','embarked']][1:]

Unnamed: 0_level_0,Unnamed: 1_level_0,age,embarked
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1
1,female,35.0,S
1,female,58.0,S
1,female,,C
1,male,54.0,S
1,male,28.0,S
1,male,19.0,S
1,male,40.0,C
1,male,28.0,C
1,male,42.0,S
2,female,14.0,C


In [153]:
titanic.loc[(slice(1,2), slice('female','male')),['age','deck']]

Unnamed: 0_level_0,Unnamed: 1_level_0,age,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1
1,female,38.0,C
1,female,35.0,C
1,female,58.0,C
1,female,,B
1,male,54.0,E
1,male,28.0,A
1,male,19.0,C
1,male,40.0,
1,male,28.0,
1,male,42.0,


In [99]:
titanic.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,female,1,38.0,1,0,71.2833,C,C
1,female,1,35.0,1,0,53.1,S,C
1,female,1,58.0,0,0,26.55,S,C
1,female,1,,1,0,146.5208,C,B
1,male,0,54.0,0,0,51.8625,S,E


In [128]:
titanic.loc[([1,3],'male'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,deck
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,male,0,54.0,0,0,51.8625,S,E
1,male,1,28.0,0,0,35.5,S,A
1,male,0,19.0,3,2,263.0,S,C
1,male,0,40.0,0,0,27.7208,C,
1,male,0,28.0,1,0,82.1708,C,
1,male,0,42.0,1,0,52.0,S,
3,male,0,22.0,1,0,7.25,S,
3,male,0,35.0,0,0,8.05,S,
3,male,0,,0,0,8.4583,Q,
3,male,0,2.0,3,1,21.075,S,


### String Operations Intro / Refresher

In [None]:
"Hello World"

In [None]:
type("Hello World")

In [None]:
hello = "Hello World"
hello

In [None]:
len(hello)

In [None]:
hello.lower()

In [None]:
hello.upper()

In [None]:
hello.title()

In [None]:
hello.split(" ")

In [None]:
hello.replace("Hello", "Hi")

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
names = summer.loc[:9, "Athlete"].copy()

In [None]:
names

In [None]:
names.dtypes

In [None]:
names[0]

In [None]:
type(names[0])

In [None]:
names.str.lower()

### String Operations in Pandas

In [1]:
import pandas as pd

In [2]:
summer = pd.read_csv("summer.csv")

In [19]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,Swimming,"HAJOS, Alfred",HUN,Men,100M Freestyle,Gold
1,1896,Athens,Aquatics,Swimming,"HERSCHMANN, Otto",AUT,Men,100M Freestyle,Silver
2,1896,Athens,Aquatics,Swimming,"DRIVAS, Dimitrios",GRE,Men,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,Swimming,"MALOKINIS, Ioannis",GRE,Men,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,Swimming,"CHASAPIS, Spiridon",GRE,Men,100M Freestyle For Sailors,Silver


In [32]:
n1 = summer['Gender']

In [33]:
n2 = summer.loc[:,'Discipline']

In [36]:
n1.replace('Men','dog',inplace=True)

In [34]:
n2.replace('Swimming','run',inplace=True)

In [40]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,run,"HAJOS, Alfred",HUN,dog,100M Freestyle,Gold
1,1896,Athens,Aquatics,run,"HERSCHMANN, Otto",AUT,dog,100M Freestyle,Silver
2,1896,Athens,Aquatics,run,"DRIVAS, Dimitrios",GRE,dog,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,run,"MALOKINIS, Ioannis",GRE,dog,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,run,"CHASAPIS, Spiridon",GRE,dog,100M Freestyle For Sailors,Silver


<b>Notice how n1 and n2 were NEW series made from Original Summer Dataframe.<br>
But changes to n1 and n2 with Inplace=True changed the original dataframe summer as well <br>
Therefore always use <i><font color = 'Blue'>.copy()</i></b>

In [44]:
n3 = summer['Gender'].copy()

In [45]:
n3

0        dog
1        dog
2        dog
3        dog
4        dog
5        dog
6        dog
7        dog
8        dog
9        dog
10       dog
11       dog
12       dog
13       dog
14       dog
15       dog
16       dog
17       dog
18       dog
19       dog
20       dog
21       dog
22       dog
23       dog
24       dog
25       dog
26       dog
27       dog
28       dog
29       dog
        ... 
31135    dog
31136    dog
31137    dog
31138    dog
31139    dog
31140    dog
31141    dog
31142    dog
31143    dog
31144    dog
31145    dog
31146    dog
31147    dog
31148    dog
31149    dog
31150    dog
31151    dog
31152    dog
31153    dog
31154    dog
31155    dog
31156    dog
31157    dog
31158    dog
31159    dog
31160    dog
31161    dog
31162    dog
31163    dog
31164    dog
Name: Gender, Length: 31165, dtype: object

In [46]:
n3.replace('dog','cat',inplace=True)

In [48]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Aquatics,run,"HAJOS, Alfred",HUN,dog,100M Freestyle,Gold
1,1896,Athens,Aquatics,run,"HERSCHMANN, Otto",AUT,dog,100M Freestyle,Silver
2,1896,Athens,Aquatics,run,"DRIVAS, Dimitrios",GRE,dog,100M Freestyle For Sailors,Bronze
3,1896,Athens,Aquatics,run,"MALOKINIS, Ioannis",GRE,dog,100M Freestyle For Sailors,Gold
4,1896,Athens,Aquatics,run,"CHASAPIS, Spiridon",GRE,dog,100M Freestyle For Sailors,Silver


In [51]:
summer[summer['Sport']=='Aquatics']['Sport'] = 'Water'        #CHAINED INDEXING

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [54]:
summer.loc[summer['Sport']=='Aquatics','Sport'] = 'Water'

In [55]:
summer.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country,Gender,Event,Medal
0,1896,Athens,Water,run,"HAJOS, Alfred",HUN,dog,100M Freestyle,Gold
1,1896,Athens,Water,run,"HERSCHMANN, Otto",AUT,dog,100M Freestyle,Silver
2,1896,Athens,Water,run,"DRIVAS, Dimitrios",GRE,dog,100M Freestyle For Sailors,Bronze
3,1896,Athens,Water,run,"MALOKINIS, Ioannis",GRE,dog,100M Freestyle For Sailors,Gold
4,1896,Athens,Water,run,"CHASAPIS, Spiridon",GRE,dog,100M Freestyle For Sailors,Silver


In [26]:
names = summer.loc[:9, "Athlete"].copy()

In [None]:
names

In [None]:
names.str.lower()

In [None]:
names.str.title()

In [None]:
summer.Event.str.split(" ", n = 2, expand= True)

In [None]:
summer[summer.Event.str.contains("100M")]