## 3.1 Importing a Data Set with the read_csv Function

In [51]:
import pandas as pd

In [52]:
# The two lines below are equivalent
pd.read_csv(filepath_or_buffer = "pokemon.csv")
pd.read_csv("pokemon.csv")

Unnamed: 0,Pokemon,Type
0,Bulbasaur,Grass / Poison
1,Ivysaur,Grass / Poison
2,Venusaur,Grass / Poison
3,Charmander,Fire
4,Charmeleon,Fire
...,...,...
804,Stakataka,Rock / Steel
805,Blacephalon,Fire / Ghost
806,Zeraora,Electric
807,Meltan,Steel


In [53]:
pd.read_csv("pokemon.csv", index_col = "Pokemon")

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


In [54]:
pd.read_csv("pokemon.csv", index_col = "Pokemon").squeeze()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire
...,...
Stakataka,Rock / Steel
Blacephalon,Fire / Ghost
Zeraora,Electric
Meltan,Steel


In [55]:
pokemon = pd.read_csv(
    "pokemon.csv", index_col = "Pokemon"
).squeeze()

In [56]:
pd.read_csv("google_stocks.csv").head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [57]:
pd.read_csv("google_stocks.csv").dtypes

Unnamed: 0,0
Date,object
Close,float64


In [58]:
pd.read_csv("google_stocks.csv", parse_dates = ["Date"]).head()

Unnamed: 0,Date,Close
0,2004-08-19,49.98
1,2004-08-20,53.95
2,2004-08-23,54.5
3,2004-08-24,52.24
4,2004-08-25,52.8


In [59]:
pd.read_csv("google_stocks.csv", parse_dates = ["Date"]).dtypes

Unnamed: 0,0
Date,datetime64[ns]
Close,float64


In [60]:
pd.read_csv(
    "google_stocks.csv",
    parse_dates = ["Date"],
    index_col = "Date"
).squeeze().head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-08-19,49.98
2004-08-20,53.95
2004-08-23,54.5
2004-08-24,52.24
2004-08-25,52.8


In [61]:
google = pd.read_csv(
    "google_stocks.csv",
    parse_dates = ["Date"],
    index_col = "Date"
).squeeze()

In [62]:
pd.read_csv("revolutionary_war.csv").tail()

Unnamed: 0,Battle,Start Date,State
227,Siege of Fort Henry,9/11/1782,Virginia
228,Grand Assault on Gibraltar,9/13/1782,
229,Action of 18 October 1782,10/18/1782,
230,Action of 6 December 1782,12/6/1782,
231,Action of 22 January 1783,1/22/1783,Virginia


In [63]:
pd.read_csv(
    "revolutionary_war.csv",
    index_col = "Start Date",
    parse_dates = ["Start Date"],
).tail()

Unnamed: 0_level_0,Battle,State
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1782-09-11,Siege of Fort Henry,Virginia
1782-09-13,Grand Assault on Gibraltar,
1782-10-18,Action of 18 October 1782,
1782-12-06,Action of 6 December 1782,
1783-01-22,Action of 22 January 1783,Virginia


In [64]:
pd.read_csv(
    "revolutionary_war.csv",
    index_col = "Start Date",
    parse_dates = ["Start Date"],
    usecols = ["State", "Start Date"],
).tail().squeeze()

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1782-09-11,Virginia
1782-09-13,
1782-10-18,
1782-12-06,
1783-01-22,Virginia


In [65]:
battles = pd.read_csv(
    "revolutionary_war.csv",
    index_col = "Start Date",
    parse_dates = ["Start Date"],
    usecols = ["State", "Start Date"]
).squeeze()

## 3.2 Sorting a Series

### 3.2.1 Sorting by Values with the sort_values Method

In [66]:
google.sort_values()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-09-03,49.82
2004-09-01,49.94
2004-08-19,49.98
2004-09-02,50.57
2004-09-07,50.60
...,...
2019-04-23,1264.55
2019-10-25,1265.13
2018-07-26,1268.33
2019-04-26,1272.18


In [67]:
google.sort_values(key=lambda x: x.index)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-08-19,49.98
2004-08-20,53.95
2004-08-23,54.50
2004-08-24,52.24
2004-08-25,52.80
...,...
2019-10-21,1246.15
2019-10-22,1242.80
2019-10-23,1259.13
2019-10-24,1260.99


In [68]:
pokemon.sort_values()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Illumise,Bug
Silcoon,Bug
Pinsir,Bug
Burmy,Bug
Wurmple,Bug
...,...
Tirtouga,Water / Rock
Relicanth,Water / Rock
Corsola,Water / Rock
Carracosta,Water / Rock


In [69]:
pokemon.sort_values(key = lambda x: x.index)

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Abomasnow,Grass / Ice
Abra,Psychic
Absol,Dark
Accelgor,Bug
Aegislash,Steel / Ghost
...,...
Zoroark,Dark
Zorua,Dark
Zubat,Poison / Flying
Zweilous,Dark / Dragon


In [70]:
pd.Series(data = ["Adam", "adam", "Ben"]).sort_values()

Unnamed: 0,0
0,Adam
2,Ben
1,adam


In [71]:
google.sort_values(ascending = False).head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2019-04-29,1287.58
2019-04-26,1272.18
2018-07-26,1268.33
2019-10-25,1265.13
2019-04-23,1264.55


In [72]:
pokemon.sort_values(ascending = False).head()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Empoleon,Water / Steel
Corsola,Water / Rock
Relicanth,Water / Rock
Carracosta,Water / Rock
Tirtouga,Water / Rock


In [73]:
# The two lines below are equivalent
battles.sort_values()
battles.sort_values(na_position = "last")

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1781-09-06,Connecticut
1779-07-05,Connecticut
1777-04-27,Connecticut
1777-09-03,Delaware
1777-05-17,Florida
...,...
1782-08-08,
1782-08-25,
1782-09-13,
1782-10-18,


In [74]:
battles.sort_values(na_position = "first")

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1775-09-17,
1775-12-31,
1776-03-03,
1776-03-25,
1776-05-18,
...,...
1781-07-06,Virginia
1781-07-01,Virginia
1781-06-26,Virginia
1781-04-25,Virginia


In [75]:
battles.dropna().sort_values()

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1781-09-06,Connecticut
1779-07-05,Connecticut
1777-04-27,Connecticut
1777-09-03,Delaware
1777-05-17,Florida
...,...
1781-07-06,Virginia
1781-07-01,Virginia
1781-06-26,Virginia
1781-04-25,Virginia


### 3.2.2 Sorting by Index with the sort_index Method

In [76]:
# The two lines below are equivalent
pokemon.sort_index()
pokemon.sort_index(ascending = True)

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Abomasnow,Grass / Ice
Abra,Psychic
Absol,Dark
Accelgor,Bug
Aegislash,Steel / Ghost
...,...
Zoroark,Dark
Zorua,Dark
Zubat,Poison / Flying
Zweilous,Dark / Dragon


In [77]:
battles.sort_index(na_position="first")

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
NaT,New Jersey
NaT,Virginia
NaT,
NaT,
1774-09-01,Massachusetts
...,...
1782-09-11,Virginia
1782-09-13,
1782-10-18,
1782-12-06,


In [78]:
battles.sort_index(na_position = "first").head()

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
NaT,New Jersey
NaT,Virginia
NaT,
NaT,
1774-09-01,Massachusetts


In [79]:
battles.sort_index(ascending = False).head()

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1783-01-22,Virginia
1782-12-06,
1782-10-18,
1782-09-13,
1782-09-11,Virginia


### 3.2.3 Retrieving the Smallest and Largest Values with the nsmallest and nlargest Methods

In [80]:
google.sort_values(ascending = False).head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2019-04-29,1287.58
2019-04-26,1272.18
2018-07-26,1268.33
2019-10-25,1265.13
2019-04-23,1264.55


In [88]:
# The two lines below are equivalent
google.nlargest(n = 5)
google.nlargest()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2019-04-29,1287.58
2019-04-26,1272.18
2018-07-26,1268.33
2019-10-25,1265.13
2019-04-23,1264.55


In [91]:
# The two lines below are equivalent
google.nsmallest(n = 5)
google.nsmallest(5)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-09-03,49.82
2004-09-01,49.94
2004-08-19,49.98
2004-09-02,50.57
2004-09-07,50.6


## 3.3 Overwriting a Series with the inplace Parameter

In [92]:
battles.head(3)

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1774-09-01,Massachusetts
1774-12-14,New Hampshire
1775-04-19,Massachusetts


In [93]:
battles.sort_values().head(3)

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1781-09-06,Connecticut
1779-07-05,Connecticut
1777-04-27,Connecticut


In [94]:
battles.head(3)

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1774-09-01,Massachusetts
1774-12-14,New Hampshire
1775-04-19,Massachusetts


In [86]:
battles.head(3)

Unnamed: 0_level_0,State
Start Date,Unnamed: 1_level_1
1774-09-01,Massachusetts
1774-12-14,New Hampshire
1775-04-19,Massachusetts


In [98]:
battles = battles.sort_values(inplace=True)

In [100]:
battles

## 3.4 Counting Values with the value_counts Method

In [101]:
pokemon.head()

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Grass / Poison
Ivysaur,Grass / Poison
Venusaur,Grass / Poison
Charmander,Fire
Charmeleon,Fire


In [102]:
pokemon.value_counts()

Unnamed: 0_level_0,count
Type,Unnamed: 1_level_1
Normal,65
Water,61
Grass,38
Psychic,35
Fire,30
...,...
Normal / Dragon,1
Psychic / Steel,1
Rock / Poison,1
Fighting / Ghost,1


In [104]:
type(pokemon.value_counts())

In [103]:
len(pokemon.value_counts())

159

In [105]:
pokemon.nunique()

159

In [106]:
pokemon.value_counts(ascending = True)

Unnamed: 0_level_0,count
Type,Unnamed: 1_level_1
Steel / Ground,1
Fire / Rock,1
Rock / Dark,1
Psychic / Grass,1
Fire / Steel,1
...,...
Fire,30
Psychic,35
Grass,38
Water,61


In [109]:
pokemon.value_counts(normalize = True, ascending=True).head()

Unnamed: 0_level_0,proportion
Type,Unnamed: 1_level_1
Steel / Ground,0.001236
Fire / Rock,0.001236
Rock / Dark,0.001236
Psychic / Grass,0.001236
Fire / Steel,0.001236


In [110]:
pokemon.value_counts(normalize = True).head() * 100

Unnamed: 0_level_0,proportion
Type,Unnamed: 1_level_1
Normal,8.034611
Water,7.540173
Grass,4.697157
Psychic,4.326329
Fire,3.708282


In [111]:
(pokemon.value_counts(normalize = True) * 100).round(2)

Unnamed: 0_level_0,proportion
Type,Unnamed: 1_level_1
Normal,8.03
Water,7.54
Grass,4.70
Psychic,4.33
Fire,3.71
...,...
Normal / Dragon,0.12
Psychic / Steel,0.12
Rock / Poison,0.12
Fighting / Ghost,0.12


In [None]:
google.value_counts().head()

In [112]:
google.max()

1287.58

In [None]:
google.min()

In [113]:
buckets = [0, 200, 400, 600, 800, 1000, 1200, 1400]
google.value_counts(bins = buckets)

Unnamed: 0,count
"(200.0, 400.0]",1568
"(-0.001, 200.0]",595
"(400.0, 600.0]",575
"(1000.0, 1200.0]",406
"(600.0, 800.0]",380
"(800.0, 1000.0]",207
"(1200.0, 1400.0]",93


In [114]:
google.value_counts(bins = buckets).sort_index()

Unnamed: 0,count
"(-0.001, 200.0]",595
"(200.0, 400.0]",1568
"(400.0, 600.0]",575
"(600.0, 800.0]",380
"(800.0, 1000.0]",207
"(1000.0, 1200.0]",406
"(1200.0, 1400.0]",93


In [115]:
google.value_counts(bins = buckets, sort = False)

Unnamed: 0,count
"(-0.001, 200.0]",595
"(200.0, 400.0]",1568
"(400.0, 600.0]",575
"(600.0, 800.0]",380
"(800.0, 1000.0]",207
"(1000.0, 1200.0]",406
"(1200.0, 1400.0]",93


In [120]:
(google.max() - google.min())/6

206.29333333333332

In [125]:
import numpy as np
np.arange(google.min(), google.max()+1, (google.max() - google.min())/6)

array([  49.82      ,  256.11333333,  462.40666667,  668.7       ,
        874.99333333, 1081.28666667, 1287.58      ])

In [116]:
google.value_counts(bins = 6, sort = False)

Unnamed: 0,count
"(48.581, 256.113]",1204
"(256.113, 462.407]",1104
"(462.407, 668.7]",507
"(668.7, 874.993]",380
"(874.993, 1081.287]",292
"(1081.287, 1287.58]",337


In [126]:
battles.head()

AttributeError: 'NoneType' object has no attribute 'head'

In [None]:
battles.value_counts().head()

In [None]:
battles.value_counts(dropna = False).head()

In [None]:
battles.index

In [None]:
battles.index.value_counts()

## 3.5 Invoking a Function on Every Series Value with the apply Method

In [127]:
funcs = [len, max, min]

In [128]:
funcs = [len, max, min]

for current_func in funcs:
    print(current_func(google))

3824
1287.58
49.82


In [None]:
round(99.2)

In [None]:
round(99.49)

In [None]:
round(99.5)

In [130]:
? google.apply

In [129]:
# The two lines below are equivalent
google.apply(func = round)
google.apply(round)

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-08-19,50
2004-08-20,54
2004-08-23,54
2004-08-24,52
2004-08-25,53
...,...
2019-10-21,1246
2019-10-22,1243
2019-10-23,1259
2019-10-24,1261


In [136]:
google.apply(lambda x: np.add(x,
                              100))

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-08-19,149.98
2004-08-20,153.95
2004-08-23,154.50
2004-08-24,152.24
2004-08-25,152.80
...,...
2019-10-21,1346.15
2019-10-22,1342.80
2019-10-23,1359.13
2019-10-24,1360.99


In [137]:
def single_or_multi(pokemon_type):
    if "/" in pokemon_type:
        return "Multi"

    return "Single"

In [None]:
pokemon.head(4)

In [138]:
pokemon.apply(single_or_multi)

Unnamed: 0_level_0,Type
Pokemon,Unnamed: 1_level_1
Bulbasaur,Multi
Ivysaur,Multi
Venusaur,Multi
Charmander,Single
Charmeleon,Single
...,...
Stakataka,Multi
Blacephalon,Multi
Zeraora,Single
Meltan,Single


In [139]:
pokemon.apply(single_or_multi).value_counts()

Unnamed: 0_level_0,count
Type,Unnamed: 1_level_1
Multi,405
Single,404


## 3.6 Coding Challenge

### 3.6.1 Problems

### 3.6.2 Solutions

In [140]:
import datetime as dt
today = dt.datetime(2020, 12, 26)
today.strftime("%A")

'Saturday'

In [None]:
pd.read_csv("revolutionary_war.csv").head()

In [None]:
days_of_war = pd.read_csv(
    "revolutionary_war.csv",
    usecols = ["Start Date"],
    parse_dates = ["Start Date"],
    squeeze = True,
)

days_of_war.head()

In [None]:
def day_of_week(date):
    return date.strftime("%A")

**NOTE**: I've commented out the code below so that the Notebook can run without raising an error.

In [None]:
# days_of_war.apply(day_of_week)

In [None]:
days_of_war.dropna().apply(day_of_week)

In [None]:
days_of_war.dropna().apply(day_of_week).value_counts()

## 3.7 Summary