<a href="https://colab.research.google.com/github/jack-cao-623/python_learning/blob/main/dataframes_03_data_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DataFrames Part 3, Data Extraction
- .iloc[], .loc[]

## First lesson; set up

In [2]:
# libraries needed
import numpy as np
import pandas as pd

In [3]:
# load data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head(n = 5)

# one row per bond movie

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [4]:
# non-null counts and data type
bond.info()
  # box office, budget, and actor salary are in millions

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Film               26 non-null     object 
 1   Year               26 non-null     int64  
 2   Actor              26 non-null     object 
 3   Director           26 non-null     object 
 4   Box Office         26 non-null     float64
 5   Budget             26 non-null     float64
 6   Bond Actor Salary  18 non-null     float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.5+ KB


In [5]:
# reproduce non-null counts
(
    bond
      .notnull()
      .sum(axis = 'index')      # count of non null values
      .div(bond.shape[0])       # percent of non null values
)

Film                 1.000000
Year                 1.000000
Actor                1.000000
Director             1.000000
Box Office           1.000000
Budget               1.000000
Bond Actor Salary    0.692308
dtype: float64

## .set_index() and .reset_index() methods

In [6]:
# current axes
bond.axes

[RangeIndex(start=0, stop=26, step=1),
 Index(['Film', 'Year', 'Actor', 'Director', 'Box Office', 'Budget',
        'Bond Actor Salary'],
       dtype='object')]

In [7]:
# current row axis
bond.index

RangeIndex(start=0, stop=26, step=1)

In [8]:
# one row per Film
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [9]:
# make Film the index
bond.set_index(keys = 'Film') # can also do index_col parameter in pd.read_csv()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [10]:
# make Film the index and then undo it
(
    bond
      .set_index('Film')
      .reset_index()
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [11]:
# for reset index, boolean drop paramter for dropping the index entirely

In [12]:
# make Year the index
bond_year_index = (
    bond
      .set_index('Year')
)

bond_year_index.head()

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2
1965,Thunderball,Sean Connery,Terence Young,848.1,41.9,4.7
1967,Casino Royale,David Niven,Ken Hughes,315.0,85.0,


## .loc[ ] is neither an attribute nor a method; it's a property
- loc for label location; but integer position is also a label
- Inclusive of last value, unlike .iloc[]
- Better to do .sort_index() for optimization; memory usage is better

In [13]:
# load data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

In [14]:
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [15]:
bond.loc[0:5,:]   # index 0 thru 5, inclusive (0:5), all columns (:)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [16]:
(
    bond
      .set_index('Film')                              # set Film column/Series as index
      .sort_index(axis = 0, ascending = True)         # sort the DataFrame by index, alphabetically
      .sort_index(axis = 1, ascending = True)         # sort the DataFrame by column, alphabetically; Film is index, not a column
      .loc['GoldenEye':, :]                           # grab all indices from GoldenEye thru to end, all columns
)

Unnamed: 0_level_0,Actor,Bond Actor Salary,Box Office,Budget,Director,Year
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,Pierce Brosnan,5.1,518.5,76.9,Martin Campbell,1995
Goldfinger,Sean Connery,3.2,820.4,18.6,Guy Hamilton,1964
Licence to Kill,Timothy Dalton,7.9,250.9,56.7,John Glen,1989
Live and Let Die,Roger Moore,,460.3,30.8,Guy Hamilton,1973
Moonraker,Roger Moore,,535.0,91.5,Lewis Gilbert,1979
Never Say Never Again,Sean Connery,,380.0,86.0,Irvin Kershner,1983
Octopussy,Roger Moore,7.8,373.8,53.9,John Glen,1983
On Her Majesty's Secret Service,George Lazenby,0.6,291.5,37.3,Peter R. Hunt,1969
Quantum of Solace,Daniel Craig,8.1,514.2,181.4,Marc Forster,2008
Skyfall,Daniel Craig,14.5,943.5,170.2,Sam Mendes,2012


In [17]:
# integer position is also a label
bond.loc[20:, 'Film':'Actor']

Unnamed: 0,Film,Year,Actor
20,The World Is Not Enough,1999,Pierce Brosnan
21,Die Another Day,2002,Pierce Brosnan
22,Casino Royale,2006,Daniel Craig
23,Quantum of Solace,2008,Daniel Craig
24,Skyfall,2012,Daniel Craig
25,Spectre,2015,Daniel Craig


In [18]:
# Get back info about Goldfinger film
(
    bond
      .set_index('Film')           # Film Series/column is the index
      .loc['Goldfinger', :]        # extract info about this film, all columns (:)
)

# output is a Series

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [19]:
# index labels don't need to be unique

# two rows where Film is Casino Royale
(
    bond
      [bond['Film'] == 'Casino Royale']
      .set_index('Film')
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3


In [20]:
# make Film the index and extract using .loc[]
(
    bond
      .set_index('Film')
      .loc['Casino Royale', :]
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3


In [21]:
# .loc[] suppports list slicing
(
    bond
      .set_index('Film')
      .sort_index()                                # if you want to sort films alphabetically
      .loc['Die Another Day':'Skyfall':2, :]       # list slicing; #2 refers to step by 2, i.e., every other; defaults to 1
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1


In [22]:
# set Film as index, sort alphabetically, extract all Films from start up through Die Another Day
(
    bond
      .set_index('Film')
      .sort_index()
      .loc[:'Die Another Day']
)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [23]:
# set Film as index, sort alphabetically, extract all Films from Die Another Day to end
(
    bond
      .set_index('Film')
      .sort_index()
      .loc['Die Another Day':]
)

Output = None
# above suppresses output

In [24]:
# specific films as a list
(
    bond
      .set_index('Film')
      .sort_index()
      .loc[['Spectre', 'GoldenEye']]         # every element in list must exist; otherwise error 
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1


In [25]:
# check if a Film is in dataset
'Dr. No' in bond['Film']

False

In [26]:
# another way to do it; but not the real way
sum(bond['Film'] == 'Dr. No')

1

In [27]:
'apple' in ['apple', 'banana', 'pear']

True

## .iloc[ ] accessor

In [28]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [29]:
# extract even indices below 10
bond.iloc[:10:2]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,


In [30]:
# extract data in index position 0
bond.iloc[0]

Film                        Dr. No
Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                         7.0
Bond Actor Salary              0.6
Name: 0, dtype: object

In [31]:
# extract data in index positions 15, 21, and 7
bond.iloc[[15, 21, 7]]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
15,A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [32]:
# extract data in index positions 4 thru 8, inclusive
bond.iloc[4:9]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,


In [33]:
# note that unlike .loc[], .iloc[] excludes the last/right side

In [34]:
# from 20 to end
bond.iloc[20:]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [35]:
# from beginning to 4, exclude index position 4
bond.iloc[:4]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [36]:
# string index labels have a position, can use .iloc[]
(
    bond
      .set_index('Film')
      .sort_index()
      .iloc[0]                      # index in position 0 is called 'A View to a Kill'; same as below
      #.loc['A View to a Kill']
)

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

## Second arguments in .loc[ ] and .iloc []

In [37]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [38]:
# set Film as index and sort index alphabetically
bond = (
    bond
      .set_index('Film')
      .sort_index()
)

bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [39]:
# who played James Bond in Moonraker?
bond.loc['Moonraker', 'Actor']
  # returns the cell's value

'Roger Moore'

In [40]:
# who directed Moonraker?
bond.loc['Moonraker', 'Director']

'Lewis Gilbert'

In [41]:
# multiple columns
bond.loc['Moonraker', ['Director', 'Box Office']]

Director      Lewis Gilbert
Box Office            535.0
Name: Moonraker, dtype: object

In [42]:
# multiple indices and columns
bond.loc[['Moonraker', 'A View to a Kill'], ['Director', 'Box Office']]

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Lewis Gilbert,535.0
A View to a Kill,John Glen,275.2


In [43]:
# playing around
bond.loc['Casino Royale':'Dr. No', 'Year':'Budget']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0


In [44]:
# more playing around
bond.iloc[14:20, :2]

Unnamed: 0_level_0,Year,Actor
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Octopussy,1983,Roger Moore
On Her Majesty's Secret Service,1969,George Lazenby
Quantum of Solace,2008,Daniel Craig
Skyfall,2012,Daniel Craig
Spectre,2015,Daniel Craig
The Living Daylights,1987,Timothy Dalton


## Set a new value for a specific cell or cells in a row

In [45]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [46]:
# replace Thunderball Sean Connery with Sir Sean Connery
bond.loc[3, 'Actor'] = 'Sir Sean Connery'
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sir Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [47]:
# change first 3 values in Budget Series/column to value * 1k
bond.loc[[0, 1, 2], 'Budget'] = [7 * 1000, 12.6 * 1000, 18.6 * 1000]
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7000.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12600.0,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18600.0,3.2
3,Thunderball,1965,Sir Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


## Set multiple values in a DataFrame

In [48]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [49]:
# make Film the index and sort index alphabetically
bond = (
    bond
      .set_index('Film')
      .sort_index()
)

bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [50]:
# for all 'Sean Connery' occurrences in Actor Series/column, change to 'Sir Sean Connery'

bond['Actor'] == 'Sean Connery'    # whether Actor is Sean Conery, True, or not, False

Film
A View to a Kill                   False
Casino Royale                      False
Casino Royale                      False
Diamonds Are Forever                True
Die Another Day                    False
Dr. No                              True
For Your Eyes Only                 False
From Russia with Love               True
GoldenEye                          False
Goldfinger                          True
Licence to Kill                    False
Live and Let Die                   False
Moonraker                          False
Never Say Never Again               True
Octopussy                          False
On Her Majesty's Secret Service    False
Quantum of Solace                  False
Skyfall                            False
Spectre                            False
The Living Daylights               False
The Man with the Golden Gun        False
The Spy Who Loved Me               False
The World Is Not Enough            False
Thunderball                         True
Tomorrow Ne

In [51]:
# for all 'Sean Connery' occurrences in Actor Series/column, change to 'Sir Sean Connery'
(
    bond
      .loc[bond['Actor'] == 'Sean Connery', 'Actor']       # rows where Actor is Sean Connery, Actor column
) = 'Sir Sean Connery'                                     # change value to Sir Sean Connery

# check
bond['Actor'].value_counts()

Roger Moore         7
Sir Sean Connery    7
Daniel Craig        4
Pierce Brosnan      4
Timothy Dalton      2
David Niven         1
George Lazenby      1
Name: Actor, dtype: int64

In [52]:
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [53]:
# use .loc[] or .iloc[] to change original dataset

## .rename() method
- Rename index labels or columns in DataFrame

In [54]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [55]:
# make Film the index and sort alpbabetically
bond = (
    bond
      .set_index('Film')
      .sort_index()
)

bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [56]:
# rename Bond Actor Salary to bond_actor_salary
(
    bond
      .rename(
          {'Bond Actor Salary': 'bond_actory_salary'},      # key value pair dictionary; key is old, value is new
          axis = 'columns'
      )
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,bond_actory_salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [57]:
# above is equivalent to
(
    bond
      .rename(columns = {'Bond Actor Salary': 'bond_actor_salary'})
      .head()
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,bond_actor_salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [58]:
# rename GoldenEye to 'first brosnan movie'
(
    bond
      .rename(
          {'GoldenEye': 'first brosnan movie'}, 
          axis = 'index'
      )
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
first brosnan movie,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [59]:
# above is equivalent to
(
    bond
      .rename(
          index = {'GoldenEye': 'first brosnan movie'}
      )
      .head()
)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [60]:
# change 'Box Office' to 'revenue' and 'Year' to 'release_date'
(
    bond
      .rename(
          columns = {'Box Office': 'revenue', 'Year': 'release_date'}
      )
)

Unnamed: 0_level_0,release_date,Actor,Director,revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [61]:
# to change all column names
bond.columns

Index(['Year', 'Actor', 'Director', 'Box Office', 'Budget',
       'Bond Actor Salary'],
      dtype='object')

In [62]:
# conver to all all lower case
bond.columns = bond.columns.str.lower()

In [63]:
bond.head()

Unnamed: 0_level_0,year,actor,director,box office,budget,bond actor salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


## .drop() method
- delete rows or columns from a DataFrame

In [64]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [65]:
# make Film the index and sort alphabetically
bond = (
    bond
      .set_index('Film')
      .sort_index()
)

bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [66]:
# remove A View to a Kill
bond_one_less_row = (
    bond
      .drop(labels = 'A View to a Kill', axis = 'index')
)

bond_one_less_row.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [67]:
# remove Actor column
bond_no_actor_column = (
    bond
      .drop(labels = 'Actor', axis = 'columns')
)

bond_no_actor_column.head()

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1
Casino Royale,2006,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Lee Tamahori,465.4,154.2,17.9


In [68]:
# remove some Daniel Craig movies
bond_no_daniel_craig = (
    bond
      .drop(
          labels = ['Skyfall', 'Spectre'],
          axis = 'index'
      )
)

bond_no_daniel_craig

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


### .pop() method changes in place

In [69]:
# remove Actor column and save it as its own Series
actor = bond.pop('Actor')

# Actor column is gone
bond.head()

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1
Casino Royale,2006,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Lee Tamahori,465.4,154.2,17.9


In [70]:
# actor series
actor

Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Da

### del keyword

In [71]:
# delete Year column
del bond['Year']

# no more Year column
bond.head()

Unnamed: 0_level_0,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A View to a Kill,John Glen,275.2,54.5,9.1
Casino Royale,Martin Campbell,581.5,145.3,3.3
Casino Royale,Ken Hughes,315.0,85.0,
Diamonds Are Forever,Guy Hamilton,442.5,34.7,5.8
Die Another Day,Lee Tamahori,465.4,154.2,17.9


## .sample() method

In [72]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [73]:
# randomly sample 1 row
bond.sample(n = 1)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [74]:
# randomly sample 5 rows
bond.sample(n = 5)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
18,GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,
17,Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


In [75]:
# randomly sample 10% of rows
bond.sample(frac = 0.10)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,
19,Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
17,Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


In [76]:
# randomly sample 25% of rows
bond.sample(frac = 0.25)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
12,For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
10,The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [77]:
# randomly sample 2 columns
bond.sample(n = 2, axis = 'columns')

Unnamed: 0,Budget,Year
0,7.0,1962
1,12.6,1963
2,18.6,1964
3,41.9,1965
4,85.0,1967
5,59.9,1967
6,37.3,1969
7,34.7,1971
8,30.8,1973
9,27.7,1974


## .nsmallest() and .nlargest() methods
- return n rows from DataFrame with with largest and smallest values of a specified column
- also multiple columns, though not sure how this works 

In [78]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [79]:
# 3 highest grossing bond films
(
    bond
      .nlargest(3, 'Box Office')
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [80]:
# above is equivalent to:
(
    bond
      .sort_values('Box Office', ascending = False)
      .head(3)
)

# but .nlargest() and .nsmallest() is more memory efficient and faster

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [81]:
# 5 smallest bond actor salraies
(
    bond
      .nsmallest(5, 'Bond Actor Salary')
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3


## .where() method
- for filtering, but returns *entire* dataframe
  - rows that match have values
  - rows that don't match are NULL

In [82]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [83]:
# bond films where actor is Sean Connery
# note that rows that don't match boolean critiera are null
bond.where(bond['Actor'] == 'Sean Connery')

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965.0,Sean Connery,Terence Young,848.1,41.9,4.7
4,,,,,,,
5,You Only Live Twice,1967.0,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,,,,,,,
7,Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,,,,,,,
9,,,,,,,


In [84]:
# for just movies that Sean Connery was Bond in
bond[bond['Actor'] == 'Sean Connery']

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
13,Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [85]:
# another example
# show rows where box office > 800
bond.where(bond['Box Office'] > 800)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,,,,,,,
1,,,,,,,
2,Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965.0,Sean Connery,Terence Young,848.1,41.9,4.7
4,,,,,,,
5,,,,,,,
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,,,,,,,


In [86]:
# multiple conditions
(
    bond
      .where(
          (bond['Actor'] == 'Sean Connery') 
          & (bond['Box Office'] > 500)
      )
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,,,,,,,
1,From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965.0,Sean Connery,Terence Young,848.1,41.9,4.7
4,,,,,,,
5,You Only Live Twice,1967.0,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,,,,,,,


## filtering using .query() method
- requires string input; double or single quotes doesn't matter
- back ticks `` around column names with spaces
- quotes around column values
- and instead of &; case sensitive must be lower case
- or instead of | case sensitive must be lower case

In [87]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [88]:
# Sean Connery Bond movies
(
    bond
      .query("Actor == 'Sean Connery'")    # order of double quotes vs. single quotes doesn't matter
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
13,Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [89]:
(
    bond
      .query('Actor == "Sean Connery"')    # order of double quotes vs. single quotes doesn't matter
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
13,Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [90]:
# above is equivalent to:
(
    bond
      [bond['Actor'] == 'Sean Connery']
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
13,Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [91]:
# Sean Connery movies directed by Terence Young
(
    bond
      .query(
          "Actor == 'Sean Connery' and Director == 'Terence Young'"      # all in a single string; and must be lowercase
      )
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [92]:
# Daniel Craig movies or Budget less than 10 million
(
    bond
      .query(
          "Budget < 10 or Actor == 'Daniel Craig'"      # or must be lowercase
      )
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [93]:
# Box Office > 500
(
    bond
      .query(
          "`Box Office` > 500"        # use back ticks for columns with spaces
      )
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
10,The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
11,Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
18,GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [94]:
# Pierce Brosnan or Danile Craig movies
(
    bond
      .query("Actor in ['Pierce Brosnan', 'Daniel Craig']")
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
18,GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
19,Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [95]:
# above is equivalent to:
(
    bond
      [bond['Actor'].isin(['Pierce Brosnan', 'Daniel Craig'])]
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
18,GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
19,Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
21,Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
22,Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
23,Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
24,Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [96]:
# all films except those by Daniel Craig and Pierce Brosnan
(
    bond
      .query(
          "Actor not in ['Pierce Brosnan', 'Daniel Craig']"
      )
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [97]:
# above is equivalent to
(
    bond
      [~bond['Actor'].isin(['Pierce Brosnan', 'Daniel Craig'])]    # note the tilde ~
)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


## .apply() method

In [109]:
# reload data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [110]:
# convert 'Box Office', 'Budget', and 'Bond Actor Salary' Series/columns to string and add 'millions'

# custom function that takes as input a number and outputs a string with 'millions' added to it
def convert_to_string_and_add_millions(number):
  return str(number) + ' millions'

# apply custom function to each Series/column
bond['Box Office'] = bond['Box Office'].apply(convert_to_string_and_add_millions)
bond['Budget'] = bond['Budget'].apply(convert_to_string_and_add_millions)
bond['Bond Actor Salary'] = bond['Bond Actor Salary'].apply(convert_to_string_and_add_millions)

# examine
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8 millions,7.0 millions,0.6 millions
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8 millions,12.6 millions,1.6 millions
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4 millions,18.6 millions,3.2 millions
3,Thunderball,1965,Sean Connery,Terence Young,848.1 millions,41.9 millions,4.7 millions
4,Casino Royale,1967,David Niven,Ken Hughes,315.0 millions,85.0 millions,nan millions


In [111]:
# better way to do the above

# start with fresh dataset
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

# custom function
def convert_to_string_and_add_millions(number):
  return str(number) + ' millions'

# iterate through columns to convert
for col in ['Box Office', 'Budget', 'Bond Actor Salary']:
  bond[col] = bond[col].apply(convert_to_string_and_add_millions)    # note lack of return keyword; only for functions

# examine
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8 millions,7.0 millions,0.6 millions
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8 millions,12.6 millions,1.6 millions
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4 millions,18.6 millions,3.2 millions
3,Thunderball,1965,Sean Connery,Terence Young,848.1 millions,41.9 millions,4.7 millions
4,Casino Royale,1967,David Niven,Ken Hughes,315.0 millions,85.0 millions,nan millions


In [116]:
# above doesn't take into account missing values
pd.Series([1.0, 2.0, None, 4.0]).astype('str') + ' millions'

0    1.0 millions
1    2.0 millions
2    nan millions
3    4.0 millions
dtype: object

## .apply() method continued
- on a DataFrame
- TODO: difference between & vs. and, | vs. or
- remember axis = 'columns' inside .apply() method for adding new column to a dataframe
- equivalent to SQL CASE WHEN

In [118]:
# load data
bond = pd.read_csv(
    'https://raw.githubusercontent.com/jack-cao-623/python_learning/main/pandas/jamesbond.csv'
)

bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [136]:
# cluster/rank each bond movie into 1 of 3 buckets based on actor and budget

# custom function
def bucket_rank(row):

    actor = row[2]         # in each row of the bond DataFrame, actor is in index position #2
    budget = row[5]        # in each row of the bond DataFrame, budget is in index position #5

    if actor in ['Daniel Craig', 'Pierce Brosnan']:
      return 'Good'
    if (actor == 'Sean Connery') and (budget > 50):      # & ampersand doesn't work; why?
      return 'Okay'
    else:
      return 'Unknown'

In [138]:
# apply function to each row; but iterating through columns, see axis parameter below

bond['bucket_rank'] = bond.apply(bucket_rank, axis = 'columns')      # axis = 'index' is the default and doesn't work

In [141]:
# examine results
bond

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary,bucket_rank
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6,Unknown
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6,Unknown
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2,Unknown
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7,Unknown
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,,Unknown
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4,Okay
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6,Unknown
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8,Unknown
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,,Unknown
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,,Unknown


## .copy() method
- doesn't work like how R does it
  - a is original; b = a doesn't produce a copy
  - b = a.copy() does
- if you want to make a copy of x and assign to variable y, do y = x.copy()

In [143]:
# toy data
fake_data_a = pd.DataFrame({'a': [1, 2, 3], 
                            'b': [4, 5, 6]})

fake_data_a

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [144]:
# bad way to make a copy; affects fake_data_a
fake_data_b = fake_data_a

# new column in fake_data_b
fake_data_b['c'] = fake_data_b['a'] + fake_data_b['b']

# examine
fake_data_b

Unnamed: 0,a,b,c
0,1,4,5
1,2,5,7
2,3,6,9


In [145]:
# does fake_data_a change?
fake_data_a

# yes!

Unnamed: 0,a,b,c
0,1,4,5
1,2,5,7
2,3,6,9


In [146]:
# the right way to do it:

# DataFrame a
fake_data_a = pd.DataFrame({'a': [1, 2, 3], 
                            'b': [4, 5, 6]})

fake_data_a

# DataFrame b
fake_data_b = fake_data_a.copy()

# change DataFrame b
fake_data_b['c'] = fake_data_b['a'] + fake_data_b['b']

# examine b
fake_data_b

Unnamed: 0,a,b,c
0,1,4,5
1,2,5,7
2,3,6,9


In [147]:
# a doesn't change
fake_data_a

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [151]:
# this behaves like R
x = 2    
y = x        # int object doesn't have an .copy() attribute; but DataFrames and Series do
print(y + 4)
print(x)

6
2
