# The Series data structure

In [26]:
import pandas as pd
pd.Series?

In [27]:
# a list of strings becomes a Series with a default integer index (dtype: object)
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [28]:
# a list of integers becomes an integer Series (int64 in the recorded output)
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [29]:
# missing values: in a list of strings, None is kept as None and the dtype stays object
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [30]:
# in a list of numbers, None is converted to NaN and the dtype becomes float64
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [31]:
# NaN is not the same thing as None: comparing the two with == gives False

import numpy as np
np.nan == None

False

In [33]:
# to test whether a value is NaN, use the dedicated function np.isnan
np.isnan(np.nan)

True

# Querying a Series

In [34]:
# use the iloc attribute to query by numeric position (starting at zero)
# use the loc attribute to query by index label

In [37]:
# build a Series from a dict: the keys become the index labels, the values the data
# NOTE(review): 'Anchery' is presumably a typo for 'Archery' -- confirm before renaming,
# since the recorded output of this cell uses the same spelling
sports = {'Anchery': 'Bhutan',
         'Golf': 'Scotland',
         'Sumo': 'Japan',
         'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Anchery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [39]:
# to see the country in the fourth position (iloc positions start at zero)

s.iloc[3]

'South Korea'

In [41]:
# Position 3 and the label 'Taekwondo' address the same element.
# s[3] is avoided on purpose: on a Series with string labels an integer key
# relies on a positional fallback that pandas deprecated in 2.1, so positions
# go through iloc and labels through [] / loc.
s.iloc[3] == s['Taekwondo']

True

In [40]:
# loc queries by index label: which country has 'Golf' as its national sport?

s.loc['Golf']

'Scotland'

In [42]:
# indexing with a label directly, s['Golf'], returns the same value as s.loc['Golf']
s.loc['Golf'] == s['Golf']

True

# The DataFrame data structure

In [44]:
import pandas as pd

# One Series per purchase record: buyer, item, and price.
purchase_1, purchase_2, purchase_3 = (
    pd.Series(record)
    for record in (
        {'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50},
        {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
        {'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00},
    )
)

# Each Series becomes one row of the frame; the index names the store for
# each row (note that 'Store 1' labels two rows).
df = pd.DataFrame(data=[purchase_1, purchase_2, purchase_3],
                  index=['Store 1', 'Store 1', 'Store 2'])

df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [45]:
# 'Store 2' labels a single row, so loc returns that row as a Series
df.loc['Store 2']

Cost                      5
Item Purchased    Bird Seed
Name                  Vinod
Name: Store 2, dtype: object

In [46]:
# 'Store 1' labels two rows, so loc returns a DataFrame containing both
df.loc['Store 1']

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn


In [47]:
# select the 'Item Purchased' column for every row, regardless of store or buyer
df['Item Purchased']

Store 1        Dog Food
Store 1    Kitty Litter
Store 2       Bird Seed
Name: Item Purchased, dtype: object

In [49]:
# select by row label and column label; both 'Store 1' rows match, so a Series is returned
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [50]:
# select the 'Name' and 'Cost' columns for all rows (':' stands for every row)
df.loc[:, ['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 2,Vinod,5.0


In [51]:
# add a new column; the single value None is assigned to every row
df['Location'] = None
df

Unnamed: 0,Cost,Item Purchased,Name,Location
Store 1,22.5,Dog Food,Chris,
Store 1,2.5,Kitty Litter,Kevyn,
Store 2,5.0,Bird Seed,Vinod,


In [53]:
# delete (drop) the rows labelled 'Store 1'
df.drop('Store 1')

Unnamed: 0,Cost,Item Purchased,Name,Location
Store 2,5.0,Bird Seed,Vinod,


In [55]:
# drop returns a copy; by default the original df is left untouched
df

Unnamed: 0,Cost,Item Purchased,Name,Location
Store 1,22.5,Dog Food,Chris,
Store 1,2.5,Kitty Litter,Kevyn,
Store 2,5.0,Bird Seed,Vinod,


In [64]:
# take an independent copy of df and drop the 'Store 1' rows from that copy
df_copy = df.copy().drop('Store 1')
df_copy

Unnamed: 0,Cost,Item Purchased,Name,Location
Store 2,5.0,Bird Seed,Vinod,


In [65]:
# delete a column in place with the del keyword
del df_copy['Name']
df_copy

# another way is to use drop() with the axis parameter, where axis=1 means columns

Unnamed: 0,Cost,Item Purchased,Location
Store 2,5.0,Bird Seed,


# Indexing a DataFrame

In [66]:
import pandas as pd

# Build the purchases frame again from its three records
# (one Series per purchase: buyer, item, and price).
purchase_1, purchase_2, purchase_3 = (
    pd.Series(record)
    for record in (
        {'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50},
        {'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50},
        {'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00},
    )
)

# Each Series becomes one row; the index names the store for each row.
df = pd.DataFrame(data=[purchase_1, purchase_2, purchase_3],
                  index=['Store 1', 'Store 1', 'Store 2'])

df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Dog Food,Chris
Store 1,2.5,Kitty Litter,Kevyn
Store 2,5.0,Bird Seed,Vinod


In [67]:
# Re-index the DataFrame hierarchically: first by store, then by person.
# The two index levels are named 'Location' and 'Name'.
df = (
    df.set_index('Name', append=True)       # keep the store index, add 'Name' as a second level
      .rename_axis(['Location', 'Name'])    # name both index levels
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Cost,Item Purchased
Location,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Store 1,Chris,22.5,Dog Food
Store 1,Kevyn,2.5,Kitty Litter
Store 2,Vinod,5.0,Bird Seed


In [68]:
# Add a new entry to the frame:
#   'Name': 'Kevyn', 'Item Purchased': 'Kitty Food', 'Cost': 3, 'Location': 'Store 2'
#
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the new
# row is built as a one-row DataFrame whose index carries the same level names
# as df, and is then concatenated onto df.
new_row = pd.DataFrame(
    {'Cost': [3.00], 'Item Purchased': ['Kitty Food']},
    index=pd.MultiIndex.from_tuples([('Store 2', 'Kevyn')], names=df.index.names),
)
df = pd.concat([df, new_row])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Cost,Item Purchased
Location,Name,Unnamed: 2_level_1,Unnamed: 3_level_1
Store 1,Chris,22.5,Dog Food
Store 1,Kevyn,2.5,Kitty Litter
Store 2,Vinod,5.0,Bird Seed
Store 2,Kevyn,3.0,Kitty Food
