# The Series Data Structure

In [1]:
import pandas as pd

In [3]:
pd.Series?

In [4]:
numbers = [1, 2, 3, 4]
pd.Series(numbers)

0    1
1    2
2    3
3    4
dtype: int64

In [5]:
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [7]:
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [5]:
import numpy as np
np.nan == None

False

In [8]:
np.nan == np.nan

False

In [13]:
np.isnan(np.nan)

True

In [6]:
np.isnan(5)

False

In [9]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [19]:
s['Sumo']

'Japan'

In [20]:
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [10]:
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [11]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [12]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [25]:
s.iloc[2] #keep in mind that iloc and loc are not methods, they are attributes.

'Japan'

In [26]:
s.loc['Sumo']

'Japan'

In [27]:
s[3]

'South Korea'

In [28]:
s['Golf']

'Scotland'

In [46]:
sports = {0: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)
s

0           Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object

In [47]:
s.iloc[:,]

0           Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object

In [9]:
s[100] #This won't call s.iloc[0] as one might expect, it generates an error instead

'Scotland'

In [15]:
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [18]:
s += 2
s

0    104.0
1    124.0
2    105.0
3      7.0
dtype: float64

In [17]:
total = 0
for item in s:
    total+=item
print(total)

332.0


In [19]:
import numpy as np
total = np.sum(s)
print(total)

340.0


In [20]:
a = pd.Series([100,200])
print(a)
np.sum(a)

0    100
1    200
dtype: int64


300

In [41]:
len(s)

4

In [22]:
s+=2 #adds two to each item in s using broadcasting
s.head()

0    108.0
1    128.0
2    109.0
3     11.0
dtype: float64

In [27]:
s = pd.Series(np.random.randint(0,1000,10000))
s.head()

0    717
1     75
2    227
3    737
4    282
dtype: int32

In [29]:
s.describe()

count    10000.000000
mean       499.580500
std        289.207934
min          0.000000
25%        252.000000
50%        498.000000
75%        750.000000
max        999.000000
dtype: float64

In [25]:
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

# The DataFrame Data Structure

In [32]:
import pandas as pd
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 2', 'Store 3'])
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [35]:
df.loc['Store 3']

Name                  Vinod
Item Purchased    Bird Seed
Cost                    5.0
Name: Store 3, dtype: object

In [36]:
pd.DataFrame([df.loc['Store 3']], index=['store_3'])

Unnamed: 0,Name,Item Purchased,Cost
store_3,Vinod,Bird Seed,5.0


In [52]:
pd.DataFrame((df.loc['Store 2']['Store 3']), index=[['store2', 'store3']])

KeyError: 'Store 3'

In [54]:
type(df.loc['Store 2'])

pandas.core.series.Series

In [55]:
df.loc['Store 1']

Name                 Chris
Item Purchased    Dog Food
Cost                  22.5
Name: Store 1, dtype: object

In [48]:
df.loc['Store 1', 'Cost']

22.5

## For the purchase records from the pet store, how would you get a list of all items which had been purchased (regardless of where they might have been purchased, or by whom)?

In [48]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 2', 'Store 3'])

# Your code here
df['Item Purchased']

Store 1        Dog Food
Store 2    Kitty Litter
Store 3       Bird Seed
Name: Item Purchased, dtype: object

In [74]:
df.loc[:, ['Cost']]

Unnamed: 0,Cost
Store 1,22.5
Store 2,2.5
Store 3,5.0


In [24]:
df.T

Unnamed: 0,Store 1,Store 2,Store 3
Name,Chris,Kevyn,Vinod
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Cost,22.5,2.5,5.0


In [76]:
df.T.loc['Cost']

Store 1    22.5
Store 2     2.5
Store 3       5
Name: Cost, dtype: object

In [77]:
df['Cost']

Store 1    22.5
Store 2     2.5
Store 3     5.0
Name: Cost, dtype: float64

In [78]:
df.loc['Store 1']['Cost']

22.5

In [79]:
df.loc[:,['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 2,Kevyn,2.5
Store 3,Vinod,5.0


In [54]:
df2 = df.drop('Name', axis = 1)
df2

Unnamed: 0,Item Purchased,Cost
Store 1,Dog Food,22.5
Store 2,Kitty Litter,2.5
Store 3,Bird Seed,5.0


In [81]:
df1 = df.drop('Store 1')
df1

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [82]:
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [83]:
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [25]:
copy_df = df.copy()
copy_df = copy_df.drop('Store 1')
copy_df

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Kevyn,Kitty Litter,2.5
Store 3,Vinod,Bird Seed,5.0


In [26]:
copy_df.drop?

In [27]:
del copy_df['Name']
copy_df

Unnamed: 0,Item Purchased,Cost
Store 2,Kitty Litter,2.5
Store 3,Bird Seed,5.0


In [28]:
# to add a column in a dataset
df['Location'] = df['Cost']+ 5
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,27.5
Store 2,Kevyn,Kitty Litter,2.5,7.5
Store 3,Vinod,Bird Seed,5.0,10.0


# Dataframe Indexing and Loading

In [49]:
costs = df['Cost']
costs

Store 1    22.5
Store 2     2.5
Store 3     5.0
store 4     NaN
Name: Cost, dtype: float64

In [50]:
costs+=2
costs

Store 1    24.5
Store 2     4.5
Store 3     7.0
store 4     NaN
Name: Cost, dtype: float64

In [51]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,24.5,27.5
Store 2,Kevyn,Kitty Litter,4.5,7.5
Store 3,Vinod,Bird Seed,7.0,10.0
store 4,,,,


## For the purchase records from the pet store, how would you update the DataFrame, applying a discount of 20% across all the values in the 'Cost' column?

In [61]:
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])

# Your answer here
df['discounted cost']=df['Cost']-(df['Cost']*0.2)
df

Unnamed: 0,Name,Item Purchased,Cost,discounted cost
Store 1,Chris,Dog Food,22.5,18.0
Store 1,Kevyn,Kitty Litter,2.5,2.0
Store 2,Vinod,Bird Seed,5.0,4.0
