## Series Data Structure

In [2]:
import pandas as pd

In [3]:
# Build a Series from a plain Python list of strings.
animal = [
    'Tiger',
    'Bear',
    'Moose',
]
pd.Series(data=animal)

0    Tiger
1     Bear
2    Moose
dtype: object

In [11]:
# A dict turns into a Series whose index labels are the dict keys
# and whose values are the dict values.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea'
)
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [8]:
# Values and index labels are given separately and paired up by position.
s1 = pd.Series(
    data=['Tiger', 'Bear', 'Moose'],
    index=['India', 'America', 'Canada'],
)
s1

India      Tiger
America     Bear
Canada     Moose
dtype: object

## Querying a Series

In [12]:
# Display the Series that the following cells query.
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [14]:
# Positional lookup: .iloc selects by integer position (3 is the fourth entry).
s.iloc[3]


'South Korea'

In [16]:
# Label lookup: .loc selects by index label.
s.loc['Taekwondo']

'South Korea'

In [17]:
# `s` is indexed by string labels, so a bare integer key such as s[3] is
# ambiguous: pandas used to fall back to positional lookup, but that fallback
# is deprecated in pandas 2.1+. Ask for the position explicitly instead.
s.iloc[3]

'South Korea'

#### Vectorization

In [4]:
import numpy as np

# Seed NumPy's global generator so the sample below is identical on every run.
np.random.seed(0)
# 10,000 random integers in [0, 1000); randint excludes the upper bound.
s = pd.Series(np.random.randint(0, 1000, 10000))
s.head()

0    837
1    910
2    572
3    194
4     28
dtype: int64

In [5]:
# Number of elements in the Series.
len(s)

10000

In [8]:
%%timeit -n 100
# Baseline: add up the Series one element at a time with a Python-level loop.
summary = 0
for item in s:
    summary += item

100 loops, best of 3: 2.05 ms per loop


In [10]:
%%timeit -n 100
s = pd.Series(np.random.randint(0,1000,10000))
summary = np.sum(s)

100 loops, best of 3: 242 µs per loop


#### Avoid iterating over a Series

In [11]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
    s.loc[label] = value + 2

10 loops, best of 3: 839 ms per loop


In [6]:
%%timeit -n 10
# A fresh Series is created on every run, so that cost is part of the timing.
s = pd.Series(np.random.randint(0,1000,10000))
# Vectorized update: add 2 to every element in a single operation.
s += 2


10 loops, best of 3: 302 µs per loop


In [8]:
# Start from an integer Series with the default positional index.
s = pd.Series(data=[1, 2, 3])
# Assigning through .loc to a label that does not exist yet appends a new
# entry; the index then mixes integers with a string label.
s.loc['Animal'] = 'bear'
s

0            1
1            2
2            3
Animal    bear
dtype: object

## DataFrame Data Structure

In [20]:
import pandas as pd

# Each purchase record is a Series whose labels are the field names.
purchase_1 = pd.Series({'Name': 'Chris', 'Item Purchased': 'Dog Food', 'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn', 'Item Purchased': 'Kitty Litter', 'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod', 'Item Purchased': 'Bird Seed', 'Cost': 5.00})

# Stack the records as rows. Index labels do not have to be unique:
# the first two rows both carry the label 'store 1'.
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['store 1', 'store 1', 'store 2'],
)
df.head()

Unnamed: 0,Cost,Item Purchased,Name
store 1,22.5,Dog Food,Chris
store 1,2.5,Kitty Litter,Kevyn
store 2,5.0,Bird Seed,Vinod


In [18]:
# Row selection by index label.
df.loc['store 2']

Cost                       2.5
Item Purchased    Kitty Litter
Name                     Kevyn
Name: store 2, dtype: object

In [19]:
# Row selection by integer position (1 is the second row).
df.iloc[1]

Cost                       2.5
Item Purchased    Kitty Litter
Name                     Kevyn
Name: store 2, dtype: object

In [21]:
# Row label and column name in a single .loc call: the 'Cost' value of every
# row labelled 'store 1'.
df.loc['store 1','Cost']

store 1    22.5
store 1     2.5
Name: Cost, dtype: float64

In [22]:
# Transpose: rows become columns and columns become rows.
df.T

Unnamed: 0,store 1,store 1.1,store 2
Cost,22.5,2.5,5
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Name,Chris,Kevyn,Vinod


In [25]:
# After transposing, 'Cost' is a row label, so .loc can select it.
df.T.loc['Cost']

store 1    22.5
store 1     2.5
store 2       5
Name: Cost, dtype: object

In [27]:
# Column selection with the indexing operator.
df['Cost']

store 1    22.5
store 1     2.5
store 2     5.0
Name: Cost, dtype: float64

In [30]:
# Chained indexing: .loc['store 1'] selects the rows first, then ['Cost'] picks
# the column from that intermediate result. NOTE(review): fine for reading, but
# prefer the single-call form df.loc['store 1', 'Cost'] when assigning values.
df.loc['store 1']['Cost']

store 1    22.5
store 1     2.5
Name: Cost, dtype: float64

In [31]:
# ':' keeps every row; the list picks the 'Cost' and 'Name' columns.
df.loc[:,['Cost','Name']]

Unnamed: 0,Cost,Name
store 1,22.5,Chris
store 1,2.5,Kevyn
store 2,5.0,Vinod


In [33]:
# Lists on both axes: rows labelled 'store 1' or 'store 2', and only the
# 'Cost' column.
df.loc[['store 1','store 2'],['Cost']]

Unnamed: 0,Cost
store 1,22.5
store 1,2.5
store 2,5.0


In [36]:
# Work on a copy so that df itself is left untouched; drop() returns a new
# frame without the rows labelled 'store 1'.
copy_df = df.copy().drop('store 1')
copy_df

Unnamed: 0,Cost,Item Purchased,Name
store 2,5.0,Bird Seed,Vinod
