# The Series Data Structure

In [1]:
import pandas as pd
# IPython help syntax: a trailing `?` displays the documentation for pd.Series.
pd.Series?

In [3]:
# A list of strings becomes a Series with a default integer index and object dtype.
animals = ["Tiger", "Bear", "Moose"]
pd.Series(data=animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [4]:
# A list of integers produces an integer-typed Series.
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [5]:
# In a list of strings, a missing value given as None is kept as None
# (see the recorded output: object dtype).
animals = ["Tiger", "Bear", None]
pd.Series(data=animals)

0    Tiger
1     Bear
2     None
dtype: object

In [6]:
# In a list of numbers, None is converted to NaN and the Series becomes float64.
numbers = [1, 2, None]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [7]:
import numpy as np
# NaN and None are different values: this comparison evaluates to False.
np.nan == None

False

In [9]:
# Test for NaN with np.isnan rather than with an equality comparison.
np.isnan(np.nan)

True

In [10]:
# Building a Series from a dict uses the dict keys as the index labels.
sports = {
    "Archery": "Bhutan",
    "Golf": "Scotland",
    "Sumo": "Japan",
    "Taekwondo": "South Korea",
}
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [11]:
# 10,000 random integers drawn from [0, 1000), used by the timing cells.
s = pd.Series(np.random.randint(low=0, high=1000, size=10000))
s.head()

0    178
1     51
2    785
3    572
4     78
dtype: int32

In [12]:
# Number of elements in the Series.
len(s)

10000

In [13]:
%%timeit -n 100
# Baseline for the timing comparison: accumulate the total with a plain Python loop.
summary = 0
for value in s:
    summary += value

1.89 ms ± 105 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
%%timeit -n 100
# Vectorized sum over the whole Series in one call (no Python-level loop).
summary = np.sum(s)

253 µs ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
# Broadcasting: add 2 to every element with a single in-place operation.
s += 2
s.head()

0    182
1     55
2    789
3    576
4     82
dtype: int32

In [17]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
# Deliberately slow element-by-element update, kept for the timing comparison.
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (label, value) pairs.
for label, value in s.items():
    s.loc[label] = value + 2

1.05 s ± 16 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
%%timeit -n 10
s = pd.Series(np.random.randint(low=0, high=1000, size=10000))
s += 2  # one vectorized operation instead of a per-element loop


850 µs ± 185 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [19]:
# Assigning to a label that is not yet in the index adds a new entry;
# the recorded output shows the Series ends up with object dtype.
s = pd.Series(data=[1, 2, 3])
s.loc["Animal"] = "Bears"
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [20]:
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
# Four values that all share the same (non-unique) index label.
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'],
                                     index=['Cricket'] * 4)
# Series.append() was removed in pandas 2.0; pd.concat() is the replacement.
# It returns a new Series and leaves both inputs unchanged.
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [21]:
# Combining the two Series produced a new object; original_sports is unchanged.
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [22]:
# The combined Series, including the repeated 'Cricket' label.
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [23]:
# Index labels need not be unique: .loc returns every entry labelled 'Cricket' as a Series.
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

# The DataFrame Data Structure

In [24]:
import pandas as pd

# Each purchase is a Series; passing them as a list makes each one a row,
# with the Series labels as columns and the store names as the row index.
pur_1 = pd.Series({"Name": "Aru", "Item Purchased": "Guns", "Cost": 300})
pur_2 = pd.Series({"Name": "James", "Item Purchased": "Bullets", "Cost": 20})
pur_3 = pd.Series({"Name": "Johnah", "Item Purchased": "RPG", "Cost": 20000})

df = pd.DataFrame(
    data=[pur_1, pur_2, pur_3],
    index=["Ammo Store 1", "Ammo Store 2", "Ammo store 3"],
)

df.head()

Unnamed: 0,Name,Item Purchased,Cost
Ammo Store 1,Aru,Guns,300
Ammo Store 2,James,Bullets,20
Ammo store 3,Johnah,RPG,20000


In [26]:
# .loc with a single row label returns that row as a Series.
df.loc['Ammo Store 1']

Name               Aru
Item Purchased    Guns
Cost               300
Name: Ammo Store 1, dtype: object

In [30]:
# Assigning a list to a new column label adds that column (one value per row, in row order).
df['Location'] = ['India','UK','Israel']
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Ammo Store 1,Aru,Guns,300,India
Ammo Store 2,James,Bullets,20,UK
Ammo store 3,Johnah,RPG,20000,Israel


In [33]:
# drop() returns a new DataFrame without the row; df itself is not modified
# because the result is not assigned back.
df.drop('Ammo Store 1')

Unnamed: 0,Name,Item Purchased,Cost,Location
Ammo Store 2,James,Bullets,20,UK
Ammo store 3,Johnah,RPG,20000,Israel


In [34]:
# Multiplying by 0.8 is a 20% discount (the earlier "10%" comment was wrong).
# The update is in place, so re-running this cell compounds the discount.
df['Cost'] *= 0.8 #Apply 20% discount
print(df)

                Name Item Purchased     Cost Location
Ammo Store 1     Aru           Guns    240.0    India
Ammo Store 2   James        Bullets     16.0       UK
Ammo store 3  Johnah            RPG  16000.0   Israel


In [38]:
# Boolean mask: keep only the rows whose Cost equals 240.
# NOTE(review): this is an exact comparison on a computed float; it matches here
# (see the recorded output), but a tolerance-based check is safer in general.
df[df['Cost']==240]

Unnamed: 0,Name,Item Purchased,Cost,Location
Ammo Store 1,Aru,Guns,240.0,India


In [39]:
import requests
import io

In [40]:
url = 'https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/master/ClassNotebooks/log.csv'
# Fetch the CSV over HTTP. The timeout keeps the cell from hanging indefinitely,
# and raise_for_status() fails loudly on an HTTP error instead of letting an
# error page be parsed as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content
df = pd.read_csv(io.StringIO(download.decode('utf-8')))

df.head()

Unnamed: 0,time,user,video,playback position,paused,volume
0,1469974424,cheryl,intro.html,5,False,10.0
1,1469974454,cheryl,intro.html,6,,
2,1469974544,cheryl,intro.html,9,,
3,1469974574,cheryl,intro.html,10,,
4,1469977514,bob,intro.html,1,,


In [41]:
# IPython help syntax: show the documentation for DataFrame.fillna.
df.fillna?


In [44]:
# Use the 'time' column as the index, then order the rows by it.
df = df.set_index('time').sort_index()
df

Unnamed: 0_level_0,user,video,playback position,paused,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,,
1469974454,sue,advanced.html,24,,
1469974484,cheryl,intro.html,7,,
1469974514,cheryl,intro.html,8,,
1469974524,sue,advanced.html,25,,
1469974544,cheryl,intro.html,9,,
1469974554,sue,advanced.html,26,,
1469974574,cheryl,intro.html,10,,


In [45]:
# Move 'time' back to a regular column, then build a two-level index of (time, user).
df = df.reset_index().set_index(['time', 'user'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,,
1469974454,sue,advanced.html,24,,
1469974484,cheryl,intro.html,7,,
1469974514,cheryl,intro.html,8,,
1469974524,sue,advanced.html,25,,
1469974544,cheryl,intro.html,9,,
1469974554,sue,advanced.html,26,,
1469974574,cheryl,intro.html,10,,


In [46]:
# Forward-fill: replace each missing value with the last valid value above it
# in the same column.
# fillna(method='ffill') is deprecated since pandas 2.1; DataFrame.ffill() is
# the direct replacement.
df = df.ffill()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,False,10.0
1469974454,sue,advanced.html,24,False,10.0
1469974484,cheryl,intro.html,7,False,10.0
