In [4]:
import numpy as np
import pandas as pd

A = np.array([0, [1, 5]])

In [6]:
A[0]

0

In [7]:
# Series as dictionary

In [8]:
data = pd.Series(
    [0.2, 0.3, 0.6, 1.2],
    index = ['a', 'b', 'c', 'd']
)

data

a    0.2
b    0.3
c    0.6
d    1.2
dtype: float64

In [9]:
data['b']

0.3

In [13]:
'x' in data

False

In [14]:
'a' in data

True

In [15]:
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [16]:
list(data.items())

[('a', 0.2), ('b', 0.3), ('c', 0.6), ('d', 1.2)]

In [19]:
data.values

array([0.2, 0.3, 0.6, 1.2])

In [20]:
data[:]

a    0.2
b    0.3
c    0.6
d    1.2
dtype: float64

In [21]:
# Series can be extended by putting new values into it:

data['e'] = 20

In [22]:
data

a     0.2
b     0.3
c     0.6
d     1.2
e    20.0
dtype: float64

In [23]:
data['a':'c']

a    0.2
b    0.3
c    0.6
dtype: float64

In [25]:
data[0:2]

a    0.2
b    0.3
dtype: float64

In [26]:
data[(data >= 0.3) & (data <= 1.2)]

b    0.3
c    0.6
d    1.2
dtype: float64

In [29]:
data[['a', 'c']]

a    0.2
c    0.6
dtype: float64

In [30]:
data = pd.Series(
                ['a', 'b', 'c'],
                index = [1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [33]:
data[3]

'b'

In [35]:
data[0:3]

1    a
3    b
5    c
dtype: object

In [36]:
data[2]

KeyError: 2

In [38]:
# ^ As seen above the index for 2 doesn't work because
# it is trying to look for an index '2'
# so we use .iloc

data.iloc[2]

'c'

In [39]:
data

1    a
3    b
5    c
dtype: object

In [40]:
# To perform regular operations that adheres strictly to indexes
# we us .loc

data.loc[3]

'b'

In [42]:
data.loc[1:5]

1    a
3    b
5    c
dtype: object

In [43]:
# Therefore, it is good practice to use both
# loc and iloc to eliminate confusion

In [45]:
Barea = ({
    'WA' : 5,
    'PA' : 4,
    'NY' : 3,
    'IL' : 2,
    'VA' : 1
})

Bpop = ({
    'WA' : 550,
    'PA' : 440,
    'NY' : 330,
    'IL' : 220,
    'VA' : 110
})

area = pd.Series(Barea)
pop = pd.Series(Bpop)

data = pd.DataFrame({
    'population' : pop,
    'area' : area
}).sort_index()

data

Unnamed: 0,population,area
IL,220,2
NY,330,3
PA,440,4
VA,110,1
WA,550,5


In [47]:
data['area']

IL    2
NY    3
PA    4
VA    1
WA    5
Name: area, dtype: int64

In [48]:
data.area

IL    2
NY    3
PA    4
VA    1
WA    5
Name: area, dtype: int64

In [49]:
# dataFrames can be appended as well:

data['Density'] = data['population']/data['area']
data

Unnamed: 0,population,area,Density
IL,220,2,110.0
NY,330,3,110.0
PA,440,4,110.0
VA,110,1,110.0
WA,550,5,110.0


In [50]:
data.values

array([[220.,   2., 110.],
       [330.,   3., 110.],
       [440.,   4., 110.],
       [110.,   1., 110.],
       [550.,   5., 110.]])

In [51]:
# Use .T to tranform rows to columns

data.T

Unnamed: 0,IL,NY,PA,VA,WA
population,220.0,330.0,440.0,110.0,550.0
area,2.0,3.0,4.0,1.0,5.0
Density,110.0,110.0,110.0,110.0,110.0


In [52]:
data

Unnamed: 0,population,area,Density
IL,220,2,110.0
NY,330,3,110.0
PA,440,4,110.0
VA,110,1,110.0
WA,550,5,110.0


In [53]:
data.T['IL']

population    220.0
area            2.0
Density       110.0
Name: IL, dtype: float64

In [56]:
data.values[1] # Gives all the values for NY

array([330.,   3., 110.])

In [57]:
data.iloc[:3, :2]

Unnamed: 0,population,area
IL,220,2
NY,330,3
PA,440,4


In [58]:
data.loc[:'PA', :'area']

Unnamed: 0,population,area
IL,220,2
NY,330,3
PA,440,4


In [59]:
# ix is a hybrid of .iloc and loc

data.ix[:3, :'area']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  retval = getattr(retval, self.name)._getitem_axis(key, axis=i)


Unnamed: 0,population,area
IL,220,2
NY,330,3
PA,440,4


In [60]:
data

Unnamed: 0,population,area,Density
IL,220,2,110.0
NY,330,3,110.0
PA,440,4,110.0
VA,110,1,110.0
WA,550,5,110.0


In [61]:
data.loc[data.area >= 3, ['area', 'density']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,area,density
NY,3,
PA,4,
WA,5,


In [62]:
data.iloc[0, 2]

110.0

In [63]:
data.iloc[1, 1] = 30
data

Unnamed: 0,population,area,Density
IL,220,2,110.0
NY,330,30,110.0
PA,440,4,110.0
VA,110,1,110.0
WA,550,5,110.0
