Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers,
Python objects, etc.). The axis labels are collectively referred to as the index.

s = pd.Series(data, index=index)

Here, data can be many different things:
• a Python dict

• an ndarray

• a scalar value (like 5)

In [1]:
import pandas as pd
import numpy as np
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])


In [2]:
s

a    0.177380
b    0.400234
c    1.013204
d    0.032835
e   -1.372633
dtype: float64

In [3]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
pd.Series(np.random.randn(5))

0   -1.391327
1    1.100761
2    0.270466
3    1.199996
4   -0.662005
dtype: float64

In [5]:
d = {'a' : 0., 'b' : 1., 'c' : 2.}

In [6]:
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [7]:
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [8]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [9]:
s[0]

0.17737991454688376

In [10]:
s[:3]

a    0.177380
b    0.400234
c    1.013204
dtype: float64

In [11]:
s[s > s.median()]

b    0.400234
c    1.013204
dtype: float64

In [12]:
s[[4, 3, 1]]

e   -1.372633
d    0.032835
b    0.400234
dtype: float64

In [13]:
np.exp(s)

a    1.194085
b    1.492174
c    2.754412
d    1.033380
e    0.253439
dtype: float64

In [14]:
## Series is dict-like
s['a']

0.17737991454688376

In [15]:
s['e'] = 12.


In [16]:
s

a     0.177380
b     0.400234
c     1.013204
d     0.032835
e    12.000000
dtype: float64

In [17]:
'e' in s

True

In [18]:
'f' in s

False

In [19]:
s['f']

KeyError: 'f'

In [20]:
s.get('f')

In [21]:
s.get('f', np.nan)

nan

In [22]:
## Vectorized operations and label alignment with Series

In [23]:
s + s

a     0.354760
b     0.800468
c     2.026408
d     0.065670
e    24.000000
dtype: float64

In [24]:
s * 2

a     0.354760
b     0.800468
c     2.026408
d     0.065670
e    24.000000
dtype: float64

In [25]:
np.exp(s)

a         1.194085
b         1.492174
c         2.754412
d         1.033380
e    162754.791419
dtype: float64

In [26]:
s[1:] + s[:-1]

a         NaN
b    0.800468
c    2.026408
d    0.065670
e         NaN
dtype: float64

In [27]:
s = pd.Series(np.random.randn(5), name='something')

In [28]:
s

0   -1.213994
1   -0.249350
2   -0.463274
3   -0.749320
4   -1.257639
Name: something, dtype: float64

In [29]:
s.name

'something'

In [30]:
s2 = s.rename("different")

In [31]:
s2.name

'different'

## DataFrame


DataFrame is a 2-dimensional labeled data structure with columns of potentially different types. You can think of it
like a spreadsheet or SQL table, or a dict of Series objects.

It is generally the most commonly used pandas object.

Like Series, DataFrame accepts many different kinds of input:

• Dict of 1D ndarrays, lists, dicts, or Series

• 2-D numpy.ndarray

• Structured or record ndarray

• A Series

• Another DataFrame


In [32]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

In [33]:
df = pd.DataFrame(d)

In [34]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [35]:
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [36]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [37]:
d = {'one' : [1., 2., 3., 4.],
      'two' : [4., 3., 2., 1.]}

In [38]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [39]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [40]:
data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')])

In [41]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")]

In [42]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [43]:
pd.DataFrame(data, index=['first', 'second'])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [44]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [45]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [46]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [47]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [48]:
pd.DataFrame(data2, columns=['a', 'b'])


Unnamed: 0,a,b
0,1,2
1,5,10


In [49]:
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})

       a              b
       b    a    c    a     b
A B  1.0  4.0  5.0  8.0  10.0
  C  2.0  3.0  6.0  7.0   NaN
  D  NaN  NaN  NaN  NaN   9.0


In [50]:
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [51]:
pd.DataFrame.from_records(data, index='C')

          A    B
C
b'Hello'  1  2.0
b'World'  2  3.0


In [52]:
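pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])

FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.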


Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [53]:
pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
orient='index', columns=['one', 'two', 'three'])

  


Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [54]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [55]:
df['three'] = df['one'] * df['two']

In [56]:
df['flag'] = df['one'] > 2

In [57]:
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [58]:
del df['two']

In [59]:
three = df.pop('three')

In [60]:
three

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [61]:
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [62]:
df['foo'] = 'bar'

In [63]:
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [64]:
df['one_trunc'] = df['one'][:2]

In [65]:
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [66]:
df1 = pd.DataFrame({'a' : [1, 0, 1], 'b' : [0, 1, 1] }, dtype=bool)

In [67]:
df2 = pd.DataFrame({'a' : [0, 1, 1], 'b' : [1, 1, 0] }, dtype=bool)

In [68]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [69]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [70]:
df1 ^ df2

Unnamed: 0,a,b
0,True,True
1,True,False
2,False,True


In [71]:
-df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


In [72]:
# Transposing

In [73]:
df[:5].T

Unnamed: 0,a,b,c,d
one,1,2,3,
flag,False,False,True,False
foo,bar,bar,bar,bar
one_trunc,1,2,,
