In [68]:
import pandas as pd
import numpy as np

## Series

In [30]:
# A Series built from a plain list gets a default integer RangeIndex (0..n-1).
s = pd.Series([1, 10, -1, 20])
# Display the Series together with its two parts: the index labels and the raw values.
s, s.index, s.values

(0     1
 1    10
 2    -1
 3    20
 dtype: int64,
 RangeIndex(start=0, stop=4, step=1),
 array([ 1, 10, -1, 20]))

In [31]:
# Same values as `s`, but with explicit string labels instead of the default integer index.
s2 = pd.Series([1, 10, -1, 20], index= ['a', 'b', 'c', 'd'])
# Display the Series, its label Index and the underlying array.
s2, s2.index, s2.values

(a     1
 b    10
 c    -1
 d    20
 dtype: int64,
 Index(['a', 'b', 'c', 'd'], dtype='object'),
 array([ 1, 10, -1, 20]))

In [32]:
# Assign a single element by label on the string-labelled Series.
s2['a'] = 101
# `s` has the default integer index, so the list [0, 2] selects the labels 0 and 2;
# each selected element receives the matching value from the right-hand list.
s[[0, 2]] = [33, 44]
# Read the first element of s2 by position with .iloc. A bare s2[0] on a
# string-labelled Series relies on the deprecated "integer key means position"
# fallback; .iloc states the intent explicitly and shows the same value.
s2.iloc[0], s2['a'], s[[0, 2]], s, s2

(101,
 101,
 0    33
 2    44
 dtype: int64,
 0    33
 1    10
 2    44
 3    20
 dtype: int64,
 a    101
 b     10
 c     -1
 d     20
 dtype: int64)

In [33]:
# Comparing a Series with a scalar yields a boolean Series carrying the same labels;
# indexing with that mask keeps only the elements where the mask is True.
is_negative = s2 < 0
is_negative, s2[is_negative]

(a    False
 b    False
 c     True
 d    False
 dtype: bool,
 c   -1
 dtype: int64)

In [25]:
# In-place arithmetic through boolean masks: only the selected elements are changed.
# NOTE(review): this cell's execution count is lower than that of the cells above it,
# yet its recorded output reflects the assignment to `s` made in the previous
# s[[0, 2]] = ... cell, so the notebook was run out of order.
# NOTE: the cell mutates `s`; running it again doubles the elements above 33 once more.
s[s > 33] *= 2
# `/=` is true division; the recorded output still shows dtype int64 for the result.
s[s == 10] /= 2
s

0    33
1     5
2    88
3    20
dtype: int64

In [29]:
myd = {'a': 1.0, 'b': 2.0, 'c': 3.0}
# With an explicit index the dict is looked up by key in the requested order:
# 'a' is left out, and 'd' (not a key of myd) becomes NaN.
s3 = pd.Series(myd, index=['c', 'b', 'd'])
# Three equivalent ways to flag missing values: the .isnull() method and the
# top-level pd.isna / pd.isnull functions.
missing = s3.isnull()
myd, s3, missing, s3[missing], pd.isna(s3), pd.isnull(s3)

({'a': 1.0, 'b': 2.0, 'c': 3.0},
 c    3.0
 b    2.0
 d    NaN
 dtype: float64,
 c    False
 b    False
 d     True
 dtype: bool,
 d   NaN
 dtype: float64,
 c    False
 b    False
 d     True
 dtype: bool,
 c    False
 b    False
 d     True
 dtype: bool)

In [66]:
# Arithmetic between Series aligns on index labels, not on position.
# Labels present in only one operand ('c' in s2, 'e' in s4) come out as NaN,
# and the result shown below is float64.
s4 = pd.Series([100, 100, 100, 200], index= ['a', 'b', 'd', 'e'])
s2, s4, s2+s4

(a    101
 b     10
 c     -1
 d     20
 dtype: int64,
 a    100
 b    100
 d    100
 e    200
 dtype: int64,
 a    201.0
 b    110.0
 c      NaN
 d    120.0
 e      NaN
 dtype: float64)

## DataFrame

In [51]:
# Column-oriented input: one equal-length list per column name.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
# `columns` fixes the column order; 'height' is not a key of `data`,
# so that column is created and filled with missing values.
frame = pd.DataFrame(
    data,
    columns=['state', 'year', 'pop', 'height'],
)

In [54]:
# Preview only the first two rows instead of the whole frame.
frame.head(2)

Unnamed: 0,state,year,pop,height
0,Ohio,2000,1.5,
1,Ohio,2001,1.7,


In [55]:
# Preview only the last two rows.
frame.tail(2)

Unnamed: 0,state,year,pop,height
4,Nevada,2002,2.9,
5,Nevada,2003,3.2,


In [58]:
# A column can be read as an attribute or with dict-style brackets; both return
# the same Series. The ' ' element is only a visual separator in the displayed tuple.
frame.state, ' ', frame['state']

(0      Ohio
 1      Ohio
 2      Ohio
 3    Nevada
 4    Nevada
 5    Nevada
 Name: state, dtype: object,
 ' ',
 0      Ohio
 1      Ohio
 2      Ohio
 3    Nevada
 4    Nevada
 5    Nevada
 Name: state, dtype: object)

In [77]:
# Build a second frame from the same `data` dict:
# - `columns` reorders the columns and adds 'debt' and 'size', which are not keys
#   of `data` and therefore start out empty (NaN);
# - `index` replaces the default integer row labels with string labels.
frame2 = pd.DataFrame(data,
                      columns=['year', 'state', 'pop', 'debt', 'size'],
                      index=['one', 'two', 'three', 'four', 'five', 'six']
                     )
frame2

Unnamed: 0,year,state,pop,debt,size
one,2000,Ohio,1.5,,
two,2001,Ohio,1.7,,
three,2002,Ohio,3.6,,
four,2001,Nevada,2.4,,
five,2002,Nevada,2.9,,
six,2003,Nevada,3.2,,


In [78]:
# .loc with a single row label returns that row as a Series indexed by column name.
frame2.loc['four']

year       2001
state    Nevada
pop         2.4
debt        NaN
size        NaN
Name: four, dtype: object

In [80]:
# Assigning an array fills the column positionally: six values for the six rows.
frame2['debt'] = np.arange(6.)
# Assigning a Series aligns on the row labels instead: only 'one' and 'six' get
# values here, every other row is left as NaN.
frame2['size'] = pd.Series({'one': 1.0, 'six': 101.0})
# Assigning to a column name that does not exist yet creates the column.
frame2['City'] = pd.Series({'two': 'My city', 'five': 'Large'})
frame2

Unnamed: 0,year,state,pop,debt,size,City
one,2000,Ohio,1.5,0.0,1.0,
two,2001,Ohio,1.7,1.0,,My city
three,2002,Ohio,3.6,2.0,,
four,2001,Nevada,2.4,3.0,,
five,2002,Nevada,2.9,4.0,,Large
six,2003,Nevada,3.2,5.0,101.0,


In [81]:
# `del` removes the column from frame2 in place.
# NOTE: not re-runnable on its own -- once 'size' is gone a second run fails
# until the cell that recreates the column is executed again.
del frame2['size']
frame2

Unnamed: 0,year,state,pop,debt,City
one,2000,Ohio,1.5,0.0,
two,2001,Ohio,1.7,1.0,My city
three,2002,Ohio,3.6,2.0,
four,2001,Nevada,2.4,3.0,
five,2002,Nevada,2.9,4.0,Large
six,2003,Nevada,3.2,5.0,


In [82]:
# Nested dict of population figures: outer keys are states, inner keys are years.
# Nevada has no entry for 2000.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}

In [84]:
# From a dict of dicts the outer keys become the columns and the inner keys the
# row index; combinations that are missing (Nevada, 2000) show up as NaN.
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [85]:
# Transpose: rows and columns are swapped.
frame3.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [87]:
# Give the row index and the column index a name; both names appear in the
# header of the displayed frame.
frame3.index.name = 'My inx'
frame3.columns.name = 'My cols'
frame3

My cols,Nevada,Ohio
My inx,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [88]:
# .values exposes the data as a two-dimensional NumPy array (labels are dropped).
frame3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [94]:
# With columns of different types (ints, strings, floats) the resulting array
# has dtype=object, as the output below shows.
frame2.values

array([[2000, 'Ohio', 1.5, 0.0, nan],
       [2001, 'Ohio', 1.7, 1.0, 'My city'],
       [2002, 'Ohio', 3.6, 2.0, nan],
       [2001, 'Nevada', 2.4, 3.0, nan],
       [2002, 'Nevada', 2.9, 4.0, 'Large'],
       [2003, 'Nevada', 3.2, 5.0, nan]], dtype=object)

## Index objects

In [95]:
# An Index is allowed to contain duplicate labels ('a' appears twice here).
index1 = pd.Index(['a', 'a', 'b', 'c'])
index1

Index(['a', 'a', 'b', 'c'], dtype='object')

In [102]:
index2 = pd.Index(['b', 'd', 'c'])
# Index methods to demonstrate, each applied to index1 with index2 as the argument.
set_ops = (
    index1.append,
    index1.difference,
    index1.intersection,
    index1.union,
    index1.isin,
)
# Start with the two operands, then add each bound method followed by its
# result, so every result in the output is preceded by the method that made it.
out_ = [index1, index2]
for op in set_ops:
    out_.extend((op, op(index2)))
out_

[Index(['a', 'a', 'b', 'c'], dtype='object'),
 Index(['b', 'd', 'c'], dtype='object'),
 <bound method Index.append of Index(['a', 'a', 'b', 'c'], dtype='object')>,
 Index(['a', 'a', 'b', 'c', 'b', 'd', 'c'], dtype='object'),
 <bound method Index.difference of Index(['a', 'a', 'b', 'c'], dtype='object')>,
 Index(['a'], dtype='object'),
 <bound method Index.intersection of Index(['a', 'a', 'b', 'c'], dtype='object')>,
 Index(['b', 'c'], dtype='object'),
 <bound method Index.union of Index(['a', 'a', 'b', 'c'], dtype='object')>,
 Index(['a', 'a', 'b', 'c', 'd'], dtype='object'),
 <bound method Index.isin of Index(['a', 'a', 'b', 'c'], dtype='object')>,
 array([False, False,  True,  True])]

## Essentials

In [105]:
# A Series with gaps in its integer labels (1 and 3 are absent).
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3

0      blue
2    purple
4    yellow
dtype: object

In [106]:
# Reindex onto the labels 0..5; method='ffill' fills each new label with the
# value of the closest preceding existing label.
# np.arange(6.) is a float array, so the new index labels are floats (0.0 ... 5.0).
obj4 =obj3.reindex(np.arange(6.), method='ffill')
obj4

0.0      blue
1.0      blue
2.0    purple
3.0    purple
4.0    yellow
5.0    yellow
dtype: object

In [127]:
# Rebind `frame` to a small 4x4 integer frame (values 0..15, filled row by row)
# with string row labels and upper-case column labels.
frame = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['a', 'c', 'd', 'e'],
    columns=['A', 'B', 'C', 'D'],
)
frame

Unnamed: 0,A,B,C,D
a,0,1,2,3
c,4,5,6,7
d,8,9,10,11
e,12,13,14,15


In [128]:
# Reindex rows and columns in one call. Labels that frame does not have
# (row 'f', columns 'E', 'F', 'G') are added as all-NaN; row 'c' and column 'B'
# are dropped because they are not requested.
frame.reindex(list('adef'), columns=list('ACDEFG'))

Unnamed: 0,A,C,D,E,F,G
a,0.0,2.0,3.0,,,
d,8.0,10.0,11.0,,,
e,12.0,14.0,15.0,,,
f,,,,,,


In [129]:
# Label-based slices with .loc include both endpoints ('e' and 'D' are part of the result).
frame.loc['d':'e', 'B':'D']

Unnamed: 0,B,C,D
d,9,10,11
e,13,14,15


In [143]:
# Boolean row filter: keep the rows whose value in column 'C' is at most 10.
frame[frame['C'] <= 10.]

Unnamed: 0,A,B,C,D
a,0,1,2,3
c,4,5,6,7
d,8,9,10,11


In [150]:
# Comparing the whole frame with a scalar is elementwise and yields a boolean
# DataFrame with the same row and column labels.
frame < 6.

Unnamed: 0,A,B,C,D
a,True,True,True,True
c,True,True,False,False
d,False,False,False,False
e,False,False,False,False


In [153]:
# NumPy's special floating-point constants. The third element confirms that
# np.inf is the ordinary IEEE positive infinity, equal to Python's float("inf").
# The old alias np.infty is avoided because it was removed in NumPy 2.0, where
# referencing it raises AttributeError.
special_floats = (np.nan, np.inf, np.inf == float("inf"))
special_floats

(nan, inf, True)

In [154]:
# IPython-only introspection (`??` is not plain Python syntax): prints the type,
# string form and docstring of np.nan. Interactive aid; it produces no value.
np.nan??

Type:        float
String form: nan
Docstring:   Convert a string or number to a floating point number, if possible.


In [155]:
# IPython-only introspection (`??` is not plain Python syntax): prints the type,
# string form and docstring of np.inf. Interactive aid; it produces no value.
np.inf??

Type:        float
String form: inf
Docstring:   Convert a string or number to a floating point number, if possible.
