In [1]:
import numpy as np
import pandas as pd

### Series

In [2]:
# Five consecutive integers, each addressed by a single-letter string label.
s = pd.Series(
    data=np.arange(5),
    index=['a', 'b', 'c', 'd', 'n'],
)
s

a    0
b    1
c    2
d    3
n    4
dtype: int32

In [3]:
s.index

Index(['a', 'b', 'c', 'd', 'n'], dtype='object')

In [4]:
s = pd.Series(np.random.rand(5))

In [5]:
# Label -> value mapping; the Series built from it lists the labels in the
# dict's insertion order.
d = dict(zip(['a', 'b', 'c', 't', 'w'], [1, 2, 3, 4, 6]))
pd.Series(data=d)

a    1
b    2
c    3
t    4
w    6
dtype: int64

In [6]:
pd.Series(d, index=['w','c','b','a','t'])

w    6
c    3
b    2
a    1
t    4
dtype: int64

In [7]:
pd.Series(5.0, index = ['One', 'Two', 'Three'])

One      5.0
Two      5.0
Three    5.0
dtype: float64

In [8]:
s

0    0.691497
1    0.365219
2    0.223074
3    0.977078
4    0.312415
dtype: float64

In [9]:
s[1]

0.36521930101390654

In [10]:
s[:3]

0    0.691497
1    0.365219
2    0.223074
dtype: float64

In [11]:
s[s > s.median()]

0    0.691497
3    0.977078
dtype: float64

In [12]:
s[[1,2,3]]

1    0.365219
2    0.223074
3    0.977078
dtype: float64

In [13]:
s.dtype

dtype('float64')

In [14]:
s.array

<PandasArray>
[ 0.6914971988620428, 0.36521930101390654, 0.22307386212897795,
  0.9770780186281801,  0.3124153552161344]
Length: 5, dtype: float64

In [15]:
# to_numpy is a method: it has to be called to get the underlying ndarray.
# Referencing it without parentheses only displays the bound-method repr.
s.to_numpy()

array([0.6914972 , 0.3652193 , 0.22307386, 0.97707802, 0.31241536])

Series is dict-like

In [16]:
s['e'] = 12.0

In [18]:
'e' in s

True

In [19]:
'f' in s

False

In [20]:
s

0     0.691497
1     0.365219
2     0.223074
3     0.977078
4     0.312415
e    12.000000
dtype: float64

In [21]:
s.get('f', np.nan)

nan

In [22]:
s

0     0.691497
1     0.365219
2     0.223074
3     0.977078
4     0.312415
e    12.000000
dtype: float64

Vectorized operations and label alignment with Series

In [23]:
s + s

0     1.382994
1     0.730439
2     0.446148
3     1.954156
4     0.624831
e    24.000000
dtype: float64

In [24]:
s * 2

0     1.382994
1     0.730439
2     0.446148
3     1.954156
4     0.624831
e    24.000000
dtype: float64

In [25]:
np.exp(s)

0         1.996703
1         1.440830
2         1.249913
3         2.656682
4         1.366722
e    162754.791419
dtype: float64

In [28]:
# Slice by position explicitly with .iloc (the index mixes integers and 'e',
# so bare integer slices are easy to misread as label slices).
# The addition aligns on labels; labels present in only one slice yield NaN.
s.iloc[2:] + s.iloc[:-1]

0         NaN
1         NaN
2    0.446148
3    1.954156
4    0.624831
e         NaN
dtype: float64

In [29]:
s = pd.Series(np.random.rand(5), name='Something')

In [30]:
s.name

'Something'

In [33]:
s= s.rename('RndNum')

In [34]:
s.name

'RndNum'

### DataFrame

DataFrame from a dict of Series

In [36]:
# Two Series of unequal length: 'One' carries labels a-c, 'Two' carries a-d.
d = {
    'One': pd.Series({'a': 1.0, 'b': 2.0, 'c': 3.0}),
    'Two': pd.Series({'a': 23, 'b': 45, 'c': 12, 'd': 1}),
}

In [37]:
df = pd.DataFrame(d)
df

Unnamed: 0,One,Two
a,1.0,23
b,2.0,45
c,3.0,12
d,,1


In [40]:
pd.DataFrame(d, index = ['a', 'b', 'c'])

Unnamed: 0,One,Two
a,1.0,23
b,2.0,45
c,3.0,12


In [47]:
pd.DataFrame(d, index = ['a', 'b', 'c', 'd'], columns = ['Two', 'three'])

Unnamed: 0,Two,three
a,23,
b,45,
c,12,
d,1,


In [48]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [49]:
df.columns

Index(['One', 'Two'], dtype='object')

From dict of ndarrays / lists

In [50]:
# Equal-length lists keyed by the column label they will populate.
d = {
    'One': [1, 2, 3, 4],
    'Two': [4, 5, 7, 9],
}

In [52]:
df = pd.DataFrame(d)
df

Unnamed: 0,One,Two
0,1,4
1,2,5
2,3,7
3,4,9


In [53]:
df = pd.DataFrame(d, index = ['a', 'b', 'c', 'd'])
df

Unnamed: 0,One,Two
a,1,4
b,2,5
c,3,7
d,4,9


From structured or record array

In [54]:
# Zero-initialised structured array with two records and three fields:
# a 4-byte int, a 4-byte float and a 10-byte fixed-width byte string.
# 'S10' is the canonical typecode; the 'a10' alias is deprecated in NumPy 2.0
# and resolves to the same dtype.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [55]:
data[:] = [(1,2,'Hello'), (2, 6, 'World')]
data

array([(1, 2., b'Hello'), (2, 6., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [57]:
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,6.0,b'World'


In [59]:
df = pd.DataFrame(data, index=['First', 'Second'])
df

Unnamed: 0,A,B,C
First,1,2.0,b'Hello'
Second,2,6.0,b'World'


In [60]:
# Nested dict whose outer and inner keys are both 2-tuples.
# As the rendered output shows, the outer tuples form a two-level column
# header and the inner tuples form a two-level row index; combinations that
# are absent from an inner dict (e.g. ("A", "D") for most columns) are NaN.
pd.DataFrame(
     {
         ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
         ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
         ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
         ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
         ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
     }
)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


From a list of namedtuples

In [64]:
from collections import namedtuple

# The namedtuple's field names supply the column labels; a plain tuple of the
# same length is accepted as a further row.
point = namedtuple('Point', 'x y')
df = pd.DataFrame(
    data=[
        point(x=0, y=0),
        point(x=0, y=3),
        (2, 3),
    ]
)
df

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [68]:
from dataclasses import make_dataclass

# Dataclass instances work as rows too: the field names X and Y become columns.
point = make_dataclass('Point', [('X', int), ('Y', int)])
df = pd.DataFrame(
    data=[
        point(X=1, Y=2),
        point(X=5, Y=6),
        point(X=8, Y=3),
    ]
)
df

Unnamed: 0,X,Y
0,1,2
1,5,6
2,8,3


Alternate constructors

In [72]:
pd.DataFrame.from_dict(dict([
    ('A', [1,2,4,5]),
    ('B', [3,5,7,9])
    ]))

Unnamed: 0,A,B
0,1,3
1,2,5
2,4,7
3,5,9


In [74]:
pd.DataFrame.from_dict(
    dict([('A', [1,2,4,5]), ('B', [3,5,7,9]) ]),
    orient='index',
    columns=['One', 'Two', 'Three','Four']
)

Unnamed: 0,One,Two,Three,Four
A,1,2,4,5
B,3,5,7,9


In [75]:
data

array([(1, 2., b'Hello'), (2, 6., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [76]:
pd.DataFrame.from_records(data, index='C')

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,6.0


Column selection, addition, deletion

In [79]:
# Fresh two-column frame used by the column selection/addition/deletion cells.
df = pd.DataFrame(data={'One': [1, 2, 3, 4], 'Two': [4, 5, 7, 9]})
df

Unnamed: 0,One,Two
0,1,4
1,2,5
2,3,7
3,4,9


In [82]:
df['One']

0    1
1    2
2    3
3    4
Name: One, dtype: int64

In [80]:
df['Three'] = df['One'] + df['Two']

In [85]:
df['Four'] = df['Three'] > 10
df

Unnamed: 0,One,Two,Three,Four
0,1,4,5,False
1,2,5,7,False
2,3,7,10,False
3,4,9,13,True


In [86]:
del df['Three']

In [87]:
df

Unnamed: 0,One,Two,Four
0,1,4,False
1,2,5,False
2,3,7,False
3,4,9,True


In [89]:
df.pop('Four')

0    False
1    False
2    False
3     True
Name: Four, dtype: bool

In [90]:
df['Five'] = 'Bar'
df

Unnamed: 0,One,Two,Five
0,1,4,Bar
1,2,5,Bar
2,3,7,Bar
3,4,9,Bar


In [91]:
df['Six'] = df['One'][2:]
df

Unnamed: 0,One,Two,Five,Six
0,1,4,Bar,
1,2,5,Bar,
2,3,7,Bar,3.0
3,4,9,Bar,4.0


In [92]:
df.insert(2, 'Three', df['One']*3)
df

Unnamed: 0,One,Two,Three,Five,Six
0,1,4,3,Bar,
1,2,5,6,Bar,
2,3,7,9,Bar,3.0
3,4,9,12,Bar,4.0


Assigning new columns in method chains

In [97]:
df.assign(Seven=df['Two']/df['Three'])

Unnamed: 0,One,Two,Three,Five,Six,Seven
0,1,4,3,Bar,,1.333333
1,2,5,6,Bar,,0.833333
2,3,7,9,Bar,3.0,0.777778
3,4,9,12,Bar,4.0,0.75


In [101]:
df.loc[0]

One        1
Two        4
Three      3
Five     Bar
Six      NaN
Name: 0, dtype: object

In [110]:
# Two frames of uniform random numbers with different shapes (10x4 and 7x3).
# No seed is set, so the values differ from run to run.
df1 = pd.DataFrame(
    data=np.random.rand(10, 4),
    columns=['A', 'B', 'C', 'D'],
)
df2 = pd.DataFrame(
    data=np.random.rand(7, 3),
    columns=['A', 'B', 'C'],
)
df2

Unnamed: 0,A,B,C
0,0.078281,0.860136,0.721031
1,0.6547,0.259569,0.161161
2,0.473718,0.841806,0.818113
3,0.69024,0.323617,0.213571
4,0.459603,0.933086,0.894455
5,0.066351,0.73908,0.279353
6,0.529487,0.072361,0.231203


In [111]:
# Add the two random frames defined in the previous cell (df1 is 10x4, df2 is 7x3).
# The result spans the union of their row and column labels; positions missing
# from either operand are NaN.
df1 + df2

Unnamed: 0,A,B,C,D
0,0.951838,1.726682,1.395004,
1,0.706544,0.354852,0.340162,
2,0.74182,0.878282,1.655579,
3,1.561585,1.006614,1.053124,
4,0.828557,1.583436,1.520761,
5,0.24321,1.646735,1.195649,
6,1.008457,0.1458,0.770207,
7,,,,
8,,,,
9,,,,


In [112]:
# Element-wise product of the two random frames (df1 is 10x4, df2 is 7x3);
# labels missing from either operand produce NaN.
df1 * df2

Unnamed: 0,A,B,C,D
0,0.068383,0.745348,0.485956,
1,0.033942,0.024733,0.028848,
2,0.127004,0.030706,0.685142,
3,0.601437,0.221029,0.179304,
4,0.169572,0.606832,0.560202,
5,0.011735,0.67083,0.25597,
6,0.253609,0.005314,0.124619,
7,,,,
8,,,,
9,,,,


In [113]:
# Element-wise difference of the two random frames (df1 is 10x4, df2 is 7x3);
# labels missing from either operand produce NaN.
df1 - df2

Unnamed: 0,A,B,C,D
0,0.795277,0.00641,-0.047058,
1,-0.602856,-0.164286,0.01784,
2,-0.205617,-0.80533,0.019354,
3,0.181104,0.359379,0.625982,
4,-0.090649,-0.282736,-0.26815,
5,0.110509,0.168575,0.636943,
6,-0.050517,0.001077,0.307801,
7,,,,
8,,,,
9,,,,


In [114]:
# Small boolean frames used to demonstrate the element-wise | and & operators.
df1 = pd.DataFrame({"a": [True, False, True], "b": [False, True, True]})
df2 = pd.DataFrame({"a": [False, True, True], "b": [True, True, False]})

In [115]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [116]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [118]:
df1

Unnamed: 0,a,b
0,True,False
1,False,True
2,True,True


In [119]:
df1.T

Unnamed: 0,0,1,2
a,True,False,True
b,False,True,True


In [120]:
# Same three labels in a different order, to show label alignment in ufuncs.
ser1 = pd.Series({"a": 1, "b": 2, "c": 3})
ser2 = pd.Series({"b": 1, "a": 3, "c": 5})
(ser1, ser2)

(a    1
 b    2
 c    3
 dtype: int64,
 b    1
 a    3
 c    5
 dtype: int64)

In [121]:
np.remainder(ser1, ser2)

a    1
b    0
c    3
dtype: int64

In [123]:
# Binary ufunc between a Series and an Index: the element-wise maximum comes
# back as a Series carrying the Series' own 0..2 index.
ser = pd.Series(data=[1, 9, 3])
idx = pd.Index(data=[4, 5, 6])
np.maximum(ser, idx)

0    4
1    9
2    6
dtype: int64

In [124]:
ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])

In [125]:
ser1.a

1

In [126]:
ser1.c

3