In [1]:
import pandas as pd
import numpy as np

# Basic Ingredient - Series

In [2]:
# Explicit index: each random value gets the matching label 'a'..'e'.
# NOTE(review): no seed is set in the visible cells, so the values differ on every run.
pd.Series(np.random.rand(5), index=['a', 'b', 'c', 'd', 'e'])

a    0.360391
b    0.890297
c    0.995858
d    0.833484
e    0.653361
dtype: float64

In [3]:
# No index given: the output below shows integer labels 0..4 assigned automatically.
pd.Series(np.random.rand(5))

0    0.074973
1    0.132257
2    0.447251
3    0.875502
4    0.805989
dtype: float64

In [4]:
# From a dict: the keys become the index labels, the values the data.
pd.Series({'a': 1, 'b': 2, 'c': 3})

a    1
b    2
c    3
dtype: int64

In [5]:
# A scalar is repeated for every label in the index.
pd.Series(42, index=range(5))

0    42
1    42
2    42
3    42
4    42
dtype: int64

## Accessing data

In [6]:
# Five uniform random draws labelled 'a'..'e'; reused by the cells that follow.
# NOTE(review): unseeded, so the printed values are not reproducible.
s = pd.Series(np.random.rand(5), index=list('abcde'))

In [8]:
s

a    0.911628
b    0.013579
c    0.389622
d    0.574887
e    0.155187
dtype: float64

In [7]:
# Positional access goes through .iloc: `s[2]` on a string-labelled Series
# relies on the integer-as-position fallback that pandas 2.1 deprecated.
s.iloc[2]

0.38962186973144786

In [9]:
s['c']

0.38962186973144786

In [10]:
'b' in s

True

In [11]:
'z' in s

False

In [13]:
# Label lookup with a fallback value; 'c' exists, so the stored value is returned.
s.get('c', np.nan)

0.38962186973144786

In [14]:
# 'z' is not a label of s, so the fallback (NaN) is returned instead of raising.
s.get('z', np.nan)

nan

In [15]:
s[1:]

b    0.013579
c    0.389622
d    0.574887
e    0.155187
dtype: float64

In [16]:
s[2:4]

c    0.389622
d    0.574887
dtype: float64

In [17]:
s[:-2]

a    0.911628
b    0.013579
c    0.389622
dtype: float64

In [18]:
# Careful: a label slice includes its end label ('c' is returned below),
# unlike the positional slices above, which stop before the end position.
s['a':'c'] # careful

a    0.911628
b    0.013579
c    0.389622
dtype: float64

In [19]:
s.array

<PandasArray>
[  0.9116280815195884, 0.013578945111619278,  0.38962186973144786,
   0.5748872750468629,  0.15518733934479467]
Length: 5, dtype: float64

## Arithmetic

In [20]:
s + s

a    1.823256
b    0.027158
c    0.779244
d    1.149775
e    0.310375
dtype: float64

In [21]:
s * 2

a    1.823256
b    0.027158
c    0.779244
d    1.149775
e    0.310375
dtype: float64

In [22]:
s ** 2

a    0.831066
b    0.000184
c    0.151805
d    0.330495
e    0.024083
dtype: float64

In [23]:
np.sin(s)

a    0.790502
b    0.013579
c    0.379839
d    0.543740
e    0.154565
dtype: float64

In [24]:
# Element-wise Python callback; the vectorised `s + 1` in the next cell
# yields the same values.
s.apply(lambda x: x + 1)

a    1.911628
b    1.013579
c    1.389622
d    1.574887
e    1.155187
dtype: float64

In [25]:
s + 1

a    1.911628
b    1.013579
c    1.389622
d    1.574887
e    1.155187
dtype: float64

In [26]:
# Operands are aligned on index labels before adding: s[1:] has no 'a',
# so that entry of the result is NaN.
s + s[1:]

a         NaN
b    0.027158
c    0.779244
d    1.149775
e    0.310375
dtype: float64

In [27]:
np.exp(s + s[1:])

a         NaN
b    1.027530
c    2.179823
d    3.157481
e    1.363936
dtype: float64

# Data Frames

In [28]:
# Small demo frame: one row per noble gas, indexed by element symbol.
# NOTE(review): units are not stated anywhere in the notebook (radius is
# presumably in pm) — confirm and put the unit in the column name.
# NOTE(review): krypton's atomic number is 36 (32 is germanium). The value is
# left as-is because every output below was rendered with 32; correct it
# together with a full re-run of the notebook.
noble_gases = pd.DataFrame(
    {'Atomic Number': [2, 10, 18, 32],
     'Atomic Radius': [31, 38, 71, 88]},
    index=['He', 'Ne', 'Ar', 'Kr']
)

In [29]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius
He,2,31
Ne,10,38
Ar,18,71
Kr,32,88


In [30]:
noble_gases['Atomic Number']

He     2
Ne    10
Ar    18
Kr    32
Name: Atomic Number, dtype: int64

In [31]:
noble_gases['Atomic Number'].index

Index(['He', 'Ne', 'Ar', 'Kr'], dtype='object')

In [32]:
noble_gases.index

Index(['He', 'Ne', 'Ar', 'Kr'], dtype='object')

In [33]:
noble_gases['Density'] = [0.1786, 0.9002, 1.7818, 3.708]

In [34]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [35]:
# Attribute-style access returns the same column as noble_gases['Density'].
# It is only available when the column name is a valid Python identifier,
# so it cannot be used for 'Atomic Number'.
noble_gases.Density

He    0.1786
Ne    0.9002
Ar    1.7818
Kr    3.7080
Name: Density, dtype: float64

In [37]:
noble_gases['Density']

He    0.1786
Ne    0.9002
Ar    1.7818
Kr    3.7080
Name: Density, dtype: float64

In [38]:
# Assigning a Series aligns on the index: He and Kr have no entry, so they
# become NaN and the column is stored as float (see the output below).
noble_gases['foo'] = pd.Series({'Ne': 2, 'Ar': 4})

In [39]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density,foo
He,2,31,0.1786,
Ne,10,38,0.9002,2.0
Ar,18,71,1.7818,4.0
Kr,32,88,3.708,


In [40]:
# pop removes the column from the frame in place and returns it as a Series.
noble_gases.pop('foo')

He    NaN
Ne    2.0
Ar    4.0
Kr    NaN
Name: foo, dtype: float64

In [41]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [43]:
noble_gases[['Atomic Number', 'Density']]

Unnamed: 0,Atomic Number,Density
He,2,0.1786
Ne,10,0.9002
Ar,18,1.7818
Kr,32,3.708


## Accessing Rows

In [44]:
# A single row comes back as a Series named 'He'; combining the int and
# float columns upcasts every value to float64 (see the output below).
noble_gases.loc['He']

Atomic Number     2.0000
Atomic Radius    31.0000
Density           0.1786
Name: He, dtype: float64

In [45]:
# .loc slices by label and includes both endpoints (Ne and Ar).
noble_gases.loc['Ne':'Ar']

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ne,10,38,0.9002
Ar,18,71,1.7818


In [46]:
noble_gases.loc['Ne':'Ar', 'Atomic Radius']

Ne    38
Ar    71
Name: Atomic Radius, dtype: int64

In [48]:
noble_gases.loc['Ne':'Ar', 'Atomic Radius':]

Unnamed: 0,Atomic Radius,Density
Ne,38,0.9002
Ar,71,1.7818


In [49]:
# .iloc slices by position and excludes the stop: positions 1 and 2,
# i.e. the same two rows as the label slice 'Ne':'Ar'.
noble_gases.iloc[1:3]

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ne,10,38,0.9002
Ar,18,71,1.7818


In [50]:
noble_gases.iloc[1]

Atomic Number    10.0000
Atomic Radius    38.0000
Density           0.9002
Name: Ne, dtype: float64

In [51]:
noble_gases.iloc[1:3, 1]

Ne    38
Ar    71
Name: Atomic Radius, dtype: int64

## Boolean selection

In [53]:
noble_gases['Atomic Number'] > 10

He    False
Ne    False
Ar     True
Kr     True
Name: Atomic Number, dtype: bool

In [54]:
# Keep only the rows whose atomic number exceeds 10.
noble_gases.loc[noble_gases['Atomic Number'].gt(10)]

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ar,18,71,1.7818
Kr,32,88,3.708


## Sampling

In [55]:
# NOTE(review): no random_state is passed, so the rows shown below change
# from run to run.
noble_gases.sample(n=2)

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ne,10,38,0.9002
Kr,32,88,3.708


In [62]:
# replace=True samples with replacement, so a row may appear more than once
# (Kr is drawn twice in the output below).
noble_gases.sample(n=4, replace=True)

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ar,18,71,1.7818
Kr,32,88,3.708
Kr,32,88,3.708
Ne,10,38,0.9002


## Arithmetic

In [63]:
noble_gases + noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,4,62,0.3572
Ne,20,76,1.8004
Ar,36,142,3.5636
Kr,64,176,7.416


In [64]:
noble_gases * 2

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,4,62,0.3572
Ne,20,76,1.8004
Ar,36,142,3.5636
Kr,64,176,7.416


In [65]:
np.exp(noble_gases)

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,7.389056,29048850000000.0,1.195542
Ne,22026.47,3.185593e+16,2.460095
Ar,65659970.0,6.837671e+30,5.94054
Kr,78962960000000.0,1.6516360000000001e+38,40.772181


## Transpose

In [66]:
noble_gases.T

Unnamed: 0,He,Ne,Ar,Kr
Atomic Number,2.0,10.0,18.0,32.0
Atomic Radius,31.0,38.0,71.0,88.0
Density,0.1786,0.9002,1.7818,3.708


# Data I/O

In [68]:
# NOTE(review): the cell that writes noble_gases.csv (to_csv) appears further
# down with a later execution count, so on a fresh run this file may not exist yet.
!cat noble_gases.csv # careful with big files

,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [70]:
pd.read_csv('noble_gases.csv', index_col=0)

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [119]:
noble_gases.to_csv('noble_gases.csv')

In [120]:
!cat noble_gases.csv # careful with big files

,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [75]:
[m for m in dir(pd) if m.startswith('read_')]

['read_clipboard',
 'read_csv',
 'read_excel',
 'read_feather',
 'read_fwf',
 'read_gbq',
 'read_hdf',
 'read_html',
 'read_json',
 'read_orc',
 'read_parquet',
 'read_pickle',
 'read_sas',
 'read_spss',
 'read_sql',
 'read_sql_query',
 'read_sql_table',
 'read_stata',
 'read_table',
 'read_xml']

In [76]:
[m for m in dir(noble_gases) if m.startswith('to_')]

['to_clipboard',
 'to_csv',
 'to_dict',
 'to_excel',
 'to_feather',
 'to_gbq',
 'to_hdf',
 'to_html',
 'to_json',
 'to_latex',
 'to_markdown',
 'to_numpy',
 'to_parquet',
 'to_period',
 'to_pickle',
 'to_records',
 'to_sql',
 'to_stata',
 'to_string',
 'to_timestamp',
 'to_xarray',
 'to_xml']

In [78]:
noble_gases.to_dict()

{'Atomic Number': {'He': 2, 'Ne': 10, 'Ar': 18, 'Kr': 32},
 'Atomic Radius': {'He': 31, 'Ne': 38, 'Ar': 71, 'Kr': 88},
 'Density': {'He': 0.1786, 'Ne': 0.9002, 'Ar': 1.7818, 'Kr': 3.708}}

# Reshaping data

In [79]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [80]:
noble_gases.stack()

He  Atomic Number     2.0000
    Atomic Radius    31.0000
    Density           0.1786
Ne  Atomic Number    10.0000
    Atomic Radius    38.0000
    Density           0.9002
Ar  Atomic Number    18.0000
    Atomic Radius    71.0000
    Density           1.7818
Kr  Atomic Number    32.0000
    Atomic Radius    88.0000
    Density           3.7080
dtype: float64

In [81]:
# Look up one value in the stacked Series by its (row label, column label) pair.
noble_gases.stack().loc[('He', 'Atomic Radius')]

31.0

In [82]:
noble_gases.stack().index

MultiIndex([('He', 'Atomic Number'),
            ('He', 'Atomic Radius'),
            ('He',       'Density'),
            ('Ne', 'Atomic Number'),
            ('Ne', 'Atomic Radius'),
            ('Ne',       'Density'),
            ('Ar', 'Atomic Number'),
            ('Ar', 'Atomic Radius'),
            ('Ar',       'Density'),
            ('Kr', 'Atomic Number'),
            ('Kr', 'Atomic Radius'),
            ('Kr',       'Density')],
           )

In [83]:
noble_gases.stack().unstack()

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2.0,31.0,0.1786
Ne,10.0,38.0,0.9002
Ar,18.0,71.0,1.7818
Kr,32.0,88.0,3.708


# Missing data

In [84]:
noble_gases['foo'] = pd.Series({'Ar': 5, 'He': 2})

In [85]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density,foo
He,2,31,0.1786,2.0
Ne,10,38,0.9002,
Ar,18,71,1.7818,5.0
Kr,32,88,3.708,


In [86]:
noble_gases.dropna()

Unnamed: 0,Atomic Number,Atomic Radius,Density,foo
He,2,31,0.1786,2.0
Ar,18,71,1.7818,5.0


In [88]:
# Drop every column that contains at least one missing value (here: 'foo').
noble_gases.dropna(axis='columns')

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [89]:
noble_gases.fillna(1.2)

Unnamed: 0,Atomic Number,Atomic Radius,Density,foo
He,2,31,0.1786,2.0
Ne,10,38,0.9002,1.2
Ar,18,71,1.7818,5.0
Kr,32,88,3.708,1.2


In [90]:
# Removes the helper column in place, restoring the three-column frame
# used by the following sections.
del noble_gases['foo']

In [92]:
noble_gases

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


# Sorting

In [95]:
# sample(4) draws all four rows without replacement, i.e. a shuffle;
# sort_values then puts them back in ascending atomic-number order.
noble_gases.sample(4).sort_values(by="Atomic Number")

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [96]:
noble_gases.sort_index()

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ar,18,71,1.7818
He,2,31,0.1786
Kr,32,88,3.708
Ne,10,38,0.9002


# Combining Data

In [97]:
# Split the frame column-wise into two pieces sharing the same index.
# A list of column names (double brackets) yields a DataFrame, even for the
# single column in `right`; compare with the Series from noble_gases['Density'].
left = noble_gases[['Atomic Number', 'Atomic Radius']]
right = noble_gases[['Density']]

In [98]:
right

Unnamed: 0,Density
He,0.1786
Ne,0.9002
Ar,1.7818
Kr,3.708


In [99]:
noble_gases['Density']

He    0.1786
Ne    0.9002
Ar    1.7818
Kr    3.7080
Name: Density, dtype: float64

In [100]:
right.join(left)

Unnamed: 0,Density,Atomic Number,Atomic Radius
He,0.1786,2,31
Ne,0.9002,10,38
Ar,1.7818,18,71
Kr,3.708,32,88


In [101]:
# Index-on-index merge: the same rows as right.join(left), with the left
# frame's columns first.
pd.merge(left, right, left_index=True, right_index=True)

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708


In [104]:
# join keeps every row of the left frame by default; He and Kr are absent
# from the right-hand slice, so their Density is missing in the output.
left.join(right.loc['Ne':'Ar'])

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,


In [105]:
# how='inner' keeps only the labels present in both frames (Ne and Ar).
left.join(right.loc['Ne':'Ar'], how='inner')

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ne,10,38,0.9002
Ar,18,71,1.7818


In [110]:
# Rows from the start up to and including the label 'Ne'.
noble_gases.loc[:'Ne']

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002


In [112]:
# Rows from the label 'Ar' through the end of the frame.
noble_gases.loc['Ar':]

Unnamed: 0,Atomic Number,Atomic Radius,Density
Ar,18,71,1.7818
Kr,32,88,3.708


In [118]:
# Stack the two row-wise halves back together, reproducing the original frame.
pd.concat([noble_gases.loc[:'Ne'], noble_gases.loc['Ar':]])

Unnamed: 0,Atomic Number,Atomic Radius,Density
He,2,31,0.1786
Ne,10,38,0.9002
Ar,18,71,1.7818
Kr,32,88,3.708
