In [1]:
import numpy as np
import pandas as pd

import seaborn as sns

In [2]:
# Fixed seed so that any random draws made through `rng` are repeatable
# across kernel restarts.
seed = 42
rng = np.random.RandomState(seed=seed)

In [3]:
# Fetch the 'planets' example dataset through seaborn and report its
# (rows, columns) shape as a quick sanity check on the load.
planets = sns.load_dataset("planets")
print(f"Planets data, shape: {planets.shape}")

Planets data, shape: (1035, 6)


In [4]:
# Preview the first five rows to see the columns and typical values.
planets.head(n=5)

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [5]:
# Median of `orbital_period` within each `method` group; the result is a
# Series indexed by method.
(
    planets
    .groupby("method")["orbital_period"]
    .median()
)

method
Astrometry                         631.180000
Eclipse Timing Variations         4343.500000
Imaging                          27500.000000
Microlensing                      3300.000000
Orbital Brightness Modulation        0.342887
Pulsar Timing                       66.541900
Pulsation Timing Variations       1170.000000
Radial Velocity                    360.200000
Transit                              5.714932
Transit Timing Variations           57.011000
Name: orbital_period, dtype: float64

In [6]:
# Iterating a GroupBy yields (key, sub-frame) pairs; print each key padded to
# 30 characters next to the shape of its sub-frame.
for method, group in planets.groupby("method"):
    print("{0:30s} shape={1}".format(method, group.shape))

Astrometry                     shape=(2, 6)
Eclipse Timing Variations      shape=(9, 6)
Imaging                        shape=(38, 6)
Microlensing                   shape=(23, 6)
Orbital Brightness Modulation  shape=(3, 6)
Pulsar Timing                  shape=(5, 6)
Pulsation Timing Variations    shape=(1, 6)
Radial Velocity                shape=(553, 6)
Transit                        shape=(397, 6)
Transit Timing Variations      shape=(4, 6)


In [7]:
# Summary statistics of `year` per method. `describe()` returns one row per
# method and one column per statistic; `unstack()` then flattens that frame
# into a single Series indexed by (statistic, method).
(
    planets
    .groupby("method")["year"]
    .describe()
    .unstack()
)

       method                       
count  Astrometry                          2.000000
       Eclipse Timing Variations           9.000000
       Imaging                            38.000000
       Microlensing                       23.000000
       Orbital Brightness Modulation       3.000000
       Pulsar Timing                       5.000000
       Pulsation Timing Variations         1.000000
       Radial Velocity                   553.000000
       Transit                           397.000000
       Transit Timing Variations           4.000000
mean   Astrometry                       2011.500000
       Eclipse Timing Variations        2010.000000
       Imaging                          2009.131579
       Microlensing                     2009.782609
       Orbital Brightness Modulation    2011.666667
       Pulsar Timing                    1998.400000
       Pulsation Timing Variations      2007.000000
       Radial Velocity                  2007.518987
       Transit             

In [8]:
# Same per-method summary of `year`, pivoted so that statistics are rows and
# methods are columns: the first `unstack()` produces a (statistic, method)
# Series, the second moves the method level back out to the columns.
(
    planets
    .groupby("method")["year"]
    .describe()
    .unstack()
    .unstack()
)

method,Astrometry,Eclipse Timing Variations,Imaging,Microlensing,Orbital Brightness Modulation,Pulsar Timing,Pulsation Timing Variations,Radial Velocity,Transit,Transit Timing Variations
count,2.0,9.0,38.0,23.0,3.0,5.0,1.0,553.0,397.0,4.0
mean,2011.5,2010.0,2009.131579,2009.782609,2011.666667,1998.4,2007.0,2007.518987,2011.236776,2012.5
std,2.12132,1.414214,2.781901,2.859697,1.154701,8.38451,,4.249052,2.077867,1.290994
min,2010.0,2008.0,2004.0,2004.0,2011.0,1992.0,2007.0,1989.0,2002.0,2011.0
25%,2010.75,2009.0,2008.0,2008.0,2011.0,1992.0,2007.0,2005.0,2010.0,2011.75
50%,2011.5,2010.0,2009.0,2010.0,2011.0,1994.0,2007.0,2009.0,2012.0,2012.5
75%,2012.25,2011.0,2011.0,2012.0,2012.0,2003.0,2007.0,2011.0,2013.0,2013.25
max,2013.0,2012.0,2013.0,2013.0,2013.0,2011.0,2007.0,2014.0,2014.0,2014.0


In [12]:
# Count discovered planets by method and by decade.

# Floor every discovery year to the first year of its decade (2006 -> 2000)
# and turn it into a string label such as '2000s'. The Series is named
# 'decade' so that groupby uses that name for the second grouping level.
decade = ((10 * (planets['year'] // 10)).astype(str) + 's').rename('decade')
print(type(decade))
print(decade.head())

# Total the `number` column for every (method, decade) pair. `unstack()`
# moves the decade level into the columns, and `fillna(0)` replaces the
# method/decade combinations that have no rows with 0.
# NOTE: this deliberately uses `sum()` rather than `describe()`; `describe()`
# would return eight summary statistics per decade instead of a single
# method-by-decade table of totals.
planets.groupby(['method', decade])['number'].sum().unstack().fillna(0)

<class 'pandas.core.series.Series'>
0    2000s
1    2000s
2    2010s
3    2000s
4    2000s
Name: decade, dtype: object


decade,1980s,1990s,2000s,2010s
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Astrometry,0.0,0.0,0.0,2.0
Eclipse Timing Variations,0.0,0.0,5.0,10.0
Imaging,0.0,0.0,29.0,21.0
Microlensing,0.0,0.0,12.0,15.0
Orbital Brightness Modulation,0.0,0.0,0.0,5.0
Pulsar Timing,0.0,9.0,1.0,1.0
Pulsation Timing Variations,0.0,0.0,1.0,0.0
Radial Velocity,1.0,52.0,475.0,424.0
Transit,0.0,0.0,64.0,712.0
Transit Timing Variations,0.0,0.0,0.0,9.0
