### Pandas
#### The dominant python data analysis package

* NumPy tables with labels
* Powerful indexing
* Modify table structure and other transformations
* Handles many data formats
* Deals with missing data
* Implements database operations
* Makes plots

### Pandas DataFrame and Series

In [1]:
import math
import collections
import urllib

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline


In [2]:
pd.options.display.max_rows = 16

In [3]:
nobels = pd.read_csv('nobels.csv',names=['year','discipline','nobelist'])
nobels

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
1,1901,Literature,Sully Prudhomme
2,1901,Medicine,Emil Adolf von Behring
3,1901,Peace,Frédéric Passy
4,1901,Peace,Henry Dunant
...,...,...,...
945,2019,Medicine,William Kaelin Jr.
946,2019,Peace,Abiy Ahmed
947,2019,Physics,Didier Queloz
948,2019,Physics,James Peebles


In [4]:
nobels.info

<bound method DataFrame.info of      year  discipline                      nobelist
0    1901   Chemistry  Jacobus Henricus van 't Hoff
1    1901  Literature               Sully Prudhomme
2    1901    Medicine        Emil Adolf von Behring
3    1901       Peace                Frédéric Passy
4    1901       Peace                  Henry Dunant
..    ...         ...                           ...
945  2019    Medicine            William Kaelin Jr.
946  2019       Peace                    Abiy Ahmed
947  2019     Physics                 Didier Queloz
948  2019     Physics                 James Peebles
949  2019     Physics                  Michel Mayor

[950 rows x 3 columns]>

In [5]:
nobels.head()

Unnamed: 0,year,discipline,nobelist
0,1901,Chemistry,Jacobus Henricus van 't Hoff
1,1901,Literature,Sully Prudhomme
2,1901,Medicine,Emil Adolf von Behring
3,1901,Peace,Frédéric Passy
4,1901,Peace,Henry Dunant


In [6]:
nobels.tail()

Unnamed: 0,year,discipline,nobelist
945,2019,Medicine,William Kaelin Jr.
946,2019,Peace,Abiy Ahmed
947,2019,Physics,Didier Queloz
948,2019,Physics,James Peebles
949,2019,Physics,Michel Mayor


In [7]:
len(nobels)

950

In [8]:
nobels.columns

Index(['year', 'discipline', 'nobelist'], dtype='object')

In [10]:
nobels.dtypes

year           int64
discipline    object
nobelist      object
dtype: object

In [11]:
nobels.index

RangeIndex(start=0, stop=950, step=1)

In [12]:
nobels['discipline']

0       Chemistry
1      Literature
2        Medicine
3           Peace
4           Peace
          ...    
945      Medicine
946         Peace
947       Physics
948       Physics
949       Physics
Name: discipline, Length: 950, dtype: object

In [13]:
nobels.nobelist

0      Jacobus Henricus van 't Hoff
1                   Sully Prudhomme
2            Emil Adolf von Behring
3                    Frédéric Passy
4                      Henry Dunant
                   ...             
945              William Kaelin Jr.
946                      Abiy Ahmed
947                   Didier Queloz
948                   James Peebles
949                    Michel Mayor
Name: nobelist, Length: 950, dtype: object

In [14]:
nobels.discipline.values[:50]

array(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Physics', 'Chemistry', 'Literature', 'Medicine', 'Peace',
       'Physics', 'Physics', 'Physics', 'Chemistry', 'Literature',
       'Literature', 'Medicine', 'Peace', 'Physics', 'Chemistry',
       'Literature', 'Medicine', 'Peace', 'Physics', 'Chemistry',
       'Literature', 'Medicine', 'Medicine', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Peace', 'Peace', 'Physics',
       'Chemistry', 'Literature', 'Medicine', 'Medicine', 'Peace',
       'Peace', 'Physics'], dtype=object)

In [16]:
nobels.discipline.unique()

array(['Chemistry', 'Literature', 'Medicine', 'Peace', 'Physics',
       'Economics'], dtype=object)

In [18]:
#Checking how many times a nobelist has won a nobel
nobels.nobelist.value_counts()

International Committee of the Red Cross         3
United Nations High Commissioner for Refugees    2
John Bardeen                                     2
Linus Pauling                                    2
Frederick Sanger                                 2
                                                ..
Charles J. Pedersen                              1
Nikolay Basov                                    1
Osamu Shimomura                                  1
George de Hevesy                                 1
Vincent du Vigneaud                              1
Name: nobelist, Length: 943, dtype: int64

In [27]:
#Checking the Wangari Maathai Nobel price
nobels[nobels['nobelist'] == 'Wangari Maathai']

Unnamed: 0,year,discipline,nobelist
766,2004,Peace,Wangari Maathai


In [28]:
nobels[nobels.discipline == 'Physics']

Unnamed: 0,year,discipline,nobelist
5,1901,Physics,Wilhelm Röntgen
11,1902,Physics,Hendrik Lorentz
12,1902,Physics,Pieter Zeeman
17,1903,Physics,Henri Becquerel
18,1903,Physics,Marie Curie
...,...,...,...
934,2018,Physics,Donna Strickland
935,2018,Physics,Gérard Mourou
947,2019,Physics,Didier Queloz
948,2019,Physics,James Peebles


In [29]:
#checking all the nobels that were awarded in Medicine
nobels[nobels.discipline == 'Medicine']

Unnamed: 0,year,discipline,nobelist
2,1901,Medicine,Emil Adolf von Behring
8,1902,Medicine,Ronald Ross
15,1903,Medicine,Niels Ryberg Finsen
23,1904,Medicine,Ivan Pavlov
28,1905,Medicine,Robert Koch
...,...,...,...
929,2018,Medicine,James P. Allison
930,2018,Medicine,Tasuku Honjo
943,2019,Medicine,Gregg L. Semenza
944,2019,Medicine,Peter J. Ratcliffe


In [31]:
#an alternative way to find the nobels in a single discipline
nobels.query('discipline == "Peace"')

Unnamed: 0,year,discipline,nobelist
3,1901,Peace,Frédéric Passy
4,1901,Peace,Henry Dunant
9,1902,Peace,Charles Albert Gobat
10,1902,Peace,Élie Ducommun
16,1903,Peace,Randal Cremer
...,...,...,...
907,2016,Peace,Juan Manuel Santos
919,2017,Peace,International Campaign to Abolish Nuclear Weapons
931,2018,Peace,Denis Mukwege
932,2018,Peace,Nadia Murad


In [32]:
nobels['Curie' in nobels.nobelist]

KeyError: False

In [34]:
nobels[nobels.nobelist.str.contains('Curie')]

Unnamed: 0,year,discipline,nobelist
18,1903,Physics,Marie Curie
19,1903,Physics,Pierre Curie
62,1911,Chemistry,Marie Curie
178,1935,Chemistry,Frédéric Joliot-Curie
179,1935,Chemistry,Irène Joliot-Curie


In [36]:
nobels[nobels.nobelist.str.contains('Wangari')]

Unnamed: 0,year,discipline,nobelist
766,2004,Peace,Wangari Maathai


In [38]:
disco = np.load('discography.npy')
disco

array([('David Bowie', '1969-11-14', 17),
       ('The Man Who Sold the World', '1970-11-04',  3),
       ('Hunky Dory', '1971-12-17',  5),
       ('Ziggy Stardust', '1972-06-16',  1),
       ('Aladdin Sane', '1973-04-13',  1), ('Pin Ups', '1973-10-19',  1),
       ('Diamond Dogs', '1974-05-24',  1),
       ('Young Americans', '1975-03-07',  2),
       ('Station To Station', '1976-01-23',  5),
       ('Low', '1977-01-14',  2), ('Heroes', '1977-10-14',  3),
       ('Lodger', '1979-05-18',  4)],
      dtype=[('title', '<U32'), ('release', '<M8[D]'), ('toprank', '<i8')])

In [40]:
disco_df = pd.DataFrame(disco)
disco_df


Unnamed: 0,title,release,toprank
0,David Bowie,1969-11-14,17
1,The Man Who Sold the World,1970-11-04,3
2,Hunky Dory,1971-12-17,5
3,Ziggy Stardust,1972-06-16,1
4,Aladdin Sane,1973-04-13,1
5,Pin Ups,1973-10-19,1
6,Diamond Dogs,1974-05-24,1
7,Young Americans,1975-03-07,2
8,Station To Station,1976-01-23,5
9,Low,1977-01-14,2


In [42]:
disco_df.dtypes

title              object
release    datetime64[ns]
toprank             int64
dtype: object

In [43]:
pd.DataFrame([{'Title':'David Bowie', 'Year': 1969},
             {'Title': 'The Man Who Sold the World', 'Year': 1970},
             {'Title': 'Hunky Dory', 'Year': 1971}])

Unnamed: 0,Title,Year
0,David Bowie,1969
1,The Man Who Sold the World,1970
2,Hunky Dory,1971


In [45]:
pd.DataFrame([('Ziggy Stardust',1),('Aladdin Sane',1),('Pin Ups',1),('Prince of Egypt',4)], columns=['Title','TopRank'])

Unnamed: 0,Title,TopRank
0,Ziggy Stardust,1
1,Aladdin Sane,1
2,Pin Ups,1
3,Prince of Egypt,4


In [46]:
disco['title'],disco['release']

(array(['David Bowie', 'The Man Who Sold the World', 'Hunky Dory',
        'Ziggy Stardust', 'Aladdin Sane', 'Pin Ups', 'Diamond Dogs',
        'Young Americans', 'Station To Station', 'Low', 'Heroes', 'Lodger'],
       dtype='<U32'),
 array(['1969-11-14', '1970-11-04', '1971-12-17', '1972-06-16',
        '1973-04-13', '1973-10-19', '1974-05-24', '1975-03-07',
        '1976-01-23', '1977-01-14', '1977-10-14', '1979-05-18'],
       dtype='datetime64[D]'))

In [48]:
pd.DataFrame({'title':['David Bowie', 'The Man Who Sold the World', 'Hunky Dory',
        'Ziggy Stardust', 'Aladdin Sane', 'Pin Ups', 'Diamond Dogs',
        'Young Americans', 'Station To Station', 'Low', 'Heroes', 'Lodger'],
                       'release':['1969-11-14', '1970-11-04', '1971-12-17', '1972-06-16',
        '1973-04-13', '1973-10-19', '1974-05-24', '1975-03-07',
        '1976-01-23', '1977-01-14', '1977-10-14', '1979-05-18']})

Unnamed: 0,title,release
0,David Bowie,1969-11-14
1,The Man Who Sold the World,1970-11-04
2,Hunky Dory,1971-12-17
3,Ziggy Stardust,1972-06-16
4,Aladdin Sane,1973-04-13
5,Pin Ups,1973-10-19
6,Diamond Dogs,1974-05-24
7,Young Americans,1975-03-07
8,Station To Station,1976-01-23
9,Low,1977-01-14


### EOD