# Introduction to Pandas

Pandas documentation: http://pandas.pydata.org/pandas-docs/stable/ (homepage: https://pandas.pydata.org/)

Notes from Chapter 3 of https://jakevdp.github.io/PythonDataScienceHandbook/

In [2]:
import numpy as np
import pandas as pd
np.__version__, pd.__version__

('1.14.0', '0.22.0')

In [3]:
# show
def show(data, show_data = 0):
    """Print a short summary of a pandas-style container.

    Writes the object's index and its length to stdout; when requested,
    also writes the underlying values.

    Parameters
    ----------
    data : object exposing ``.index``, ``.values`` and ``len()``
        The container to summarise (used below with Series and DataFrame).
    show_data : truthy/falsy flag, default 0
        When truthy, ``data.values`` is printed after the summary lines.

    Returns
    -------
    None
    """
    # Labels are padded so the two colons line up in the printed output.
    index_line = " Index: {:}".format(data.index)
    print(index_line)
    length_line = "Length: {:}".format(len(data))
    print(length_line)

    # Guard clause: nothing more to print unless the caller asked for values.
    if not show_data:
        return
    print(data.values)

In [None]:
# Build a Series from a plain list; no index is passed, so pandas
# supplies its default index.
d1 = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
show(d1, show_data=1)
d1

In [None]:
# Build the same Series again, this time with explicitly specified
# index labels instead of the default index.
d2 = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
show(d2, show_data=1)
d2

In [None]:
# Look up a single element of d1 by the label 3 of its default index
d1[3]

In [None]:
# Slice d1 with integer bounds, list-style
# NOTE(review): plain integer slices are expected to be positional with the
# end excluded (elements 1 and 2) - confirm against the cell output
d1[1:3]

In [None]:
# Look up a single element of d2 by its index label
d2['a']

In [None]:
# Slice d2 between two index labels
# NOTE(review): pandas label slices are expected to include the end label
# ('d') - confirm against the cell output
d2['b':'d']

In [4]:
# Creating a Series from a dictionary.  In the pandas 0.22 run recorded in
# this notebook the Series comes out ordered by sorted dictionary key; newer
# pandas releases keep the dictionary's insertion order instead.  Sorting the
# index explicitly gives the same (sorted) result on either version, so label
# slices such as population['Illinois':'Texas'] and positional slices such as
# population[2:5] keep returning the rows shown in this notebook.
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
population = pd.Series(population_dict).sort_index()
show(population, 1)
population

 Index: Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')
Length: 5
[38332521 19552860 12882135 19651127 26448193]


California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [None]:
# Looks like a dictionary: a value is fetched by its key (the index label) ....
population['Illinois']

In [None]:
# but, unlike a dictionary, with list-like slicing - here between two
# index labels
population['Illinois':'Texas']

In [None]:
# along with "regular" slicing by integer bounds, which does not refer to
# the string labels at all
population[2:5]

In [6]:
# Area per state, keyed by state name, turned into a Series.  The index is
# sorted explicitly so the Series is ordered by state name whether the
# installed pandas sorts dictionary keys itself (as in the recorded 0.22
# output) or keeps the dictionary's insertion order (newer releases).
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict).sort_index()
show(area,1)
area

 Index: Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')
Length: 5
[423967 170312 149995 141297 695662]


California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [9]:
# Combine the two Series into one DataFrame: each dictionary key becomes a
# column name and each Series supplies that column's values.
states = pd.DataFrame(data={'population': population, 'area': area})
show(states, show_data=1)
states

 Index: Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')
Length: 5
[[  423967 38332521]
 [  170312 19552860]
 [  149995 12882135]
 [  141297 19651127]
 [  695662 26448193]]


Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [11]:
# Keep a reference to the DataFrame's row index (an Index of state names)
# and display it
ind1 = states.index
ind1

Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')

In [12]:
# An Index can be sliced like a list: everything from position 2 onwards
ind1[2:]

Index(['Illinois', 'New York', 'Texas'], dtype='object')

In [14]:
# Negative bounds count from the end: the last two entries
ind1[-2:]

Index(['New York', 'Texas'], dtype='object')

In [15]:
# A step of 2 picks every second entry, starting with the first
ind1[::2]

Index(['California', 'Illinois', 'Texas'], dtype='object')