In [1]:
import pandas as pd

# Pandas Series Object

In [3]:
# Pandas Series wraps a sequence of values and indices into a 1d array of indexed data
# creating series from a list:
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
# values are a numpy array
data.values

array([ 0.25,  0.5 ,  0.75,  1.  ])

In [5]:
# index is an array-like object of type pd.Index
data.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
print(data[1])
print(data[1:3])

0.5
1    0.50
2    0.75
dtype: float64


In [9]:
# index doesn't need to be int, we can use strings
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
data['b']

0.5

In [11]:
# using nonsequential indices
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=[1, 3, 5, 7])
data[5]

0.75

In [12]:
# series is like a dictionary, but with typed indices and values for efficiency
population_dict = {'CA': 38332521,
                   'TX': 26448193,
                   'NY': 19651127,
                   'FL': 19552860,
                   'IL': 12882135}
population = pd.Series(population_dict)
population

CA    38332521
FL    19552860
IL    12882135
NY    19651127
TX    26448193
dtype: int64

In [13]:
population['CA']

38332521

In [14]:
# unlike dict, Series supports array-style operations like slicing
population['CA':'IL']

CA    38332521
FL    19552860
IL    12882135
dtype: int64

In [None]:
# constructing Series objects

In [15]:
# from a list or NP array, where it defaults to int
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [17]:
## from a scalar, which repeats to fill index
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

In [18]:
#from a dict, where index defaults to sorted dict keys
pd.Series({2:'a', 1:'b', 3:'c'})

1    b
2    a
3    c
dtype: object

In [19]:
# index can be explicitly set
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

# Pandas DataFrame Object

In [24]:
# DataFrame as generalized NumPy array

# dataframe like a 2-d array with flexible row indices and column names
# df is like a sequence of aligned Series objects in that they share the same index

area_dict = {'CA': 423967,
             'TX': 170312,
             'NY': 149995,
             'FL': 141297,
             'IL': 695662}
area = pd.Series(area_dict)

states = pd.DataFrame({'population': population,
                        'area': area})
states

Unnamed: 0,area,population
CA,423967,38332521
FL,141297,19552860
IL,695662,12882135
NY,149995,19651127
TX,170312,26448193


In [25]:
states.index

Index(['CA', 'FL', 'IL', 'NY', 'TX'], dtype='object')

In [26]:
states.columns

Index(['area', 'population'], dtype='object')

In [27]:
# DataFrame as a specialized dictionary

# dict maps a key to a value, df maps a column name to a Series of column data

states['area']

CA    423967
FL    141297
IL    695662
NY    149995
TX    170312
Name: area, dtype: int64

In [30]:
# note:
# in a NumPy array, data[0] returns the first row
# in a Pandas DF, data['col0'] would return the first column
# so better to think of a DF as a generalized dict than an array