   # Introducing Pandas Objects
   
   This tutorial is adapted from: https://www.oreilly.com/learning/introducing-pandas-objects


In [12]:
import numpy as np
import pandas as pd

## Pandas Series
A Pandas Series is a one-dimensional array of indexed data.

In [13]:
# Build a Series from a plain list of four floats; no index is passed here.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])

In [14]:
# Call form of print works on both Python 2 and Python 3.
print(data)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [15]:
# Show the Series' values without the index (call form of print works on
# both Python 2 and Python 3).
print(data.values)

[ 0.25  0.5   0.75  1.  ]


In [16]:
# Show the index object attached to the Series (call form of print works on
# both Python 2 and Python 3).
print(data.index)

Int64Index([0, 1, 2, 3], dtype='int64')


In [17]:
# Square-bracket lookup with an integer key; with the 0..3 index shown above
# this returns the value stored at label 1.
data[1]

0.5

In [18]:
# Slice notation: as the output shows, entries 1 and 2 are returned and the
# stop value 3 is excluded.
data[1:3]

1    0.50
2    0.75
dtype: float64

## Series as Generalized Numpy Array
"The essential difference is the presence of the index: while the Numpy Array has an implicitly defined integer index used to access the values, the Pandas Series has an explicitly defined index associated with the values."

In [19]:
# Same four values, now labelled with an explicit string index.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)

In [20]:
# Call form of print works on both Python 2 and Python 3.
print(data)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64


In [21]:
# Look up a value by its explicit string label.
data['b']

0.5

In [22]:
# Positional access on a string-labelled Series: use .iloc so the integer is
# always read as a position. Plain data[1] relies on the positional fallback
# of [] for non-integer indexes, which newer pandas releases deprecate.
data.iloc[1]

0.5

Non-contiguous or non-sequential indices

In [23]:
# The explicit index here is made of integers that are neither contiguous
# nor in sorted order.
data = pd.Series(
    index=[2, 5, 3, 7],
    data=[0.25, 0.5, 0.75, 1.0],
)

In [24]:
# Call form of print works on both Python 2 and Python 3.
print(data)

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64


In [25]:
# With an integer index the key is matched against the index labels: label 2
# belongs to the first element (0.25), not to the element at position 2.
data[2]

0.25

## Series as Specialized Dictionary

Series-as-dict

In [26]:
# Population values keyed by state name.
population_dict = {
    'California': 3344224,
    'Texas': 3223423,
    'New York': 23423432,
    'Florida': 23423432,
    'Illinois': 23423423,
}
# Sort by label so the index order (and the 'California':'Florida' label slice
# used further down) matches the displayed output whether pandas sorts dict
# keys on construction or keeps their insertion order.
population = pd.Series(population_dict).sort_index()

In [27]:
# Call form of print works on both Python 2 and Python 3.
print(population)

California     3344224
Florida       23423432
Illinois      23423423
New York      23423432
Texas          3223423
dtype: int64


In [28]:
# Dictionary-style lookup: the state name acts as the key.
population['California']

3344224

This is the best part: unlike a dictionary, the Series also supports array-style operations such as slicing.

In [29]:
# Slice by label over the string index; as the output shows, the end label
# 'Florida' is included in the result.
population['California':'Florida']

California     3344224
Florida       23423432
dtype: int64

## Pandas DataFrame

In [30]:
# Area values keyed by state name (same keys as population_dict).
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
# Sort by label so the index order matches the displayed output whether pandas
# sorts dict keys on construction or keeps their insertion order.
area = pd.Series(area_dict).sort_index()

In [31]:
# Call form of print works on both Python 2 and Python 3.
print(area)

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64


In [34]:
# Combine the two Series into one DataFrame with one column per Series.
# Column order is pinned explicitly so the frame matches the displayed output
# on pandas versions that keep dict insertion order instead of sorting keys.
# NOTE(review): the key 'populaton' (sic) is kept unchanged because it is the
# column name shown in the recorded outputs; rename it everywhere at once if
# the spelling is to be fixed.
states = pd.DataFrame({'populaton': population, 'area': area},
                      columns=['area', 'populaton'])

In [35]:
# Call form of print works on both Python 2 and Python 3.
print(states)

              area  populaton
California  423967    3344224
Florida     170312   23423432
Illinois    149995   23423423
New York    141297   23423432
Texas       695662    3223423


In [36]:
# Row labels of the DataFrame (the state names, per the output).
states.index

Index([u'California', u'Florida', u'Illinois', u'New York', u'Texas'], dtype='object')

In [37]:
# Column labels of the DataFrame, returned as an Index object.
states.columns

Index([u'area', u'populaton'], dtype='object')

## DataFrame as Specialized Dictionary

In [38]:
# Dictionary-style access: a column name maps to that column, returned as a
# Series named after the column (see "Name: area" in the output).
states['area']

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
Name: area, dtype: int64