# Pandas Introduction
There are three fundamental pandas data structures
    1. Series
    2. DataFrame
    3. Index
                                    < May 21/2019 >

In [1]:
import numpy as np
import pandas as pd

In [2]:
## Pandas series Object
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [3]:
# The values are simply a familiar Numpy array:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [4]:
# The index is an array-like object of type
data.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
data[0]

0.25

In [6]:
## Pandas DataFrame Object
population_dict = {'California': 38332521,
                    'Texas': 26448193,
                    'New York': 19651127,
                    'Florida': 19552860,
                    'Illinois': 12882135}
population = pd.Series(population_dict)
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [7]:
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
'Florida': 170312, 'Illinois': 149995}
area = pd.Series(area_dict)
area

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [8]:
states = pd.DataFrame({'population':population,
                      'area': area})
states

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [9]:
## Pandas Index Object
ind = pd.Index([2,3,5,7,11])
ind[::2]

Int64Index([2, 5, 11], dtype='int64')

In [21]:
## Data Indexing and Selection
# Data Selection in Series
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [22]:
data.iloc[1:3]

b    0.50
c    0.75
dtype: float64

In [27]:
data[['a','b']]

a    0.25
b    0.50
dtype: float64

In [29]:
states['population']

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
Name: population, dtype: int64

In [33]:
states['density'] = states['area'] / states['population']
states.T

Unnamed: 0,California,Florida,Illinois,New York,Texas
area,423967.0,170312.0,149995.0,141297.0,695662.0
population,38332520.0,19552860.0,12882140.0,19651130.0,26448190.0
density,0.01106024,0.008710337,0.01164364,0.007190275,0.02630282


In [36]:
states['area']['California']

423967

In [39]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
'New York': 19651127}, name='population')
population/area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64