## Data Indexing and Selection

In [2]:
import pandas as pd
# Build a float Series keyed by the string labels 'a' through 'd'.
data = pd.Series({'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0})

In [3]:
# Dictionary-style lookup: fetch the value stored under the label 'b'.
data['b']

0.5

In [4]:
# Membership is tested against the index labels, like `in` on dict keys.
'a' in data

True

In [5]:
# keys() returns the Series' index object.
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [7]:
# Materialise items() into a list to see every (label, value) pair.
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [8]:
# On its own, items() is a lazy iterator (a zip object); nothing is listed
# until it is consumed, e.g. with list().
data.items()

<zip at 0x245d186d680>

In [9]:
# Assigning to a label that does not exist yet appends a new entry.
data['e'] = 1.25

In [10]:
# Display the Series to confirm that 'e' was added.
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

In [11]:
# Extend the Series again with another new label.
data['f'] = 1.50

In [12]:
# Display the Series to confirm that 'f' was added.
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
f    1.50
dtype: float64

### Series as 1D array

In [16]:
# Slicing by explicit index labels: the stop label 'c' is included.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [15]:
# Slicing by implicit integer position: the stop position 2 is excluded.
data[0:2]

a    0.25
b    0.50
dtype: float64

In [17]:
# Masking: keep only the entries whose value lies strictly between 0.3 and 0.8.
data[(data >0.3) & (data < 0.8)]

b    0.50
c    0.75
dtype: float64

In [20]:
# Fancy indexing: pass a list of labels to select several entries at once.
data[['a','e']]

a    0.25
e    1.25
dtype: float64

### Indexers: loc and iloc

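With an integer index, plain indexing is ambiguous: `data[1]` uses the explicit labels, while `data[1:3]` uses the implicit positions. The `loc` and `iloc` indexers make the choice explicit.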

In [21]:
# Rebind `data` to a Series of strings whose explicit index labels are the
# integers 1, 3 and 5, so labels no longer coincide with positions.
data = pd.Series({1: 'a', 3: 'b', 5: 'c'})

In [22]:
# Show the new Series: index labels on the left, string values on the right.
data

1    a
3    b
5    c
dtype: object

In [23]:
# Plain indexing with a single integer uses the explicit label: label 1 -> 'a'.
data[1]

'a'

In [25]:
# Plain slicing with integers uses implicit positions: positions 1 and 2,
# i.e. the entries labelled 3 and 5.
data[1:3]

3    b
5    c
dtype: object

In [29]:
# loc always refers to the explicit index labels: label 1 -> 'a'.
data.loc[1]

'a'

In [30]:
# loc slices by label and includes the stop label: labels 1 through 3.
data.loc[1:3]

1    a
3    b
dtype: object

In [32]:
# loc is strictly label-based: 2 is not one of the index labels (1, 3, 5),
# so the lookup raises KeyError. The error is the point of this cell, so
# catch and print it instead of letting it halt a Restart & Run All.
try:
    data.loc[2]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 2

In [34]:
# iloc always refers to the implicit zero-based position: position 2 is the
# third entry, 'c'.
data.iloc[2]

'c'

In [36]:
# iloc slices by position and excludes the stop: positions 1 and 2.
data.iloc[1:3]

3    b
5    c
dtype: object

### Data Selection in DataFrame

In [37]:
# DataFrame as a dictionary
# Two integer Series, `area` and `pop`, keyed by the same five state names.
area = pd.Series(
    [423967, 695662, 141297, 170312, 149995],
    index=['California', 'Texas', 'New York', 'Florida', 'Illinois'],
)
pop = pd.Series(
    [38332521, 26448193, 19651127, 19552860, 12882135],
    index=['California', 'Texas', 'New York', 'Florida', 'Illinois'],
)

In [38]:
# Combine the two Series into a DataFrame: each keyword becomes a column
# name and the shared index labels become the row labels.
data = pd.DataFrame(dict(area=area, pop=pop))
data

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [41]:
# Dictionary-style access by column name returns that column as a Series.
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [44]:
# Alternatively, attribute-style access returns the same column.
data.area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [46]:
# Both access styles return the very same object here (identity, not just equality).
# NOTE(review): the identity result presumably depends on pandas handing back a
# cached column object; confirm on the pandas version in use.
data.area is data['area']

True

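Attribute-style access does not work for every column. `pop` is also the name of a `DataFrame` method (`DataFrame.pop`, which removes a column and returns it), so `data.pop` refers to that method rather than to the `'pop'` column.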

In [48]:
# False: attribute access finds the DataFrame's own `pop` method rather than
# the column named 'pop', so the two sides are different objects.
data.pop is data['pop']

False

In [51]:
# Creating a new column: divide 'pop' by 'area' element-wise and store the
# result under the new key 'density'.
data['density'] = data['pop'].div(data['area'])

In [50]:
# Display the DataFrame with the new 'density' column.
data

Unnamed: 0,area,pop,density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763
