In [2]:
import numpy as np
import pandas as pd

In [8]:
# Build a small frame from a list of row records: every dict is one row and
# its keys ('a', 'b') become the column names.
data = [dict(a=i, b=2 * i) for i in range(3)]
output = pd.DataFrame.from_records(data)
output

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [10]:
# Series of the integers 1..6 labelled 'a'..'f'; the cells below treat it
# like a dictionary from label to value.
data = pd.Series(range(1, 7), index=list('abcdef'))
data

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [11]:
# dictionary-style lookup: fetch the value stored under the index label 'e'
data['e']

5

In [13]:
# membership is tested against the index labels (the "keys"), not the values
'c' in data

True

In [23]:
# `keys` is a method and has to be called; the bare attribute (data.keys)
# would only display the bound method instead of the labels
data.keys()

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [56]:
# `values` is an attribute holding the data as a NumPy array
data.values
# will not work: `values` is not a method, so the array cannot be called
#data.values()

array([1, 2, 3, 4, 5, 6])

In [17]:
# items() yields (label, value) pairs, like a dict; list() materialises them
list(data.items())

[('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5), ('f', 6)]

In [34]:
# assigning to an existing label ('f') overwrites its value ...
data['f'] = 9
# ... while assigning to a new label ('g') appends an entry to the Series
data['g'] = 8
data

a    1
b    2
c    3
d    4
e    5
f    9
g    8
dtype: int64

In [31]:
# slicing by explicit index (labels): the end label 'c' is included
data['a':'c']

a    1
b    2
c    3
dtype: int64

In [33]:
# slicing by implicit integer index (positions): the stop position 2 is excluded
data[0:2]

a    1
b    2
dtype: int64

In [35]:
# masking: keep only the entries whose value is strictly between 3 and 8
data[(data > 3) & (data < 8)]

d    4
e    5
dtype: int64

In [36]:
# fancy indexing: a list of labels selects those entries, in the order listed
data[['a', 'g', 'c']]

a    1
g    8
c    3
dtype: int64

In [37]:
# explicit index (labels)    -> use .loc
# implicit index (positions) -> use .iloc

In [38]:
# Rebind `data` to a Series whose integer labels start at 1, so a label no
# longer coincides with its position.
data = pd.Series(list('aiog'), index=[1, 2, 3, 4])
data

1    a
2    i
3    o
4    g
dtype: object

In [40]:
# explicit index: a single integer is matched against the index labels,
# so this returns the entry labelled 1 ('a'), not the entry at position 1
data[1]

'a'

In [42]:
# implicit index: an integer slice is positional, so this returns
# positions 1 and 2 (the entries labelled 2 and 3)
data[1:3]

2    i
3    o
dtype: object

In [3]:
# Population per state; the dict keys become the index of the Series.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(population_dict)
# Disabled alternative: hold the same data in a DataFrame instead of a Series.
#population = pd.DataFrame(population, columns = ['population'])
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [6]:
# Area per state, keyed by the same state names as the population data.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(area_dict)
# Disabled alternative: hold the same data in a one-column DataFrame.
#area = pd.DataFrame({'area':area})
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [7]:
# Combine both Series into one table: each keyword becomes a column name and
# the state names shared by the two Series form the row index.
states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [46]:
# dictionary-style index: works for any column name
#states['area']
# attribute-style index: will not work in all cases, e.g. when the column
# name is not a valid Python identifier or clashes with a DataFrame method
states.area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [48]:
# Attribute-style and dictionary-style access return the same column.
# Compare by content rather than identity: whether both accesses hand back
# the very same object depends on pandas internals (item caching), so `is`
# is not a reliable check across pandas versions.
states.area.equals(states['area'])

True

In [50]:
# Add a derived column: population divided by area, computed element-wise
# for every state.
states['density'] = states['population'].div(states['area'])
states

Unnamed: 0,population,area,density
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121
Illinois,12882135,149995,85.883763


In [54]:
# dataframe as two dimensional array: `values` exposes the data as an ndarray
# with one row per state and one column per DataFrame column
states.values

# will not work: `values` is an attribute, not a method
# states.values()

array([[3.83325210e+07, 4.23967000e+05, 9.04139261e+01],
       [2.64481930e+07, 6.95662000e+05, 3.80187404e+01],
       [1.96511270e+07, 1.41297000e+05, 1.39076746e+02],
       [1.95528600e+07, 1.70312000e+05, 1.14806121e+02],
       [1.28821350e+07, 1.49995000e+05, 8.58837628e+01]])

In [55]:
# transpose: swap rows and columns, so states become the column headers
states.T

Unnamed: 0,California,Texas,New York,Florida,Illinois
population,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
area,423967.0,695662.0,141297.0,170312.0,149995.0
density,90.41393,38.01874,139.0767,114.8061,85.88376


In [58]:
# passing a single index to the underlying array (states.values) accesses a row
states.values[0]

array([3.83325210e+07, 4.23967000e+05, 9.04139261e+01])

In [59]:
# passing a single "index" to a DataFrame accesses a column (returned as a Series)
states['density']

California     90.413926
Texas          38.018740
New York      139.076746
Florida       114.806121
Illinois       85.883763
Name: density, dtype: float64

In [60]:
# iloc uses implicit (positional) indexing: first three rows, first two columns
states.iloc[:3, :2]

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297


In [66]:
# loc uses explicit (label) indexing: both end labels are included in the slices
states.loc[:'New York', :'area']

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297


In [67]:
# Hybrid selection: the first three rows by position, the columns up to and
# including 'area' by label. The `.ix` indexer that used to mix both styles
# is deprecated (and removed in pandas 1.0), so translate the positional row
# slice into labels via the index and let `.loc` do the whole selection.
states.loc[states.index[:3], :'area']


Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297


In [69]:
# loc indexer combined with masking and fancy index:
# rows where density exceeds 85, restricted to the population and area columns
states.loc[states['density'] > 85, ['population', 'area']]

Unnamed: 0,population,area
California,38332521,423967
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [74]:
# indexers can also be used to modify values in place:
# row 0 is California and column 2 is density, so this overwrites that one cell
states.iloc[0, 2] = 90
states

Unnamed: 0,population,area,density
California,38332521,423967,90.0
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121
Illinois,12882135,149995,85.883763


In [75]:
# unlike single-key indexing (columns), slicing with [] selects rows;
# with labels the end label 'Florida' is included
states['Texas':'Florida']

Unnamed: 0,population,area,density
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121


In [76]:
# an integer slice with [] also selects rows, by position (stop excluded)
states[1:4]

Unnamed: 0,population,area,density
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121


In [77]:
# a boolean mask with [] filters rows: keep states whose density exceeds 70
states[states['density'] > 70]

Unnamed: 0,population,area,density
California,38332521,423967,90.0
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121
Illinois,12882135,149995,85.883763
