* pandas provides an efficient implementation of DataFrames
* A DataFrame is a row & column (tabular) structure for data
* Different columns can have different data types
* All values within a single column share the same data type

* Series & DataFrames

In [1]:
import pandas as pd

### Series
* A Series is a one-dimensional array of indexed data

In [2]:
# A Series built from a plain list gets the default integer index (0, 1, 2, ...).
data = pd.Series(data=[0.25, 0.50, 0.75, 1.00])

In [3]:
# Show the Series: index labels on the left, values on the right, dtype on the last line.
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [7]:
# keys() returns the Series index (here the default RangeIndex), much like dict.keys().
data.keys()

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Drop missing (NaN) entries. Series.valid() was deprecated in pandas 0.23 and
# removed in 1.0; dropna() is the supported equivalent. `data` holds no missing
# values here, so the result equals the original Series.
data.dropna()

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [8]:
# The index is exposed as an attribute; for this list-built Series it is a RangeIndex.
data.index


RangeIndex(start=0, stop=4, step=1)

In [18]:
# Re-create the Series with explicit string labels instead of the default integer index.
data = pd.Series(
    [0.4212, 0.5323, 0.621212],
    index=list('abc'),
)

In [10]:
# Show the relabelled Series; the string labels appear in place of integer positions.
data

a    0.421200
b    0.532300
c    0.621212
dtype: float64

In [11]:
# Dictionary-style lookup: fetch a single value by its index label.
data['a']

0.4212

In [12]:
import numpy as np

In [19]:
# NumPy functions accept a Series directly; the result is a Series with the same index labels.
np.round(data,decimals=3)

a    0.421
b    0.532
c    0.621
dtype: float64

In [20]:
# State name -> population, used below to build a Series.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

In [22]:
# Convert the dictionary to a Series: keys become the index, values the data.
# pandas >= 0.23 keeps the dict's insertion order instead of sorting the keys,
# so sort the index explicitly. Label slices such as s['Illinois':'Texas']
# only select the intended range when the index is sorted.
s = pd.Series(population_dict).sort_index()

In [23]:
# Show the Series built from the dictionary: the state names form the index.
s

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [24]:
# Look up a single state's value by its label, as with a dict.
s['Texas']

26448193

In [25]:
# Label-based slice: unlike positional slicing, the stop label ('Texas') is included.
s['Illinois':'Texas']

Illinois    12882135
New York    19651127
Texas       26448193
dtype: int64

In [26]:
# Integer-keyed dictionary whose keys will serve as index labels.
# Note: this rebinds `data`, which previously held a Series.
data = {2: 'a', 3: 'b', 4: 'c'}

In [27]:
# Passing an explicit index selects only those keys: the entry for key 2 is left out.
pd.Series(data,index=[3,4])

3    b
4    c
dtype: object

In [28]:
# A scalar is repeated for every label in the given index.
pd.Series(5,index=[100,200,300])

100    5
200    5
300    5
dtype: int64

### Pandas DataFrame Object

In [29]:
# Convert the population dictionary to a Series (keys -> index, values -> data).
# Sorting the index keeps the rows in alphabetical order regardless of the
# dict's insertion order, which pandas >= 0.23 would otherwise preserve.
population = pd.Series(population_dict).sort_index()

In [46]:
# State name -> area, converted to a Series indexed by state name.
area_dict = {
    'California': 423967,
    'Florida': 170312,
    'New York': 141297,
    'Texas': 695662,
    'Illinois': 149995,
}
# Sort the index so the rows are alphabetical regardless of the dict's
# insertion order, which pandas >= 0.23 would otherwise preserve.
areas = pd.Series(area_dict).sort_index()

In [47]:
# Show the population Series (indexed by state name).
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [48]:
# Show the area Series (indexed by state name).
areas

California    423967
Florida       170312
Illinois      149995
New York      141297
Texas         695662
dtype: int64

In [49]:
# Build a DataFrame from the two Series: each dict key names a column, and the
# Series' index labels become the row labels.
df = pd.DataFrame(data={'population': population, 'area': areas})

In [50]:
# Show the combined table: one row per state, one column per Series.
df

Unnamed: 0,area,population
California,423967,38332521
Florida,170312,19552860
Illinois,149995,12882135
New York,141297,19651127
Texas,695662,26448193


In [51]:
# Row labels of the DataFrame (the state names).
df.index

Index(['California', 'Florida', 'Illinois', 'New York', 'Texas'], dtype='object')

In [52]:
# Column labels of the DataFrame; they are held in an Index object as well.
df.columns

Index(['area', 'population'], dtype='object')

In [55]:
# Selecting with a list of column names returns a DataFrame (a one-column table here).
df[['area']]

Unnamed: 0,area
California,423967
Florida,170312
Illinois,149995
New York,141297
Texas,695662


In [58]:
# A single Series becomes a one-column DataFrame; columns= supplies the column name.
pd.DataFrame(population,columns=['pop'])

Unnamed: 0,pop
California,38332521
Florida,19552860
Illinois,12882135
New York,19651127
Texas,26448193


In [62]:
# Build a list of row records: each dictionary maps a column name to its value.
data = [dict(a=n, b=2 * n) for n in range(10)]

In [60]:
# A list of dictionaries becomes a DataFrame: one row per dict, one column per key.
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4
3,3,6
4,4,8
5,5,10
6,6,12
7,7,14
8,8,16
9,9,18


In [61]:
# Inspect the raw list of dictionaries used to build the DataFrame.
data

[{'a': 0, 'b': 0},
 {'a': 1, 'b': 2},
 {'a': 2, 'b': 4},
 {'a': 3, 'b': 6},
 {'a': 4, 'b': 8},
 {'a': 5, 'b': 10},
 {'a': 6, 'b': 12},
 {'a': 7, 'b': 14},
 {'a': 8, 'b': 16},
 {'a': 9, 'b': 18}]

In [65]:
# Convert a 2-D NumPy array to a DataFrame with explicit column and row labels.
# A dedicated, seeded RandomState makes the values reproducible on every run
# without touching NumPy's global random state.
random_df = pd.DataFrame(
    np.random.RandomState(0).rand(3, 2),
    columns=['abc', 'def'],
    index=['a', 'b', 'c'],
)
random_df

Unnamed: 0,abc,def
a,0.601453,0.636977
b,0.861963,0.414517
c,0.50474,0.708105


### Pandas Index Object
* An Index is immutable: its elements cannot be modified after creation

In [66]:
# Create an Index directly from a sequence of labels.
idx = pd.Index(list('abc'))

In [70]:
# Reuse the Index object as the row labels of a one-column DataFrame.
# The integers are drawn from [1, 5) with a seeded RandomState so the table
# is the same on every run.
pd.DataFrame(np.random.RandomState(0).randint(low=1, high=5, size=3), index=idx)

Unnamed: 0,0
a,1
b,3
c,1


In [71]:
# An Index is immutable, so item assignment raises TypeError. Catch the error
# and print it so the demonstration stays visible without aborting a full
# "Restart & Run All" of the notebook.
try:
    idx[0] = 'x'
except TypeError as err:
    print('TypeError: {}'.format(err))

TypeError: Index does not support mutable operations