In [1]:
import pandas as pd

In [2]:
# Build a one-column frame: the column name sits next to its data and the
# rows are labelled with the letters a-d instead of the default 0..3.
df = pd.DataFrame({'numbers': [10, 20, 30, 40]}, index=list('abcd'))

In [3]:
df  # A bare name as the last expression of a cell displays the frame.

Unnamed: 0,numbers
a,10
b,20
c,30
d,40


In [4]:
df.index  # Row labels of df, returned as a pandas Index object.

Index(['a', 'b', 'c', 'd'], dtype='object')

In [5]:
df.columns  # Column labels of df, likewise held in an Index object.

Index(['numbers'], dtype='object')

In [6]:
df.loc['c']  # Label-based lookup: returns the row labelled 'c' as a Series named 'c'.

numbers    30
Name: c, dtype: int64

In [7]:
df.loc[['a','d']]  # A list of labels returns the matching rows (a and d) as a DataFrame.

Unnamed: 0,numbers
a,10
d,40


In [8]:
df.iloc[1:3]  # Position-based slice: rows at positions 1 and 2 (labels b and c); the end position 3 is excluded.

Unnamed: 0,numbers
b,20
c,30


In [9]:
df.sum()  # Column-wise sum: returns one total per column (here only 'numbers').

numbers    100
dtype: int64

In [10]:
# Cube every value. apply() hands each column to the function as a Series,
# so the power is computed on the whole column at once rather than per element.
df.apply(lambda column: column ** 3)

Unnamed: 0,numbers
a,1000
b,8000
c,27000
d,64000


In [11]:
df ** 3  # Arithmetic operators apply element-wise to the whole frame, as with NumPy ndarray objects.

Unnamed: 0,numbers
a,1000
b,8000
c,27000
d,64000


In [12]:
df['numbers']  # Selecting a single column by name returns it as a Series.

a    10
b    20
c    30
d    40
Name: numbers, dtype: int64

In [13]:
# Add a new column: one float per existing row, assigned in row order.
df['floats'] = [1.5, 2.5, 3.5, 4.5]
df

Unnamed: 0,numbers,floats
a,10,1.5
b,20,2.5
c,30,3.5
d,40,4.5


In [14]:
# Add a column from a labelled Series: each value is matched to df's rows by
# its index label (d, a, b, c), not by its position in the list.
df['names'] = pd.Series(['Yves', 'Sandra', 'Lilli', 'Henry'], index=['d', 'a', 'b', 'c'])
df

Unnamed: 0,numbers,floats,names
a,10,1.5,Sandra
b,20,2.5,Lilli
c,30,3.5,Henry
d,40,4.5,Yves


In [15]:
# Append a new row. With ignore_index=True the original labels (a-d) are
# discarded and the result gets a fresh 0..n-1 range index.
# DataFrame.append() was removed in pandas 2.0, so pd.concat() is used instead.
pd.concat(
    [df, pd.DataFrame([{'numbers': 100, 'floats': 5.75, 'names': 'Jill'}])],
    ignore_index=True,
)
# The result is only displayed: it is not assigned, so df itself is unchanged.

Unnamed: 0,numbers,floats,names
0,10,1.5,Sandra
1,20,2.5,Lilli
2,30,3.5,Henry
3,40,4.5,Yves
4,100,5.75,Jill


In [16]:
# To preserve the appended row, the result must be assigned back to df.
# DataFrame.append() was removed in pandas 2.0, so pd.concat() is used instead.
# Note: re-running this cell appends another row labelled 'y'.
df = pd.concat([df, pd.DataFrame({'numbers': 100, 'floats': 5.75, 'names': 'Jill'}, index=['y'])])
df


Unnamed: 0,numbers,floats,names
a,10,1.5,Sandra
b,20,2.5,Lilli
c,30,3.5,Henry
d,40,4.5,Yves
y,100,5.75,Jill


In [17]:
# Incomplete frames can be appended as well: columns missing from the new row
# ('numbers', 'floats') are filled with NaN.
# DataFrame.append() was removed in pandas 2.0, so pd.concat() is used instead;
# sort=False keeps the existing column order.
df = pd.concat([df, pd.DataFrame({'names': 'Liz'}, index=['z'])], sort=False)
df

Unnamed: 0,numbers,floats,names
a,10.0,1.5,Sandra
b,20.0,2.5,Lilli
c,30.0,3.5,Henry
d,40.0,4.5,Yves
y,100.0,5.75,Jill
z,,,Liz


In [18]:
# Display the data type of each column. 'numbers' shows as float64 here
# because row z has no value for it and NaN cannot be stored in an int64 column.
df.dtypes

numbers    float64
floats     float64
names       object
dtype: object

In [19]:
# Column means of the two numeric columns. mean() skips NaN by default,
# so the incomplete row z does not affect the result.
df[['numbers','floats']].mean()

numbers    40.00
floats      3.55
dtype: float64

In [20]:
# Column standard deviations of the two numeric columns. NaN values are
# skipped, and pandas computes the sample standard deviation (ddof=1) by default.
df[['numbers','floats']].std()

numbers    35.355339
floats      1.662077
dtype: float64