### Stacking dataframes on top of one another (similar to vstack):

In [6]:
import pandas as pd
import numpy as np
# Small 2x4 frame of random values, reused as the building block below.
df1 = pd.DataFrame(np.random.rand(2, 4))
# Stack three copies row-wise (axis=0); ignore_index=True renumbers the rows
# 0..5 instead of repeating each copy's own 0, 1 labels.
df = pd.concat([df1] * 3, axis=0, ignore_index=True)
df

Unnamed: 0,0,1,2,3
0,0.52744,0.641283,0.50615,0.489345
1,0.977062,0.589091,0.752786,0.106942
2,0.52744,0.641283,0.50615,0.489345
3,0.977062,0.589091,0.752786,0.106942
4,0.52744,0.641283,0.50615,0.489345
5,0.977062,0.589091,0.752786,0.106942


### Stacking dataframes horizontally (similar to hstack):

In [9]:
# Place three copies side by side (axis=1); ignore_index=True renumbers the
# resulting columns 0..11 instead of repeating 0..3 three times.
pd.concat([df1] * 3, axis=1, ignore_index=True)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.52744,0.641283,0.50615,0.489345,0.52744,0.641283,0.50615,0.489345,0.52744,0.641283,0.50615,0.489345
1,0.977062,0.589091,0.752786,0.106942,0.977062,0.589091,0.752786,0.106942,0.977062,0.589091,0.752786,0.106942


### Applying a function to all columns (or rows) of df:

In [13]:
# print(...) with a single argument works on both Python 2 and Python 3;
# the bare `print x` statement is a SyntaxError on Python 3.
# axis=0 hands each column to the function: one sum per column.
print(df.apply(lambda col: col.sum(), axis=0))
# axis=1 hands each row to the function: one sum per row.
print(df.apply(lambda row: row.sum(), axis=1))

0    4.513507
1    3.691123
2    3.776808
3    1.788861
dtype: float64
0    2.164219
1    2.425881
2    2.164219
3    2.425881
4    2.164219
5    2.425881
dtype: float64


### Dropping all NaN values:

In [24]:
# Blank out two cells so that dropna() has something to remove.
# None assigned into these float columns shows up as NaN in the printed frame.
df.iloc[0, 3] = None
df.iloc[2, 1] = None
# print(...) with a single argument works on both Python 2 and Python 3.
print(df)
# dropna() returns a new frame without the rows that contain a NaN
# (rows 0 and 2 here); it does not modify df.
df.dropna()

          0         1         2         3
0  0.527440  0.641283  0.506150       NaN
1  0.977062  0.589091  0.752786  0.106942
2  0.527440       NaN  0.506150  0.489345
3  0.977062  0.589091  0.752786  0.106942
4  0.527440  0.641283  0.506150  0.489345
5  0.977062  0.589091  0.752786  0.106942


Unnamed: 0,0,1,2,3
1,0.977062,0.589091,0.752786,0.106942
3,0.977062,0.589091,0.752786,0.106942
4,0.52744,0.641283,0.50615,0.489345
5,0.977062,0.589091,0.752786,0.106942


### loc and iloc in pandas:
When we select some rows from a DataFrame (e.g., with a boolean filter, much like a SQL WHERE clause), the selected rows keep their original index labels in the new DataFrame. So for the new DataFrame, `df.index` is not necessarily 0, 1, ... (the labels don't even have to be numbers). To select rows by these index labels, use `df.loc`. To ignore the labels and select rows by integer position counting from zero, use `df.iloc`.

In [3]:
# Load the features table from the CSV file in the working directory.
f = pd.read_csv(filepath_or_buffer='features.csv')

In [10]:
# Keep only the holiday rows. The surviving rows retain their original index
# labels from f, so the result's index does not start at 0.
f_holiday = f.loc[f['IsHoliday'] == True]
f_holiday.head(n=10)

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
1,1,2010-02-12,38.51,2.548,,,,,,211.24217,8.106,True
31,1,2010-09-10,78.69,2.565,,,,,,211.49519,7.787,True
42,1,2010-11-26,64.52,2.735,,,,,,211.748433,7.838,True
47,1,2010-12-31,48.43,2.943,,,,,,211.404932,7.838,True
53,1,2011-02-11,36.39,3.022,,,,,,212.936705,7.742,True
83,1,2011-09-09,76.0,3.546,,,,,,215.861056,7.962,True
94,1,2011-11-25,60.14,3.236,410.31,98.0,55805.51,8.0,554.92,218.467621,7.866,True
99,1,2011-12-30,44.55,3.129,5762.1,46011.38,260.36,983.65,4735.78,219.53599,7.866,True
105,1,2012-02-10,48.02,3.409,13925.06,6927.23,101.64,8471.88,6886.04,220.265178,7.348,True
135,1,2012-09-07,83.96,3.73,5204.68,35.74,50.94,4120.32,2737.17,222.439015,6.908,True


In [8]:
# Position-based lookup: the first row of f_holiday, regardless of its label.
f_holiday.iloc[0]

Store                    1
Date            2010-02-12
Temperature          38.51
Fuel_Price           2.548
MarkDown1              NaN
MarkDown2              NaN
MarkDown3              NaN
MarkDown4              NaN
MarkDown5              NaN
CPI                211.242
Unemployment         8.106
IsHoliday             True
Name: 1, dtype: object

In [9]:
# Label-based lookup: f_holiday has no row labelled 0 (its first label is 1),
# so .loc raises KeyError. Catch it and print the message so the notebook
# still runs top to bottom while showing the failure.
try:
    f_holiday.loc[0, :]
except KeyError as err:
    print("KeyError: {}".format(err))

KeyError: 'the label [0] is not in the [index]'

### Empty array of type object:

In [18]:
# Allocate a 2x3x4 array with object dtype; as the output shows, every slot
# holds None.
np.empty(shape=(2, 3, 4), dtype=object)

array([[[None, None, None, None],
        [None, None, None, None],
        [None, None, None, None]],

       [[None, None, None, None],
        [None, None, None, None],
        [None, None, None, None]]], dtype=object)