## DataFrames
-------
Explore all methods and features available for DataFrame-based operations

### Background
-----

In [1]:
import pandas as pd
import numpy as np

# Print the pandas version together with the versions of its dependencies,
# so the environment that produced the outputs below is on record.
pd.show_versions()


INSTALLED VERSIONS
------------------
commit: None
python: 3.7.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.4.0-148-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_IN
LOCALE: en_IN.ISO8859-1

pandas: 0.24.2
pytest: 4.3.1
pip: 19.0.3
setuptools: 40.8.0
Cython: 0.29.6
numpy: 1.16.2
scipy: 1.2.1
pyarrow: None
xarray: None
IPython: 7.4.0
sphinx: 1.8.5
patsy: 0.5.1
dateutil: 2.8.0
pytz: 2018.9
blosc: None
bottleneck: 1.2.1
tables: 3.5.1
numexpr: 2.6.9
feather: None
matplotlib: 3.0.3
openpyxl: 2.6.1
xlrd: 1.2.0
xlwt: 1.3.0
xlsxwriter: 1.1.5
lxml.etree: 4.3.2
bs4: 4.7.1
html5lib: 1.0.1
sqlalchemy: 1.3.1
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None


### Create a DataFrame
------

- _DataFrame of m rows and n cols_

In [2]:
# Seed the global NumPy generator so that Restart & Run All rebuilds the
# exact same frame (and therefore the same outputs in every later cell).
# The legacy seeding call is used because it works with the NumPy version
# this notebook was recorded with.
np.random.seed(42)

# 6 rows x 4 columns of standard-normal samples, with columns labelled A-D.
df = pd.DataFrame(np.random.randn(6, 4), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,-2.163825,0.905178,-0.046221,-0.736055
1,0.556555,0.057252,-0.057222,-1.126403
2,-0.94503,-0.239027,-0.74331,0.484164
3,-0.190939,-0.483805,0.660973,-0.972405
4,-0.483039,-0.555896,-0.296294,1.17213
5,0.016801,0.116755,-0.619082,1.438501


### Column Names

- [list vs np.array](https://stackoverflow.com/questions/993984/what-are-the-advantages-of-numpy-over-regular-python-lists)
- [Efficient way to get columns](https://stackoverflow.com/questions/19482970/get-list-from-pandas-dataframe-column-headers)

In [3]:
# Two ways to get the column labels as a plain Python list:
# iterating the DataFrame itself, or converting the column index's values.
list(df), df.columns.values.tolist()

(['A', 'B', 'C', 'D'], ['A', 'B', 'C', 'D'])

### Slice using columns (axis = 1)

In [4]:
# Take the last two column labels, then select those columns by label.
trailing_cols = list(df)[-2:]
df[trailing_cols]

Unnamed: 0,C,D
0,-0.046221,-0.736055
1,-0.057222,-1.126403
2,-0.74331,0.484164
3,0.660973,-0.972405
4,-0.296294,1.17213
5,-0.619082,1.438501


In [5]:
# Positional slice: keep all rows and every column except the last two.
df.iloc[:,:-2]

Unnamed: 0,A,B
0,-2.163825,0.905178
1,0.556555,0.057252
2,-0.94503,-0.239027
3,-0.190939,-0.483805
4,-0.483039,-0.555896
5,0.016801,0.116755


In [6]:
 df.loc[:, 'B':'D']

Unnamed: 0,B,C,D
0,0.905178,-0.046221,-0.736055
1,0.057252,-0.057222,-1.126403
2,-0.239027,-0.74331,0.484164
3,-0.483805,0.660973,-0.972405
4,-0.555896,-0.296294,1.17213
5,0.116755,-0.619082,1.438501


### Add columns

In [7]:
# Derive column E as the element-wise sum of columns A and D, then show the frame.
df['E'] = df['A'].add(df['D'])
df

Unnamed: 0,A,B,C,D,E
0,-2.163825,0.905178,-0.046221,-0.736055,-2.89988
1,0.556555,0.057252,-0.057222,-1.126403,-0.569848
2,-0.94503,-0.239027,-0.74331,0.484164,-0.460866
3,-0.190939,-0.483805,0.660973,-0.972405,-1.163345
4,-0.483039,-0.555896,-0.296294,1.17213,0.689091
5,0.016801,0.116755,-0.619082,1.438501,1.455302


### Modify columns

In [8]:
# Standardize every column: subtract its mean, then divide by its standard deviation.
col_means = df.mean()
col_stds = df.std()
df = df.sub(col_means).div(col_stds)

# Fill any NaNs left in the standardized frame with that column's mean.
df = df.fillna(df.mean())
df

Unnamed: 0,A,B,C,D,E
0,-1.728928,1.754871,0.273109,-0.687673,-1.591958
1,1.158484,0.169252,0.251229,-1.032091,-0.051731
2,-0.435298,-0.384788,-1.113453,0.388971,0.02031
3,0.365094,-0.842524,1.679773,-0.896213,-0.444051
4,0.055059,-0.977333,-0.224304,0.995988,0.78047
5,0.585589,0.280522,-0.866354,1.231018,1.28696
