# Data Frames :
- A pandas DataFrame is the Python counterpart of an Excel spreadsheet: a table with labelled rows and columns.

**Syntax : `pd.DataFrame(data, index=row_labels, columns=column_labels)`**

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global random generator so the random calls that follow (e.g. randn)
# produce the same numbers every time the notebook is run
np.random.seed(101)

In [4]:
# Build a 5x4 DataFrame of random values with labelled rows (A-E) and columns (W-Z)

df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)

In [5]:
# Display the DataFrame: rows are labelled A-E and columns W-Z
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


**Note: *Each column is actually a pandas Series; the columns share a common index to form a DataFrame, as shown below:***

In [6]:
# Selecting a single column with [] gives back a pandas Series
type(df['W'])

pandas.core.series.Series

In [7]:
# The full table itself is a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [8]:
# The column keeps the row labels (A-E) as its index and the column name as its Name
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

### Creating Data Frame From Dictionary :

In [9]:
# Same idea as creating a Series from a dictionary; here each key holds a list of values
d = dict(A=[4, 5, 6], B=[5, 8, 1], C=[1, 2, 3])

In [10]:
# Dictionary keys become the column names; rows get the default integer index 0, 1, 2
pd.DataFrame(d)

Unnamed: 0,A,B,C
0,4,5,1
1,5,8,2
2,6,1,3


---

# Indexing and Selection :
> **Syntax : `df_name['columnName']` OR `df_name.columnName`**

In [11]:
df['W']
# Returns the column 'W' as a Series

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

###  Grabbing Multiple Columns :
> **Syntax : `df_name[listOf_Columns]`**

In [12]:
# A list of column names returns a DataFrame with the columns in the order listed
df[['W','Y','X']]

Unnamed: 0,W,Y,X
A,2.70685,0.907969,0.628133
B,0.651118,-0.848077,-0.319318
C,-2.018168,0.528813,0.740122
D,0.188695,-0.933237,-0.758872
E,0.190794,2.605967,1.978757


---
## Adding New Column : 

In [13]:
# Assigning to a new column name creates the column; 'new' holds the row-wise sum of W, X, Y and Z
df['new'] = df['W'] + df['X'] + df['Y'] + df['Z']

In [14]:
# 'new' now appears as the last column
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,4.746778
B,0.651118,-0.319318,-0.848077,0.605965,0.089688
C,-2.018168,0.740122,0.528813,-0.589001,-1.338233
D,0.188695,-0.758872,-0.933237,0.955057,-0.548357
E,0.190794,1.978757,2.605967,0.683509,5.459028


---

## Dropping/Deleting a Column :
> syntax : `df_name.drop('columnName', axis=1)`

In [15]:
df.drop('new', axis=1)
# NOTE: drop() returns a new DataFrame without 'new'; the original 'df' is not modified

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [16]:
# 'new' is still present: the drop() call above did not modify 'df'
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,4.746778
B,0.651118,-0.319318,-0.848077,0.605965,0.089688
C,-2.018168,0.740122,0.528813,-0.589001,-1.338233
D,0.188695,-0.758872,-0.933237,0.955057,-0.548357
E,0.190794,1.978757,2.605967,0.683509,5.459028


- **Use `inplace=True` to drop the column from the actual DataFrame**

In [17]:
df.drop('new', axis=1, inplace=True)
df
# Now the 'new' column is removed from 'df' itself (inplace=True modifies the DataFrame instead of returning a copy).
# NOTE(review): re-running this cell should raise a KeyError because 'new' no longer exists.

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


---
## Dropping/Deleting a ROW :
> syntax : `df_name.drop('rowName', axis=0)` (`axis=0` is the default and may be omitted)
- Same as dropping a column, except that the axis is 0

In [18]:
# axis defaults to 0 (rows), so this returns a copy of df without row 'E'
df.drop('E')

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


---

# Selecting a Row :
> Syntax: `df_name.loc['rowName']`
- This returns a Series whose index is the DataFrame's column names

In [19]:
# Label-based lookup: row 'A' is returned as a Series indexed by the column names
df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [20]:
# Selecting multiple rows: pass a list of row labels to get a DataFrame back
df.loc[['A','B']]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965


### Selecting a row by its integer position :
> Syntax: `df_name.iloc[indexNumber]`

In [21]:
# Position-based lookup: position 1 is the second row (label 'B')
df.iloc[1]

W    0.651118
X   -0.319318
Y   -0.848077
Z    0.605965
Name: B, dtype: float64

In [22]:
# A list of positions returns those rows (here 'B' and 'C') as a DataFrame
df.iloc[[1,2]]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001


In [23]:
df.iloc[1,2]
# Returns the single value at row position 1, column position 2 (row 'B', column 'Y')

-0.8480769834036315

---

## Selecting subsets of rows and columns :
> Syntax: `df_name.loc['rowName', 'columnName']`

In [24]:
# Show the full DataFrame again for reference
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [25]:
# Scalar lookup by labels: row 'B', column 'X'
df.loc['B','X']

-0.31931804459303326

In [26]:
# Chained indexing: df['X'] selects the column as a Series, then ['B'] picks label 'B' from it.
# It yields the same value as df.loc['B', 'X']; prefer the single .loc call, especially when assigning.
df['X']['B']

-0.31931804459303326

In [27]:
# Grab the subset made of rows A, B and columns W, Y
# by passing a list of row labels and a list of column labels
df.loc[['A','B'], ['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


---