### Pandas

1. Series - Pandas Series is a one-dimensional labeled array capable of holding any data type
2. DataFrame - A Pandas DataFrame is nothing but a collection of one or more Series (1+) that share the same index

In [18]:
import numpy as np
import pandas as pd

#### Creating Pandas Series
pd.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

In [4]:
# Prepare the same three values in each container type used in the cells below:
# plain Python lists, a NumPy array and a dictionary keyed by label.

label = ['one', 'two', 'three']
data = [1, 2, 3]
arr = np.array(data)
# pair every label with its value: {'one': 1, 'two': 2, 'three': 3}
dictionary = dict(zip(label, data))

In [5]:
# passing only the data: pandas assigns a default integer index (0, 1, 2, ...)
pd.Series(data)

0    1
1    2
2    3
dtype: int64

In [7]:
# passing the data together with explicit index labels (one label per value)
pd.Series(data=data, index=label)

one      1
two      2
three    3
dtype: int64

In [8]:
# passing a NumPy array: as with a list, a default integer index is assigned
pd.Series(arr)

0    1
1    2
2    3
dtype: int32

In [9]:
# passing a dictionary: the keys become the index labels and the values become the data
pd.Series(dictionary)

one      1
two      2
three    3
dtype: int64

In [11]:
# build a Series and keep it in a variable so its values can be accessed below
ser = pd.Series(dictionary)
ser

one      1
two      2
three    3
dtype: int64

In [13]:
# accessing a single value by its index label (here the dictionary key 'one')
ser['one']

1

### Creating Pandas DataFrame
Signature: pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)

In [26]:
from numpy.random import randint
# seed NumPy's global random generator so the random integers drawn afterwards
# are repeatable when the notebook is run from a fresh kernel
np.random.seed(101)

In [29]:
# 5x5 table of random integers (low=1, high=100), with rows labelled A-E
# and columns labelled F-J
df = pd.DataFrame(
    data=randint(1, 100, size=(5, 5)),
    index=list('ABCDE'),
    columns=list('FGHIJ'),
)
df

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57
E,39,74,53,19,72


In [30]:
# retrieve a single column using bracket notation; the result is a Series
df['F']

A    35
B    77
C     9
D    64
E    39
Name: F, dtype: int32

In [33]:
# checking the type shows that a DataFrame is just a bunch of Series that share the same index
type(df['F'])

pandas.core.series.Series

In [37]:
# retrieve several columns by passing a list of column names inside the brackets;
# the result is a DataFrame
df[['F', 'G']]

Unnamed: 0,F,G
A,35,45
B,77,96
C,9,63
D,64,8
E,39,74


#### Adding a new column

In [49]:
# assigning to a column label that does not exist yet adds it as a new column;
# randint(5, 20, 5) supplies five random integers, one per row
df['new_col'] = randint(5, 20, 5)

In [50]:
# display the DataFrame to confirm that new_col was added
df

Unnamed: 0,F,G,H,I,J,new_col
A,35,45,73,20,11,16
B,77,96,88,1,74,8
C,9,63,37,84,29,6
D,64,8,11,53,57,18
E,39,74,53,19,72,13


#### Dropping a column

In [51]:
# axis=1 tells drop to look for the label among the columns;
# the result is a new DataFrame, df itself is not modified
df.drop('new_col', axis=1)

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57
E,39,74,53,19,72


In [52]:
# df still contains new_col: the drop above returned a new DataFrame and left df as it was
df

Unnamed: 0,F,G,H,I,J,new_col
A,35,45,73,20,11,16
B,77,96,88,1,74,8
C,9,63,37,84,29,6
D,64,8,11,53,57,18
E,39,74,53,19,72,13


In [53]:
# drop() returns a new DataFrame and leaves df untouched, so the result has to be
# assigned back to df for the column to actually disappear from it.
# (df.drop('new_col', axis=1, inplace=True) has the same effect on df, but
# reassignment is the generally preferred style: it keeps the call chainable.)
df = df.drop('new_col', axis=1)

In [54]:
# new_col is gone now that the drop has been applied to df itself
df

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57
E,39,74,53,19,72


#### Dropping a Row 

In [55]:
# axis=0 (the default) looks the label up in the row index;
# as before, a new DataFrame is returned and df itself keeps row E
df.drop('E', axis=0)

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57


#### Why is axis = 0 for rows and axis = 1 for columns?

In [56]:
# this can be explained by looking at the shape of the DataFrame:
# df.shape is a tuple in which index 0 holds the number of rows
# and index 1 holds the number of columns
df.shape

(5, 5)

#### Retrieving row data from the DataFrame
1. Label based index - loc['labelname']
2. Numeric based index - iloc[indexnumber]

In [57]:
# the full DataFrame for reference; row E is still present because
# df.drop('E') returned a new DataFrame instead of changing df
df

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57
E,39,74,53,19,72


In [58]:
# getting row C by its label; the row comes back as a Series indexed by the column names
df.loc['C']

F     9
G    63
H    37
I    84
J    29
Name: C, dtype: int32

In [59]:
# getting the same row by integer position (0-based, so row C is at position 2)
df.iloc[2]

F     9
G    63
H    37
I    84
J    29
Name: C, dtype: int32

#### Getting subset of the DataFrame

In [60]:
# the full DataFrame, for comparison with the subset selected below
df

Unnamed: 0,F,G,H,I,J
A,35,45,73,20,11
B,77,96,88,1,74
C,9,63,37,84,29
D,64,8,11,53,57
E,39,74,53,19,72


In [61]:
# pass a list of row labels and a list of column labels to loc
# to select the sub-table where those rows and columns intersect
df.loc[['A','B'],['I','J']]

Unnamed: 0,I,J
A,20,11
B,1,74
