#### DataFrame and Series Basics

In [None]:
'''
Two-dimensional, size-mutable, potentially heterogeneous tabular data.

A DataFrame is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns.

A pandas DataFrame consists of three principal components: the data, the rows, and the columns.
'''

In [None]:
'''
A Pandas Series is like a column in a table.

It is a one-dimensional array holding data of any type (integer, string, float, Python objects, etc.).
'''

In [None]:
# A single record: every key maps to one scalar value.
person = dict(
    first='Vaibhav',
    last='Nath',
    email='Vaibhav.nath@email.com',
)

In [None]:
# Same record, but each value is wrapped in a one-element list.
person = {"first": ["Vaibhav"], "last": ["Nath"], "email": ["Vaibhav.nath@email.com"]}

In [1]:
# Three records stored column-wise: each key holds a list with one entry
# per person, and entries at the same position belong to the same person.
person = {
    "first": ["Vaibhav", "Sam", "John"],
    "last": ["Nath", "Ham", "Doe"],
    "email": [
        "Vaibhav.nath@email.com",
        "Sam.Ham@email.com",
        "John.Doe@email.com",
    ],
}

In [2]:
# Plain dict lookup: returns the list stored under the 'first' key.
person['first']

['Vaibhav', 'Sam', 'John']

In [3]:
# Plain dict lookup: returns the list stored under the 'email' key.
person['email']

['Vaibhav.nath@email.com', 'Sam.Ham@email.com', 'John.Doe@email.com']

#### Create a DataFrame

In [4]:
import pandas as pd

In [5]:
# Build the frame from the dict: each key becomes a column label and each
# list supplies that column's values.
df = pd.DataFrame(data=person)
df  # bare last expression so the notebook renders the frame as a table

Unnamed: 0,first,last,email
0,Vaibhav,Nath,Vaibhav.nath@email.com
1,Sam,Ham,Sam.Ham@email.com
2,John,Doe,John.Doe@email.com


In [6]:
# Select one column by its label with bracket notation; the result is a Series.
df['email']

0    Vaibhav.nath@email.com
1         Sam.Ham@email.com
2        John.Doe@email.com
Name: email, dtype: object

In [7]:
# Attribute-style access to the same 'email' column. Bracket notation is the
# more general form: attribute access only works when the column name is a
# valid Python identifier that does not collide with an existing DataFrame attribute.
df.email

0    Vaibhav.nath@email.com
1         Sam.Ham@email.com
2        John.Doe@email.com
Name: email, dtype: object

In [8]:
# Pass a list of column labels to select several columns at once; the result is a DataFrame.
df[['first','last']]

Unnamed: 0,first,last
0,Vaibhav,Nath
1,Sam,Ham
2,John,Doe


In [20]:
# Build a second frame from the same dict with custom row labels '1', '2', '3'
# (strings) instead of the default 0..n-1 integer index.
# It is bound to its own name on purpose: rebinding `df` here would change the
# index that the following cells rely on (e.g. `df.loc[0]` needs the integer
# label 0), so the notebook would not survive Restart & Run All.
df_labeled = pd.DataFrame(person, index=['1', '2', '3'])
df_labeled

Unnamed: 0,first,last,email
1,Vaibhav,Nath,Vaibhav.nath@email.com
2,Sam,Ham,Sam.Ham@email.com
3,John,Doe,John.Doe@email.com


In [9]:
# A single column selected from a DataFrame is a pandas Series.
type(df['email'])

pandas.core.series.Series

In [10]:
# The column labels of the DataFrame, returned as an Index object.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [None]:
'''
loc : .loc[] is a label-based selection indexer (used with square brackets, not called like a function),
which means that we have to pass the label (name) of the row or column which we want to select.

iloc : .iloc[] is an integer-position-based selection indexer (also used with square brackets),
which means that we have to pass the integer position of the row/column which we want to select.

'''

In [11]:
# Select the first row by integer position (positions are 0-based); the row comes back as a Series.
df.iloc[0]

first                   Vaibhav
last                       Nath
email    Vaibhav.nath@email.com
Name: 0, dtype: object

In [12]:
# Select the second row by integer position (position 1).
df.iloc[1]

first                  Sam
last                   Ham
email    Sam.Ham@email.com
Name: 1, dtype: object

In [13]:
# Select the third row by integer position (position 2).
df.iloc[2]

first                  John
last                    Doe
email    John.Doe@email.com
Name: 2, dtype: object

In [14]:
# Pass a list of integer positions to select several rows; the result is a DataFrame.
df.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,Vaibhav,Nath,Vaibhav.nath@email.com
1,Sam,Ham,Sam.Ham@email.com


In [15]:
# Rows at positions 0, 1 and 2, restricted to the column at position 2
# (the third column, 'email'). Both selectors are positions, not labels.
df.iloc[[0,1,2] , 2]

0    Vaibhav.nath@email.com
1         Sam.Ham@email.com
2        John.Doe@email.com
Name: email, dtype: object

In [16]:
# Show the whole frame again so the row labels used with .loc are visible.
df

Unnamed: 0,first,last,email
0,Vaibhav,Nath,Vaibhav.nath@email.com
1,Sam,Ham,Sam.Ham@email.com
2,John,Doe,John.Doe@email.com


In [17]:
# Select the row whose index label is 0 (a label lookup, not a position).
df.loc[0]

first                   Vaibhav
last                       Nath
email    Vaibhav.nath@email.com
Name: 0, dtype: object

In [18]:
# Select the rows whose index labels are 1 and 2 by passing a list of labels.
df.loc[[1,2]]

Unnamed: 0,first,last,email
1,Sam,Ham,Sam.Ham@email.com
2,John,Doe,John.Doe@email.com


In [19]:
# Rows labelled 0 and 1, restricted to the 'email' and 'last' columns;
# the columns come back in the order they are listed here.
df.loc[[0,1], ['email','last'] ]

Unnamed: 0,email,last
0,Vaibhav.nath@email.com,Nath
1,Sam.Ham@email.com,Ham
