## Pandas Operations:

In [2]:
import numpy as np
import pandas as pd

In [3]:

# Small demo frame used by the cells that follow: two integer columns
# (col2 contains a repeated value, 444) and one string column.
df = pd.DataFrame(
    data=dict(
        col1=[1, 2, 3, 4],
        col2=[444, 555, 666, 444],
        col3=['abc', 'def', 'ghi', 'xyz'],
    )
)
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [4]:
# Show only the first two rows of the frame.
df.head(n=2)

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def


In [5]:
# Number of distinct values in each column.
df.nunique(axis=0)

col1    4
col2    3
col3    4
dtype: int64

In [6]:
# Number of distinct (non-null) values in a single column.
df['col1'].nunique(dropna=True)

4

In [7]:
# How many times each distinct value occurs in col2, most frequent first.
df['col2'].value_counts(sort=True, ascending=False)

col2
444    2
555    1
666    1
Name: count, dtype: int64

Condition-based selection:

In [8]:
# Keep only the rows whose col1 value is greater than 2 (boolean-mask selection).
df.loc[df['col1'] > 2, :]

Unnamed: 0,col1,col2,col3
2,3,666,ghi
3,4,444,xyz


Method-based selection:

`.apply` (similar to mapping: it calls a function on every value of the column)

In [9]:
def times2(n):
    """Return ``n`` multiplied by two."""
    return n * 2


# Series.apply calls times2 on each value of col1 and returns a new Series;
# df itself is not modified.
df['col1'].apply(times2)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

In [10]:
# Number of characters in the string form of each col2 value.
df['col2'].apply(lambda value: len(str(value)))

0    3
1    3
2    3
3    3
Name: col2, dtype: int64

In [11]:
# List all column labels of the frame (returned as a pandas Index, not a plain list).
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

Ordering / Sorting the values in df:

In [12]:
# Display the frame in its original row order, for comparison with the sorted result in the next cell.
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [13]:
# Return a copy of the frame with its rows ordered by col2, smallest first;
# df itself is left unchanged.
df.sort_values(by='col2', ascending=True)

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi


In [14]:
# Boolean frame of the same shape: True wherever a value is missing (NaN/None).
df.isna()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


### Reading and writing data with pandas

- CSV, Excel, HTML, and SQL can each be used as a data source or destination.


In [23]:
# Load worksheet 'Sheet1' from an Excel workbook.
# index_col=0 uses the file's first column as the row index instead of
# loading it as an ordinary data column.
df = pd.read_excel(io='Excel_sample.xlsx', sheet_name='Sheet1', index_col=0)
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [27]:
# Read the tables found on the web page; pd.read_html returns a list of
# DataFrames, one per parsed table. Requires network access.
df_htm = pd.read_html(
    'https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list/'
)

# Write the first table to a CSV file. With the default settings the row
# index is written as the first column of the file.
df_htm[0].to_csv('output_data.csv')

# Preview the first five rows. Jupyter only renders the value of a cell's
# final expression, so the preview has to come last to be displayed.
df_htm[0].head(5)