# Some useful operations 
Pandas data structures have a lot of basic functionality <br>
Ref: https://pandas.pydata.org/pandas-docs/stable/user_guide/basics.html


In [12]:
import pandas as pd 
import numpy as np

In [3]:
# Build a small example frame: two integer columns and one string column.
df = pd.DataFrame(
    dict(
        col1=[1, 2, 3, 4],
        col2=[111, 222, 333, 111],
        col3=['CS&IS', 'Bio', 'Chem', 'Eng'],
    )
)
df

Unnamed: 0,col1,col2,col3
0,1,111,CS&IS
1,2,222,Bio
2,3,333,Chem
3,4,111,Eng


### head() and tail() methods

In [4]:
#Using head method
#Return the first `n` rows.
df.head(2)

Unnamed: 0,col1,col2,col3
0,1,111,CS&IS
1,2,222,Bio


In [5]:
#Using tail method 
#Return the last `n` rows.
df.tail(2)

Unnamed: 0,col1,col2,col3
2,3,333,Chem
3,4,111,Eng


### unique() and nunique() methods

In [6]:
#Unique values
df['col2'].unique()

array([111, 222, 333])

In [7]:
#No. of unique values
df['col2'].nunique()

3

In [8]:
#value counts
#Return a Series containing the count of each unique value in the column.
df['col2'].value_counts()

111    2
222    1
333    1
Name: col2, dtype: int64

In [9]:
#values 
df['col2'].values

array([111, 222, 333, 111])

In [10]:
# Applying multiple conditions: combine element-wise boolean masks with &
df.loc[df['col1'].gt(1) & df['col3'].eq('Chem')]

Unnamed: 0,col1,col2,col3
2,3,333,Chem


## Applying functions

In [13]:
#using numpy built-in functions
np.square(df['col1'])

0     1
1     4
2     9
3    16
Name: col1, dtype: int64

### Using the apply() method, you can apply your own user-defined function

In [10]:
#define mul() 
def mul(x):
    """Return ``x`` multiplied by itself (the square of ``x``)."""
    squared = x * x
    return squared

In [11]:
#Applying mul() in column1
df['col1'].apply(mul)

0     1
1     4
2     9
3    16
Name: col1, dtype: int64

In [15]:
### Or using a lambda expression
df['col1'].apply(lambda x:x**2)

0     1
1     4
2     9
3    16
Name: col1, dtype: int64

In [12]:
#applying len method 
df['col3'].apply(len)

0    5
1    3
2    4
3    3
Name: col3, dtype: int64

In [13]:
#Applying sum() 
df['col1'].sum()

10

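 ### Deleting a row/column
 `del df[col]` and `df.pop(col)` remove a column from the DataFrame in place; `df.drop()` returns a new DataFrame and leaves the original unchanged unless the result is assigned back.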

In [14]:
#delete a column in place; equivalent to df.drop(columns='col1', inplace=True)
del df['col1']

In [15]:
df

Unnamed: 0,col2,col3
0,111,CS&IS
1,222,Bio
2,333,Chem
3,111,Eng


In [16]:
#drop rows by index label; the result is only displayed, not assigned, so df keeps all its rows
df.drop([0,1])

Unnamed: 0,col2,col3
2,333,Chem
3,111,Eng


In [17]:
#remove the column from df in place and return it as a Series
df.pop('col2')

0    111
1    222
2    333
3    111
Name: col2, dtype: int64

In [18]:
df

Unnamed: 0,col3
0,CS&IS
1,Bio
2,Chem
3,Eng


## Columns and index

In [19]:
df.columns

Index(['col3'], dtype='object')

In [20]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [21]:
df

Unnamed: 0,col3
0,CS&IS
1,Bio
2,Chem
3,Eng


In [22]:
#sort values 

df.sort_values(by='col3')

Unnamed: 0,col3
1,Bio
0,CS&IS
2,Chem
3,Eng


## Handling null values 

In [23]:
#Finding null values 
df.isnull()

Unnamed: 0,col3
0,False
1,False
2,False
3,False


In [24]:
#Drop rows that contain null values; df has none at this point, so every row is kept
df.dropna()

Unnamed: 0,col3
0,CS&IS
1,Bio
2,Chem
3,Eng


In [25]:
# Rebuild df with missing values (np.nan) to demonstrate null handling.
# numpy is already imported as np in the imports cell at the top of the
# notebook, so it is not re-imported here.
df = pd.DataFrame({
    'col1':[1,2,np.nan],
    'col2':[10,20,np.nan],
    'col3':['CS&IS',np.nan,np.nan]
})
df

Unnamed: 0,col1,col2,col3
0,1.0,10.0,CS&IS
1,2.0,20.0,
2,,,


In [26]:
#Replace every NaN with the given value; the result is displayed, not assigned back to df
df.fillna('Fill_value')

Unnamed: 0,col1,col2,col3
0,1,10,CS&IS
1,2,20,Fill_value
2,Fill_value,Fill_value,Fill_value
