### Pandas
<p> Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. </p>

In [2]:
# Initial step is to import pandas
import pandas as pd
import numpy as np

In [3]:
# Build a 5x5 demo DataFrame holding the integers 0-24, filled row by row.
# The number of index labels must match the number of rows, and the number of
# column labels must match the number of columns of the underlying array.
df = pd.DataFrame(
    data=np.arange(25).reshape(5, 5),
    index=['Row-01', 'Row-02', 'Row-03', 'Row-04', 'Row-05'],
    columns=['Column-01', 'Column-02', 'Column-03', 'Column-04', 'Column-05'],
)



In [28]:
# Select a single column by its label; the result is a one-dimensional Series
df["Column-04"]

Row-01     3
Row-02     8
Row-03    13
Row-04    18
Row-05    23
Name: Column-04, dtype: int32

In [29]:
# Confirm that a single selected column is a pandas Series
type(df["Column-04"])

pandas.core.series.Series

In [31]:
# Select several columns at once by passing a list of column labels
# (note the double brackets); the result is a DataFrame
df[["Column-02","Column-05"]]

Unnamed: 0,Column-02,Column-05
Row-01,1,4
Row-02,6,9
Row-03,11,14
Row-04,16,19
Row-05,21,24


In [4]:
# Preview the top of the DataFrame; head() returns the first 5 rows by default,
# which here is the whole table
df.head()

Unnamed: 0,Column-01,Column-02,Column-03,Column-04,Column-05
Row-01,0,1,2,3,4
Row-02,5,6,7,8,9
Row-03,10,11,12,13,14
Row-04,15,16,17,18,19
Row-05,20,21,22,23,24


In [5]:
# Create a CSV (Comma-Separated Values) file from the existing data.
# The relative path means the file is written to the current working directory;
# by default the row index is written as the first column (pass index=False to omit it).
df.to_csv("TestFile.csv")

In [6]:
# Accessing the elements. There are two main indexers:
# (i) .loc selects by label (row/column names) and (ii) .iloc selects by integer position.
# Passing a single row label to .loc returns that row.
df.loc['Row-01']


Column-01    0
Column-02    1
Column-03    2
Column-04    3
Column-05    4
Name: Row-01, dtype: int32

In [7]:
# Check the type: a single row selected with .loc comes back as a Series
type(df.loc['Row-01'])

pandas.core.series.Series

#### Dataframe & Data Series
<p>A DataFrame is a two-dimensional labeled table made up of rows and columns, whereas a Series is a one-dimensional labeled array, such as a single row or a single column of a DataFrame.</p>


In [8]:
df.iloc[:,:]  # A bare ':' on both axes selects every row and every column

Unnamed: 0,Column-01,Column-02,Column-03,Column-04,Column-05
Row-01,0,1,2,3,4
Row-02,5,6,7,8,9
Row-03,10,11,12,13,14
Row-04,15,16,17,18,19
Row-05,20,21,22,23,24


In [12]:
df.iloc[0:2, 0:3]   # First 2 rows and first 3 columns (the stop position is excluded)

Unnamed: 0,Column-01,Column-02,Column-03
Row-01,0,1,2
Row-02,5,6,7


In [16]:
# Check the type: a slice spanning several rows and columns is still a DataFrame
type(df.iloc[0:2,0:3])

pandas.core.frame.DataFrame

In [13]:
df.iloc[:, 0:2]    # All rows, first 2 columns only (positions 0 and 1)

Unnamed: 0,Column-01,Column-02
Row-01,0,1
Row-02,5,6
Row-03,10,11
Row-04,15,16
Row-05,20,21


In [15]:
df.iloc[0:3,:]    # First 3 rows only (positions 0 to 2), all columns

Unnamed: 0,Column-01,Column-02,Column-03,Column-04,Column-05
Row-01,0,1,2,3,4
Row-02,5,6,7,8,9
Row-03,10,11,12,13,14


In [17]:
# Take all rows, and every column from the 3rd column (position 2) onward
df.iloc[:, 2:]

Unnamed: 0,Column-03,Column-04,Column-05
Row-01,2,3,4
Row-02,7,8,9
Row-03,12,13,14
Row-04,17,18,19
Row-05,22,23,24


In [18]:
# Convert the selected part of the DataFrame into a NumPy array (row/column labels are dropped).
# NOTE: the pandas documentation recommends DataFrame.to_numpy() over .values for new code.
df.iloc[:, 2:].values

array([[ 2,  3,  4],
       [ 7,  8,  9],
       [12, 13, 14],
       [17, 18, 19],
       [22, 23, 24]])

In [19]:
# Check the shape of the array as (rows, columns): 5 rows by the 3 selected columns
df.iloc[:, 2:].values.shape

(5, 3)

In [20]:
# Count the missing (null) values in each column; all zeros means nothing is missing
df.isnull().sum()

Column-01    0
Column-02    0
Column-03    0
Column-04    0
Column-05    0
dtype: int64

In [21]:
# Display the full DataFrame again for reference (it is small enough to show in full)
df

Unnamed: 0,Column-01,Column-02,Column-03,Column-04,Column-05
Row-01,0,1,2,3,4
Row-02,5,6,7,8,9
Row-03,10,11,12,13,14
Row-04,15,16,17,18,19
Row-05,20,21,22,23,24


In [25]:
# Count how many times each distinct value (category) occurs in the column;
# every value appears exactly once here
df['Column-03'].value_counts()

7     1
22    1
12    1
2     1
17    1
Name: Column-03, dtype: int64

In [26]:
# List the distinct values themselves (without counts), in order of first appearance
df['Column-03'].unique()

array([ 2,  7, 12, 17, 22], dtype=int64)