# Python pandas library - Part 1

In this part we are going to learn about:

1. Pandas Dataframe
2. Pandas Series
3. Pandas Basic Operations

In [3]:
import pandas as pd
import numpy as np

In [7]:
# Build a 5x4 integer array holding the values 0 through 19
arr = np.arange(20).reshape(5, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [12]:
# Build a DataFrame from the array, labelling its rows and columns
df = pd.DataFrame(
    data=arr,
    index=["Row1", "Row2", "Row3", "Row4", "Row5"],
    columns=["Col1", "Col2", "Col3", "Col4"],
)

In [13]:
# Display the first rows of the DataFrame.
# head() returns the first 5 rows when no row count is passed.
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [14]:
# Display the last rows of the DataFrame.
# tail() returns the last 5 rows when no row count is passed;
# this frame has only 5 rows, so every row is shown.
df.tail()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [15]:
# Request the first 10 rows instead of the default 5.
# The frame has only 5 rows, so all of them are returned.
df.head(10) # override the default row count with 10

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [16]:
# Check the type of df: it is a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [17]:
# Print a summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Col1    5 non-null      int32
 1   Col2    5 non-null      int32
 2   Col3    5 non-null      int32
 3   Col4    5 non-null      int32
dtypes: int32(4)
memory usage: 120.0+ bytes


In [19]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
# By default describe() summarises only the numeric (int/float) columns
# of a frame that has any.
df.describe()

Unnamed: 0,Col1,Col2,Col3,Col4
count,5.0,5.0,5.0,5.0
mean,8.0,9.0,10.0,11.0
std,6.324555,6.324555,6.324555,6.324555
min,0.0,1.0,2.0,3.0
25%,4.0,5.0,6.0,7.0
50%,8.0,9.0,10.0,11.0
75%,12.0,13.0,14.0,15.0
max,16.0,17.0,18.0,19.0


**Indexing Concept**

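1. Using the column name, e.g. `df['Col1']`
2. Using the row index label with `.loc` (here `loc` is a property, indexed with square brackets)
3. Using the row index and column index numbers with `.iloc` (`iloc` stands for integer/index location)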

In [21]:
# Display the DataFrame again as a reference for the indexing examples below
df.head()

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [23]:
# Select a single column by its name.
# The result holds the Col1 values labelled by the row index.
df['Col1']

Row1     0
Row2     4
Row3     8
Row4    12
Row5    16
Name: Col1, dtype: int32

In [24]:
# A single selected column is a pandas Series
type(df['Col1'])

pandas.core.series.Series

In [34]:
# Select several columns by name: Col1, Col4 and Col3.
# Note: pass a list of column names inside the indexing brackets (hence [[...]]);
# the columns are returned in the order they are listed.
df[['Col1', 'Col4', 'Col3']]

Unnamed: 0,Col1,Col4,Col3
Row1,0,3,2
Row2,4,7,6
Row3,8,11,10
Row4,12,15,14
Row5,16,19,18


In [28]:
# Selecting a list of columns returns a DataFrame (not a Series)
type(df[['Col1', 'Col2', 'Col3']])

pandas.core.frame.DataFrame

In [29]:
# Select a single row by its index label with .loc.
# The result holds that row's values labelled by column name.
df.loc['Row3']

Col1     8
Col2     9
Col3    10
Col4    11
Name: Row3, dtype: int32

In [30]:
# A single row selected with .loc is a pandas Series
type(df.loc['Row3'])

pandas.core.series.Series

In [31]:
# Select several rows with .loc by passing a list of index labels.
# The rows are returned in the order they are listed.
df.loc[['Row3', 'Row1', 'Row4']]

Unnamed: 0,Col1,Col2,Col3,Col4
Row3,8,9,10,11
Row1,0,1,2,3
Row4,12,13,14,15


In [36]:
# Select by integer position with .iloc (row index and column index numbers).
# Display the 3rd and 4th rows of Col1 and Col2.
# Syntax: iloc[row_selection, column_selection] - positions are 0-based and
# the end of a slice is excluded, so 2:4 picks positions 2 and 3.
df.iloc[2:4, :2]

Unnamed: 0,Col1,Col2
Row3,8,9
Row4,12,13


In [37]:
# Rows from position 2 onward, columns from position 1 onward
df.iloc[2:, 1:]

Unnamed: 0,Col2,Col3,Col4
Row3,9,10,11
Row4,13,14,15
Row5,17,18,19


In [39]:
# Select every row of the first and last columns.
# Positions 0 and -1 address the first and last column whatever the number
# of columns is, instead of relying on a slice step that matches the width.
df.iloc[:, [0, -1]]

Unnamed: 0,Col1,Col4
Row1,0,3
Row2,4,7
Row3,8,11
Row4,12,15
Row5,16,19


**Convert dataframe into arrays**

In [41]:
# Convert every column except the first into a NumPy array.
# to_numpy() is the accessor pandas recommends over the older .values attribute.
df.iloc[:, 1:].to_numpy()

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

**Basic Operations**

In [46]:
# Build a small 2x3 DataFrame that contains one missing value (NaN).
# Note: this rebinds `arr` and `df`, replacing the objects created earlier.
arr = [
    [1, np.nan, 2],
    [1, 2, 3],
]
df = pd.DataFrame(
    arr,
    index=["Row1", "Row2"],
    columns=["Col1", "Col2", "Col3"],
)

In [47]:
# Display the new DataFrame; the missing Col2 entry in Row1 is shown as NaN
df

Unnamed: 0,Col1,Col2,Col3
Row1,1,,2
Row2,1,2.0,3


In [49]:
# Build a boolean frame with the same rows and columns as df:
# True where a cell holds a null (NaN) value, False otherwise.
df.isnull()

Unnamed: 0,Col1,Col2,Col3
Row1,False,True,False
Row2,False,False,False


In [50]:
# Count the null values in each column (each True counts as 1 when summed)
df.isnull().sum()

Col1    0
Col2    1
Col3    0
dtype: int64

In [51]:
# True for each column that contains no null values, False otherwise
df.isnull().sum() == 0

Col1     True
Col2    False
Col3     True
dtype: bool

In [52]:
# Count how many times each distinct value occurs in Col3
df['Col3'].value_counts()

2    1
3    1
Name: Col3, dtype: int64

In [56]:
# Distinct values present in Col1, returned as a NumPy array
df['Col1'].unique()

array([1], dtype=int64)

In [57]:
# Distinct values present in Col3, returned as a NumPy array
df['Col3'].unique()

array([2, 3], dtype=int64)

In [63]:
# Conditional (boolean) indexing:
# keep only the rows where the Col3 value is greater than 2.
df[df['Col3'] > 2]

Unnamed: 0,Col1,Col2,Col3
Row2,1,2.0,3
