### D. More Operations and Functions

This section will show the more and most useful functions of Pandas.

In [1]:
import pandas as pd
import numpy as np

In [2]:
df4 = pd.DataFrame({'Product Name':['Shirt','Boot','Bag'], 
              'Order Number':[45,56,64], 
              'Total Quantity':[10,5,9]}, 
              columns = ['Product Name', 'Order Number', 'Total Quantity'])

#### Retrieving basic info about the Dataframe

In [3]:
# Return a summary about the dataframe

df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Product Name    3 non-null      object
 1   Order Number    3 non-null      int64 
 2   Total Quantity  3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 200.0+ bytes


In [4]:
# Return dataframe columns

df4.columns

Index(['Product Name', 'Order Number', 'Total Quantity'], dtype='object')

In [6]:
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [5]:
# Return dataframe data

df4.keys

<bound method NDFrame.keys of   Product Name  Order Number  Total Quantity
0        Shirt            45              10
1         Boot            56               5
2          Bag            64               9>

In [7]:
# Return the head of the dataframe ....could make sense if you have long frame
# Choose how many rows you want in head()

df4.head(1)

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10


In [8]:
# Return the tail of the dataframe

df4.tail(1)

Unnamed: 0,Product Name,Order Number,Total Quantity
2,Bag,64,9


In [10]:
# Return NumPy array of the dataframe

df4.values

array([['Shirt', 45, 10],
       ['Boot', 56, 5],
       ['Bag', 64, 9]], dtype=object)

In [11]:
# Return the size or number of elements in a dataframe

df4.size

9

In [12]:
# Return the shape

df4.shape

(3, 3)

In [13]:
# Return the length of the dataframe/the number of rows in a dataframe

df4.shape[0]

3

In [14]:
# Return the length of the dataframe/the number of columns in a dataframe

df4.shape[1]

3

#### Unique Values

In [15]:
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [16]:
# Return unique values in a given column 

df4['Product Name'].unique()

array(['Shirt', 'Boot', 'Bag'], dtype=object)

In [17]:
# Return a number of unique values
df4['Product Name'].nunique()

3

In [18]:
# Counting the occurence of each value in a column 
df4['Product Name'].value_counts()

Shirt    1
Boot     1
Bag      1
Name: Product Name, dtype: int64

#### Applying a Function to Dataframe

In [19]:
# Double the quantity product
def double_quantity(x):
    return x * x

In [20]:
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [21]:
df4['Total Quantity'].apply(double_quantity)

0    100
1     25
2     81
Name: Total Quantity, dtype: int64

In [23]:
# You can also apply an anonymous function to a dataframe
# Squaring each value in dataframe
df5 = pd.DataFrame([[1,2], [4,5]], columns=['col1', 'col2'])
df5

Unnamed: 0,col1,col2
0,1,2
1,4,5


In [24]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   col1    2 non-null      int64
 1   col2    2 non-null      int64
dtypes: int64(2)
memory usage: 160.0 bytes


In [25]:
df5.applymap(lambda x: x**2)

Unnamed: 0,col1,col2
0,1,4
1,16,25


#### Sorting values in dataframe

In [27]:
df4

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [28]:
# Sort the df4 by the order number

df4.sort_values(['Order Number'])

Unnamed: 0,Product Name,Order Number,Total Quantity
0,Shirt,45,10
1,Boot,56,5
2,Bag,64,9


In [29]:
df4.sort_values(['Order Number'], ascending=False)

Unnamed: 0,Product Name,Order Number,Total Quantity
2,Bag,64,9
1,Boot,56,5
0,Shirt,45,10
