In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import random

# Load the workbook into a dataframe.
# sheet_name=1 selects the second worksheet (sheet positions are zero-based).
# The first 20 rows and the last 2 rows of that sheet are skipped
# (presumably non-tabular header/footer text -- confirm against the workbook).
immigration_data = pd.read_excel(
    'immigration_data.xlsx',
    sheet_name=1,
    skiprows=20,
    skipfooter=2,
)

# DataFrame.sort_values() orders the rows by one or more columns.
# `by` names the column(s) to sort on; when a list is given, earlier entries
# take priority and later entries only break ties.
# `ascending` takes either a single flag or one flag per column in `by`:
# here AreaName is sorted in descending order and, within the same AreaName,
# OdName is sorted in ascending order.
sort_columns = ['AreaName', 'OdName']
sort_ascending = [False, True]
sorted_data = immigration_data.sort_values(by=sort_columns, ascending=sort_ascending)
print(sorted_data.head(5))

# Columns can be removed from a dataframe with the drop() method, which
# accepts a list of column labels to discard.
# For example, we first add a temporary column and then remove it again.
# Note: random.randint() returns a single integer, so every row of the new
# column holds the same value.
temp_value = random.randint(0, 100)
immigration_data['temp'] = temp_value
print(immigration_data.head(5))

# axis='columns' tells drop() that the labels are column names;
# inplace=True modifies immigration_data itself instead of returning a copy.
immigration_data.drop(labels=['temp'], axis='columns', inplace=True)
print(immigration_data.head(5))

# drop() can remove rows as well: pass the index labels to discard and
# axis='index'. Because inplace is not set, a new dataframe is returned and
# immigration_data itself is left unchanged.
print(immigration_data.head(5))
rows_to_drop = [0, 1, 2]
print(immigration_data.drop(labels=rows_to_drop, axis='index').head(5))

# drop_duplicates() returns the dataframe without rows that duplicate an
# earlier row; by default all columns are compared.
deduplicated = immigration_data.drop_duplicates()
print(deduplicated.head(5))

# The `subset` parameter restricts the duplicate check to the listed columns.
# Here only AreaName is compared, so for each distinct AreaName the first row
# is kept (keep='first' is the default) and every later row with the same
# AreaName is removed.
one_row_per_area = immigration_data.drop_duplicates(subset=['AreaName'], keep='first')
print(one_row_per_area.head(10))

# reset_index() gives the dataframe a new default integer index (0, 1, 2, ...).
# With drop=False (the default) the old index is NOT discarded: it is kept as
# a regular column (named 'index' when the old index has no name).
# Pass drop=True instead to throw the old index away.
reindexed = immigration_data.reset_index(drop=False)
print(reindexed.head(10))

# Series.nunique() returns how many distinct values a column contains
# (missing values are not counted by default).
coverage = immigration_data['Coverage']
print(coverage.nunique())

# Series.unique() returns the distinct values themselves, as an array.
print(coverage.unique())

# Series.value_counts() counts how often each value occurs in a column.
# The result is a series whose index holds the distinct values and whose
# values hold the counts. Keeping the result in a variable lets us both
# print it and plot it.
region_column = immigration_data['RegName']
region_wise_count = region_column.value_counts()
print(region_wise_count)

# Draw the counts as a horizontal bar chart and display the figure.
region_wise_count.plot.barh()
plt.show()

# rename() changes column labels; the mapping goes from old name to new name.
# inplace=True applies the change to immigration_data directly instead of
# returning a renamed copy.
column_renames = {
    'OdName': 'Country',
    'AreaName': 'Continent',
    'RegName': 'Region',
    'DevName': 'Status',
}
immigration_data.rename(columns=column_renames, inplace=True)

# The Type, Coverage, AREA, REG and DEV columns are not required,
# so they are dropped from immigration_data in place.
unused_columns = ['Type', 'Coverage', 'AREA', 'REG', 'DEV']
immigration_data.drop(labels=unused_columns, axis='columns', inplace=True)

# Print the remaining column labels to confirm that the columns listed
# above have been removed.
print(immigration_data.columns)