In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the workbook's second sheet (sheet_name is a zero-based position),
# skipping the 20 rows above the table and the 2 rows below it.
immigration_dataframe = pd.read_excel(
    'immigration_data.xlsx',
    sheet_name=1,
    skiprows=20,
    skipfooter=2,
)

print(immigration_dataframe)

# head() returns the first rows of the dataframe (5 when no count is given)
immigration_dataframe.head()

# tail() returns the last rows of the dataframe (5 when no count is given)
immigration_dataframe.tail()

# Slicing the dataframe directly selects rows: here rows 10 through 19
immigration_dataframe[10:20]

# iloc accesses data by integer position; a single integer returns
# that one row as a Series.
immigration_dataframe.iloc[10]

# The columns attribute holds the column labels
immigration_dataframe.columns

# Indexing with a single column label selects that column
immigration_dataframe['OdName']

# Indexing with a list of labels selects several columns at once
immigration_dataframe[['OdName', 1980, 1981]]

# iloc also takes a column slice: every row, first 5 columns
immigration_dataframe.iloc[:, :5]

# Allow up to 50 columns to be shown when a dataframe is displayed
pd.options.display.max_columns = 50

# info() prints a summary of the dataframe (columns, non-null counts, dtypes)
immigration_dataframe.info()

# describe() returns summary statistics of the dataframe
immigration_dataframe.describe()

# Passing exclude leaves the given dtypes out of the summary;
# here the numeric columns are dropped, so the rest are described.
immigration_dataframe.describe(exclude=['number'])

# select_dtypes() picks columns by data type; it accepts either a
# single dtype or a list of dtypes.
immigration_dataframe.select_dtypes(include='number')
immigration_dataframe.select_dtypes(include='object')

# The shape attribute is a (rows, columns) tuple
immigration_dataframe.shape

# New columns are added by assigning to a column label that does not exist yet.
# The random array is sized from the dataframe itself instead of a hard-coded
# row count: assigning an array whose length differs from the number of rows
# raises a ValueError.
# randint draws integers from 10 (inclusive) up to 1000 (exclusive).
random_nums = np.random.randint(10, 1000, size=len(immigration_dataframe))
immigration_dataframe['random'] = random_nums  # Adds (or overwrites) the 'random' column
immigration_dataframe

# Build a 'total' column by adding up the year columns 1980 through 2013.
# Summing along the columns axis produces one value per row, i.e. the
# sum across all selected years for that row.
immigration_dataframe['total'] = immigration_dataframe[[*range(1980, 2014)]].sum(axis='columns')

# Show each row's 'OdName' next to its computed total
immigration_dataframe.loc[:, ['OdName', 'total']]

# Before charting, order the rows from the largest total to the smallest.
# inplace=True reorders the existing dataframe instead of returning a new one.
immigration_dataframe.sort_values('total', ascending=False, inplace=True)

# Bar chart of the 50 rows with the highest totals, one bar per 'OdName'
immigration_dataframe.head(50).plot.bar(
    x='OdName',
    y='total',
    title='Immigration Data',
    figsize=(20, 10),
)