## Step 1: Importing and reading data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the statistics CSV; the path is relative to the notebook's working directory.
stats = pd.read_csv("can-stats-2012-22.csv")


## Step 2: Data Preparation

In [None]:
# Preview the first five rows of the raw data.
stats.head(5)

: 

In [None]:
# Explore the data: column overview first, then descriptive statistics.
# .info() already lists every column's dtype, so a separate stats.dtypes
# call is not needed.
stats.info()

stats.describe()

: 

In [None]:
# Count the missing values in each column.
stats.isna().sum()

: 

## Step 3: Exploratory Data Analysis & Visualization

**Question 1: From which countries has Canada admitted the highest number of refugees?**

In [None]:
# Sum 'UNHCR-refugees' per country of origin and keep the ten largest totals.
high_cntry_refugee = (
    stats
    .groupby('Country-of-origin')['UNHCR-refugees']
    .sum()
    .reset_index(name='UNHCR-refugees')
    .sort_values(by='UNHCR-refugees', ascending=False)
    .head(10)
)

print(high_cntry_refugee)

: 

In [None]:
# Show the top-10 table with in-cell bars; the index is renumbered from 0.
(
    high_cntry_refugee
    .reset_index(drop=True)
    .style
    .bar()
)

: 

In [None]:
# Horizontal bar chart of the top-10 table, drawn through the pandas plotting accessor.
high_cntry_refugee.plot.barh(
    x='Country-of-origin',
    title='Countries With The Highest Number of Refugees',
    xlabel='Refugees',
    ylabel='Country of origin',
)
plt.show()

: 

In [None]:
# The same top-10 ranking, drawn as a seaborn horizontal bar plot.
sns.barplot(
    data=high_cntry_refugee,
    y='Country-of-origin',
    x='UNHCR-refugees',
)
plt.title('Countries With Highest Number of Refugees')
plt.show()

: 

**Question 2: What is the total number of refugees resettled in Canada per year?**

In [None]:
# Sum 'UNHCR-refugees' for each year.
resettled_refugee_year = (
    stats
    .groupby('Year')['UNHCR-refugees']
    .sum()
    .reset_index(name='UNHCR-refugees')
)
print(resettled_refugee_year)

: 

In [None]:
# Line chart of the yearly refugee totals, one marker per year.
sns.lineplot(
    data=resettled_refugee_year,
    x='Year',
    y='UNHCR-refugees',
    marker='o',
)
plt.title("Total Resettled Refugees Per Year")
plt.ylabel('Total Refugees')
plt.show()

: 

**Question 3: What are the countries of origin for the majority of asylum claims made in Canada?**

In [None]:
# Sum 'Asylum-seekers' per country of origin and keep the ten largest totals.
asylum_country = (
    stats
    .groupby('Country-of-origin')['Asylum-seekers']
    .sum()
    .reset_index(name='Asylum-seekers')
    .sort_values(by='Asylum-seekers', ascending=False)
    .head(10)
)
# Show the table with green in-cell bars; the index is renumbered from 0.
asylum_country.reset_index(drop=True).style.bar(color="green")

: 

In [None]:
# Display the results excluding the "Unknown" country of origin.
# NOTE(review): the draft below is disabled and does not match that intent as
# written: it compares with == (which would select "Unknown", not exclude it)
# and wraps the expression in a list. Presumably a != filter on
# 'Country-of-origin' was intended; confirm before enabling.
#asylum_country.query(['Country-of-origin=="Unknown"'])


: 

In [None]:
# Horizontal seaborn bar plot of the ten largest asylum-seeker totals.
sns.barplot(
    data=asylum_country,
    x='Asylum-seekers',
    y='Country-of-origin',
)
plt.title('Countries With Highest Number of Asylum Seekers')
plt.show()

: 

**Question 4: What is the total number of asylum claims made in Canada every year?**

In [None]:
# Sum 'Asylum-seekers' for each year.
asy_claim_year = (
    stats
    .groupby('Year')['Asylum-seekers']
    .sum()
    .reset_index(name='Asylum-seekers')
)
print(asy_claim_year)

: 

In [None]:
# Line chart of the yearly asylum-seeker totals, one marker per year.
sns.lineplot(
    data=asy_claim_year,
    x='Year',
    y='Asylum-seekers',
    marker='o',
)
plt.title('Total Asylum Seekers Per Year')
plt.ylabel('Total asylum Seekers')
plt.show()


: 

**Question 5: What are the general trends in refugee and asylum statistics from 2012-2022?**

In [None]:
# Build a data frame holding the year plus the refugee and asylum-seeker
# totals for that year.
# merge() returns a new DataFrame, so resettled_refugee_year is not modified
# (a plain `trend_df = resettled_refugee_year` would only alias it), and the
# two tables are matched on 'Year' instead of on their positional index.
# Selecting the columns explicitly keeps the cell safe to re-run.
trend_df = resettled_refugee_year[['Year', 'UNHCR-refugees']].merge(
    asy_claim_year[['Year', 'Asylum-seekers']],
    on='Year',
    how='left',
)

plt.figure(figsize=(8, 6))
# label= gives each series a legend entry; without it the two lines
# cannot be told apart.
sns.lineplot(data=trend_df, x='Year', y='UNHCR-refugees', marker='s', color='salmon', label='UNHCR-refugees')
sns.lineplot(data=trend_df, x='Year', y='Asylum-seekers', marker='d', color='teal', label='Asylum-seekers')

plt.title('Trends In Refugee and Asylum Statistics (2012-2022)')
plt.ylabel('Total People')

# add a legend and grid
plt.legend()
plt.grid()

plt.show()

: 

In [None]:
# Inspect the first ten rows of the combined yearly totals.
trend_df.head(n=10)

: 

In [None]:
# A grouped seaborn barplot needs long-form data, so reshape trend_df into
# one row per (Year, Persons) pair with the total stored in 'Counts'.
melted_df = trend_df.melt(
    id_vars=['Year'],
    value_vars=['UNHCR-refugees', 'Asylum-seekers'],
    var_name='Persons',
    value_name='Counts',
)

plt.figure(figsize=(10, 6))
sns.barplot(
    data=melted_df,
    x='Year',
    y='Counts',
    hue='Persons',
)

# Title and axis labels
plt.title('Total Number of Refugees and Asylum Seekers from 2012-2022')
plt.xlabel('Year')
plt.ylabel('Count')

plt.show()

: 

## 2012-2022 Canada Refugee Statistics EDA Results Summary

**1. Top 10 countries from which Canada has admitted the highest number of refugees.**
* Colombia (108416)
* China (98586)
* Ukraine (88376)
* Pakistan (74737)
* Haiti (70956)
* Sri Lanka (66343)
* Nigeria (60554)
* Mexico (51072)
* Türkiye (42533)
* Iran (Islamic Rep. of) (40881)

**2. Total number of refugees resettled in Canada per year**
* 2012: 163751   
* 2013: 160347
* 2014: 149164
* 2015: 135890
* 2016: 97322
* 2017: 104768
* 2018: 114101
* 2019: 101757
* 2020: 109214
* 2021: 130125
* 2022: 140621

**3. Top 10 countries of origin for the majority of asylum claims in Canada**
* Unknown: 56098
* Nigeria: 51620
* India: 48806
* Mexico: 47146
* Haiti: 40908
* Colombia: 30475
* China: 21526
* Pakistan: 19023
* Türkiye: 18166
* Iran (Islamic Rep. of): 15011

**4. Total number of asylum seekers in Canada per year**
* 2012: 32647
* 2013: 22145
* 2014: 16699
* 2015: 19631
* 2016: 23946
* 2017: 51859
* 2018: 78766
* 2019: 97017
* 2020: 85352
* 2021: 63143
* 2022: 113066

**5. General trends in refugee and asylum statistics from 2012-2022**
* There was a continuous decline in the number of refugees being resettled in Canada between the years 2012 and 2016.
* A slight increase was experienced between 2016 and 2018. Despite a slight drop from 2018 to 2019, there has been a steady increase in the number of refugees between 2019 and 2022.

* There was a slight decline between 2012 and 2014 in the number of asylum seekers. A slight increase occurred between 2014 and 2016, followed by a steady increase between 2016 and 2019.
* A significant decline followed between 2019 and 2021. However, there was a steep increase in the number of asylum seekers between 2021 and 2022.