# Migration between Germany and foreign countries 1991 to 2021

https://www.destatis.de/EN/Themes/Society-Environment/Population/Migration/Tables/migration-total.html

# Import Libraries

In [1]:
import requests
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

import warnings
import requests
from bs4 import BeautifulSoup

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['figure.figsize'] = [12, 9]
plt.rcParams['font.size'] = 10
path = os.getcwd()                                         # get current working directory
warnings.simplefilter('ignore')

# Fetch Data

In [3]:
url = 'https://www.destatis.de/EN/Themes/Society-Environment/Population/Migration/Tables/migration-total.html'

r = requests.get(url)

print(f"Status Code: {r.status_code} -- Content-Type: {r.headers['Content-Type']} -- Encoding: {r.encoding}")

Status Code: 200 -- Content-Type: text/html;charset=utf-8 -- Encoding: utf-8


# Parse HTML Document

In [4]:
soup = BeautifulSoup(r.text, 'html.parser')

# Navigate The Data Structure

In [5]:
tbody = soup.find('tbody')
trs = tbody.find_all('tr')

year = []
totarr = []
totdepart = []
totbal = []
natarr = []
natdepart = []
natbal = []
forarr = []
fordepart = []
forbal = []


for td in trs:
  year.append(td.findAll('td')[0].string)
  totarr.append(td.findAll('td')[1].string)
  totdepart.append(td.find_all('td')[2].string)
  totbal.append(td.find_all('td')[3].string)
  natarr.append(td.find_all('td')[4].string)
  natdepart.append(td.find_all('td')[5].string)
  natbal.append(td.find_all('td')[6].string)
  forarr.append(td.find_all('td')[7].string)
  fordepart.append(td.find_all('td')[8].string)
  forbal.append(td.find_all('td')[9].string)

# Create DataFrame

In [6]:
data = {
    'Year': year,
    'TotalArrivals': totarr,
    'TotalDepartures': totdepart,
    'TotalBalance': totbal,
    'NationalsArrivals': natarr,
    'NationalsDepartures': natdepart,
    'NationalsBalance': natbal,
    'ForeignersArrivals': natarr,
    'ForeignersDepartures': fordepart,
    'ForeignersBalance': forbal  
}

df = pd.DataFrame(data=data)

# Head and Tail

In [7]:
df.head()

Unnamed: 0,Year,TotalArrivals,TotalDepartures,TotalBalance,NationalsArrivals,NationalsDepartures,NationalsBalance,ForeignersArrivals,ForeignersDepartures,ForeignersBalance
0,2021,1 323 466,994 303,329 163,183 650,247 829,-64 179,183 650,746 474,393 342
1,2020,1 186 702,966 451,220 251,191 883,220 239,-28 356,191 883,746 212,248 607
2,2019,1 558 612,1 231 552,327 060,212 669,270 294,-57 625,212 669,961 258,384 685
3,2018,1 585 112,1 185 432,399 680,201 531,261 851,-60 320,201 531,923 581,460 000
4,2017,1 550 721,1 134 641,416 080,166 703,249 181,-82 478,166 703,885 460,498 558


In [8]:
df.tail()

Unnamed: 0,Year,TotalArrivals,TotalDepartures,TotalBalance,NationalsArrivals,NationalsDepartures,NationalsBalance,ForeignersArrivals,ForeignersDepartures,ForeignersBalance
26,1995,1 096 048,698 113,397 935,303 347,130 672,172 675,303 347,567 441,225 260
27,1994,1 082 553,767 555,314 998,305 037,138 280,166 757,305 037,629 275,148 241
28,1993,1 277 408,815 312,462 096,287 561,104 653,182 908,287 561,710 659,279 188
29,1992,1 502 198,720 127,782 071,290 850,105 171,185 679,290 850,614 956,596 392
30,1991,1 198 978,596 455,602 523,273 633,98 915,174 718,273 633,497 540,427 80


In [9]:
df.loc[df['Year'].isnull()]

Unnamed: 0,Year,TotalArrivals,TotalDepartures,TotalBalance,NationalsArrivals,NationalsDepartures,NationalsBalance,ForeignersArrivals,ForeignersDepartures,ForeignersBalance
5,,1 865 122,1 365 178,499944,146 047,281 411,-135 364,146 047,1 083 767,635 308
11,,798 282,670 605,127 677,114752,141 000,-26 248,114752,529 605,153 925
12,,721 014,733 796,-12 782,114700,154 988,-40 288,114700,578 808,27 506
13,,682 146,737 889,-55743,108 331,174 759,-66 428,108 331,563 130,10 685


In [10]:
# set index location 5, 11, 12 & 13 for year column to 2016, 2010, 2009, 2008
df['Year'].iat[5], df['Year'].iat[11], df['Year'].iat[12], df['Year'].iat[13] = 2016, 2010, 2009, 2008

In [11]:
df['Year'].isnull().sum()                         

0

In [12]:
df.shape

(31, 10)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Year                  31 non-null     object
 1   TotalArrivals         31 non-null     object
 2   TotalDepartures       31 non-null     object
 3   TotalBalance          31 non-null     object
 4   NationalsArrivals     31 non-null     object
 5   NationalsDepartures   31 non-null     object
 6   NationalsBalance      31 non-null     object
 7   ForeignersArrivals    31 non-null     object
 8   ForeignersDepartures  31 non-null     object
 9   ForeignersBalance     31 non-null     object
dtypes: object(10)
memory usage: 2.5+ KB


In [14]:
df.dtypes.value_counts()

object    10
dtype: int64