### As a dual citizen, I am curious which passport I should best use entering what country.

In [52]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np

In [9]:
german = "https://en.wikipedia.org/wiki/Visa_requirements_for_German_citizens"
polish = "https://en.wikipedia.org/wiki/Visa_requirements_for_Polish_citizens"

de_html = requests.get(german).text
pl_html = requests.get(polish).text

In [35]:
de_soup = BeautifulSoup(de_html, 'lxml')
de = pd.read_html(str(de_soup.find("table", {'class' : 'sortable wikitable'})), skiprows=1)[0]

pl_soup = BeautifulSoup(pl_html, 'lxml')
pl = pd.read_html(str(pl_soup.find("table", {'class' : 'sortable wikitable'})), skiprows=1)[0]

In [36]:
de.head()

Unnamed: 0,0,1,2,3
0,Afghanistan,Visa required[32],,
1,Albania,Visa not required[33],90 days,ID card valid
2,Algeria,Visa required[34],,
3,Andorra,Visa not required[35],,ID card valid
4,Angola,eVisa[36][37],30 days,Visitors who have been granted an online pre-v...


In [37]:
pl.head()

Unnamed: 0,0,1,2,3
0,Afghanistan,Visa required[2],,
1,Albania,Visa not required[3],90 days,ID card valid
2,Algeria,Visa required[4],,
3,Andorra,Visa not required[5],,ID card valid
4,Angola,eVisa[6][7],30 days,Visitors who have been granted an online pre-v...


In [38]:
# I am going to merge them into one dataframe to avoid running every function twice on each data frame
# and then separate them when I'm done

In [39]:
de.columns = ['country', 'status', 'duration', 'notes']
pl.columns = ['country', 'status', 'duration', 'notes']

de['cat'] = 'DE'
pl['cat'] = 'PL'

df = de.append(pl)

In [40]:
de.status.str.contains("\[").value_counts()

True    193
Name: status, dtype: int64

In [41]:
df.status = df.status.str.findall("^(.+?)\[").apply(lambda x: x[0] if len(x) > 0 else np.nan)

In [42]:
df.status.value_counts()

Visa not required                  230
Visa required                       56
eVisa / Visa on arrival             40
Visa on arrival                     28
eVisa                               20
eVisitor                             2
Tourist Card required                2
Electronic Travel Authority          2
Visa Waiver Program                  2
Electronic Travel Authorization      1
Free visa on arrival                 1
E-tourist card                       1
eVisa / Free visa on arrival         1
Name: status, dtype: int64

In [43]:
d = {'eVisa / Visa on arrival' : 'eVisa',
    'Visa Waiver Program' : 'Visa not required',
    'Tourist Card required' : 'Visa required',
    'Tourist Card on arrival' : 'Visa on arrival',
    'eVisitor' : 'eVisa',
    'Free visa on arrival' : 'Visa on arrival'}

In [44]:
df.status.replace(d, inplace=True)

In [45]:
df.head()

Unnamed: 0,country,status,duration,notes,cat
0,Afghanistan,Visa required,,,DE
1,Albania,Visa not required,90 days,ID card valid,DE
2,Algeria,Visa required,,,DE
3,Andorra,Visa not required,,ID card valid,DE
4,Angola,eVisa,30 days,Visitors who have been granted an online pre-v...,DE


In [54]:
df.notes.fillna('missing', inplace=True)

In [89]:
df[df.notes.str.contains('[0-9]')].head()

Unnamed: 0,country,status,duration,notes,cat
4,Angola,eVisa,30,Visitors who have been granted an online pre-v...,DE
8,Australia,eVisa,90,90 days on each visit in 12-month period if gr...,DE
15,Belarus,Visa not required,30,Must arrive and depart via Minsk International...,DE
21,Bosnia and Herzegovina,Visa not required,90,90 days within any 6-month period ID card valid,DE
23,Brazil,Visa not required,90,90 days within any 180 day period,DE


In [62]:
mask = df.duration.isnull()
mask2 = df.notes.str.contains('[0-9]')

df[mask * mask2]

Unnamed: 0,country,status,duration,notes,cat
35,China,Visa required,,72-hour visit without a visa when in transit a...,DE
80,Israel,Visa not required,,3 months for tourism only.[131] German citizen...,DE
84,Jordan,Visa on arrival,,Conditions apply[136],DE
130,Pakistan,Electronic Travel Authorization,,Electronic Travel Authorization to obtain a vi...,DE
141,Russia,Visa required,,Free-of-charge eVisa is required for visits of...,DE
158,Somalia,Visa on arrival,,"Available at Berbera, Borama, Burao, Erigavo a...",DE
179,Uganda,eVisa,,May apply online.[248],DE
35,China,Visa required,,72-hour visit without a visa when in transit a...,PL
85,Jordan,Visa on arrival,,Conditions apply.[109] Not available at all en...,PL
131,Pakistan,eVisa,,Online Visa eligible.[163] Electronic Travel A...,PL


In [68]:
df.iloc[80]['duration'] = '90'

In [69]:
df.duration.fillna("0", inplace=True)

In [70]:
df.loc[df.duration.str.contains("month"), 'duration']

5          6 months
11         8 months
14         6 months
17          1 month
26          1 month
30         6 months
41         3 months
52         3 months
58         4 months
66         3 months
70         3 months
72         3 months
86         3 months
90         3 months
94          1 month
103        3 months
119        3 months
123        3 months
143        3 months
152        3 months
169        3 months
175        4 months
176        3 months
192        3 months
5      6 months[11]
11         3 months
14         3 months
17          1 month
26          1 month
30         6 months
41         3 months
52         3 months
58         4 months
67         3 months
73         3 months
81         3 months
87         3 months
91         3 months
95          1 month
104        3 months
120        3 months
143        3 months
152        3 months
169        3 months
175        3 months
176        3 months
192        3 months
Name: duration, dtype: object

In [71]:
# Converting months to days

df.loc[df.duration.str.contains("month"), 'duration'] = \
df.loc[df.duration.str.contains("month"), 'duration'].str.findall("([0-9])").apply(lambda x : str(int(x[0]) * 30))

In [72]:
df.loc[df.duration.str.contains("days"), 'duration'] = \
df.loc[df.duration.str.contains("days"), 'duration'].str.findall("([0-9]+)\sdays").apply(lambda x: x[0])

In [73]:
df.loc[df.duration.str.contains("Freedom of movement"), 'duration'] = '999'

In [74]:
df[df.duration.str.contains("[^0-9]")]

Unnamed: 0,country,status,duration,notes,cat
63,Georgia,Visa not required,1 year,ID card valid,DE
37,Comoros,Visa on arrival,2 weeks,missing,PL
63,Georgia,Visa not required,1 year,ID card valid,PL


In [75]:
df.replace("1 year", "365", inplace=True)
df.replace("2 weeks", "14", inplace=True)

In [76]:
df.duration = df.duration.astype(int)

In [77]:
pl = df[df.cat == "PL"]
de = df[df.cat == "DE"]

In [78]:
len(de) == len(pl)

True

In [79]:
merged = pd.merge(pl, df, on="country", suffixes=('_PL', '_DE'))

### Longer stay on German passport

In [80]:
merged[merged.duration_DE > merged.duration_PL]

Unnamed: 0,country,status_PL,duration_PL,notes_PL,cat_PL,status_DE,duration_DE,notes_DE,cat_DE
22,Bahamas,Visa not required,90,missing,PL,Visa not required,240,missing,DE
28,Barbados,Visa not required,90,missing,PL,Visa not required,180,missing,DE
141,Guyana,Visa required,0,missing,PL,Visa not required,90,missing,DE
165,Jamaica,Visa not required,30,missing,PL,Visa not required,90,missing,DE
191,Lesotho,eVisa,0,missing,PL,Visa not required,14,missing,DE
229,Mongolia,Visa required,0,missing,PL,Visa not required,30,missing,DE
317,South Africa,Visa not required,30,missing,PL,Visa not required,90,missing,DE
327,Suriname,eVisa,0,missing,PL,E-tourist card,90,Multiple entry eVisa is also available.[230],DE
349,Tunisia,Visa not required,90,missing,PL,Visa not required,120,ID card valid in certain cases,DE


### Longer stay on Polish passport

In [83]:
merged[merged.duration_DE < merged.duration_PL]

Unnamed: 0,country,status_PL,duration_PL,notes_PL,cat_PL,status_DE,duration_DE,notes_DE,cat_DE
74,Comoros,Visa on arrival,14,missing,PL,Visa on arrival,0,missing,DE
151,India,eVisa,180,e-Visa holders must arrive via 26 designated a...,PL,eVisa,60,e-Visa holders must arrive via 26 designated a...,DE
205,Malawi,eVisa,90,missing,PL,eVisa,0,missing,DE
377,Vietnam,eVisa,30,missing,PL,Visa not required,15,A single entry eVisa valid for 30 days is also...,DE


### Visa not required on German passport

In [87]:
mask1 = merged.status_DE.str.contains("not required")
mask2 = merged.status_PL.str.contains("not required")

merged[mask1 * ~mask2]

Unnamed: 0,country,status_PL,duration_PL,notes_PL,cat_PL,status_DE,duration_DE,notes_DE,cat_DE
141,Guyana,Visa required,0,missing,PL,Visa not required,90,missing,DE
191,Lesotho,eVisa,0,missing,PL,Visa not required,14,missing,DE
229,Mongolia,Visa required,0,missing,PL,Visa not required,30,missing,DE
239,Namibia,Visa on arrival,90,Available at Hosea Kutako International Airport.,PL,Visa not required,90,3 months within a calendar year,DE
351,Turkey,eVisa,90,missing,PL,Visa not required,90,ID card valid[243] Former Turkish citizens wit...,DE
377,Vietnam,eVisa,30,missing,PL,Visa not required,15,A single entry eVisa valid for 30 days is also...,DE


### Visa not required on Polish passport

In [88]:
merged[~mask1 * mask2]

Unnamed: 0,country,status_PL,duration_PL,notes_PL,cat_PL,status_DE,duration_DE,notes_DE,cat_DE
