## **A) Tratamento de dados**

In [132]:
pip install jupyter-dash

Note: you may need to restart the kernel to use updated packages.


In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# eliminando primieras linhas (título do doc)
df = pd.read_csv("dados_paises.csv", delimiter=",")

In [3]:
# transpondo colunas de interesse para ser possível leitura no Tableau
df1 = df.melt(id_vars = ['Country Name', 'Country Code', 'Series Name', 'Series Code'],
             value_vars = ['1960','1961','1962','1963','1964','1965','1966','1967','1968','1969',
                           '1970','1971','1972','1973','1974','1975','1976','1977','1978','1979',
                           '1980','1981','1982','1983','1984','1985','1986','1987','1988','1999',
                           '1990','1991','1992','1993','1994','1995','1996','1997','1998','1999',
                           '2000','2001','2002','2003','2004','2005','2006','2007','2008','2009',
                           '2010','2011','2012','2013','2014','2015','2016','2017','2018','2019',
                           '2020','2021'],
             var_name='year', value_name='value')

In [4]:
df1 = df1[['Country Name', 'Country Code', 'Series Name', 'year', 'value']]

In [5]:
df1 = df1.rename(columns={'Country Name': 'country', 'Country Code': 'country_code', 'Series Name': 'indicator'})

In [6]:
df1

Unnamed: 0,country,country_code,indicator,year,value
0,Belgium,BEL,life_expectancy,1960,69.701951
1,Albania,ALB,life_expectancy,1960,54.439000
2,Austria,AUT,life_expectancy,1960,68.585610
3,Belarus,BLR,life_expectancy,1960,69.254610
4,Bulgaria,BGR,life_expectancy,1960,69.247561
...,...,...,...,...,...
52943,Virgin Islands (U.S.),VIR,inflation,2021,
52944,West Bank and Gaza,PSE,inflation,2021,1.237481
52945,,,"inflation,FP.CPI.TOTL.ZG,""Yemen, Rep."",YEM,,,,...",2021,
52946,Zambia,ZMB,inflation,2021,22.021234


In [7]:
busca = ['Albania','Austria','Belarus','Belgium','Bosnia and Herzegovina','Bulgaria','Croatia','Cyprus','Czechia',
         'Denmark','Estonia','Finland','France','Germany','Greece','Hungary','Iceland','Ireland','Italy','Latvia',
         'Lithuania','Luxembourg','Malta','Moldova','Montenegro','North Macedonia','Norway','Portugal','Romania',
         'Russian Federation','Slovak Republic','Serbia','Slovenia','Spain','Sweden','Switzerland','The Netherlands',
         'Ukraine','United Kingdom']
df2 = df1[df1['country'].isin(busca)]

In [9]:
len(busca)

39

In [8]:
df2.dtypes

country          object
country_code     object
indicator        object
year             object
value           float64
dtype: object

In [12]:
df2.indicator.unique()

array(['life_expectancy', 'gdp_growth', 'gdp_per_capita', 'inflation'],
      dtype=object)

In [10]:
df2.nunique()

country           38
country_code      38
indicator          4
year              61
value           7163
dtype: int64

In [43]:
df2.country = df2.country.astype('string')
df2.country_code = df2.country_code.astype('string')
df2.indicator = df2.indicator.astype('string')
df2.year = pd.to_numeric(df2.year)
df2.value = round(df2.value,2)

In [46]:
df2 = df2.sort_values(by=['indicator','country'])

In [47]:
df2

Unnamed: 0,country,country_code,indicator,year,value
218,Albania,ALB,gdp_growth,1960,
1086,Albania,ALB,gdp_growth,1961,
1954,Albania,ALB,gdp_growth,1962,
2822,Albania,ALB,gdp_growth,1963,
3690,Albania,ALB,gdp_growth,1964,
...,...,...,...,...,...
48813,United Kingdom,GBR,life_expectancy,2017,81.26
49681,United Kingdom,GBR,life_expectancy,2018,81.26
50549,United Kingdom,GBR,life_expectancy,2019,81.20
51417,United Kingdom,GBR,life_expectancy,2020,80.90


## Teste fusão

In [49]:
df_gdp = df2[df2.indicator=='gdp_growth']

In [50]:
df_gdp

Unnamed: 0,country,country_code,indicator,year,value
218,Albania,ALB,gdp_growth,1960,
1086,Albania,ALB,gdp_growth,1961,
1954,Albania,ALB,gdp_growth,1962,
2822,Albania,ALB,gdp_growth,1963,
3690,Albania,ALB,gdp_growth,1964,
...,...,...,...,...,...
49030,United Kingdom,GBR,gdp_growth,2017,2.44
49898,United Kingdom,GBR,gdp_growth,2018,1.71
50766,United Kingdom,GBR,gdp_growth,2019,1.60
51634,United Kingdom,GBR,gdp_growth,2020,-11.03


In [55]:
x = 60 - df_gdp.groupby(['country']).count().value

In [56]:
x.sort_values(ascending=False)

country
Montenegro                36
Latvia                    34
Slovenia                  34
Serbia                    34
Estonia                   34
Lithuania                 34
Moldova                   34
Croatia                   34
Bosnia and Herzegovina    33
Slovak Republic           31
Hungary                   30
Czechia                   29
Romania                   29
North Macedonia           29
Belarus                   29
Russian Federation        28
Ukraine                   27
Iceland                   25
Switzerland               20
Albania                   20
Bulgaria                  20
Cyprus                    15
Malta                     10
Ireland                   10
Germany                   10
Austria                    0
Denmark                    0
Belgium                    0
Sweden                     0
Spain                      0
France                     0
Finland                    0
Luxembourg                 0
Portugal                   0
Norway

In [67]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Albania']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

       country country_code   indicator  year  value
218    Albania          ALB  gdp_growth  1960    NaN
1086   Albania          ALB  gdp_growth  1961    NaN
1954   Albania          ALB  gdp_growth  1962    NaN
2822   Albania          ALB  gdp_growth  1963    NaN
3690   Albania          ALB  gdp_growth  1964    NaN
4558   Albania          ALB  gdp_growth  1965    NaN
5426   Albania          ALB  gdp_growth  1966    NaN
6294   Albania          ALB  gdp_growth  1967    NaN
7162   Albania          ALB  gdp_growth  1968    NaN
8030   Albania          ALB  gdp_growth  1969    NaN
8898   Albania          ALB  gdp_growth  1970    NaN
9766   Albania          ALB  gdp_growth  1971    NaN
10634  Albania          ALB  gdp_growth  1972    NaN
11502  Albania          ALB  gdp_growth  1973    NaN
12370  Albania          ALB  gdp_growth  1974    NaN
13238  Albania          ALB  gdp_growth  1975    NaN
14106  Albania          ALB  gdp_growth  1976    NaN
14974  Albania          ALB  gdp_growth  1977 

In [68]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='North Macedonia']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

               country country_code   indicator  year  value
367    North Macedonia          MKD  gdp_growth  1960    NaN
1235   North Macedonia          MKD  gdp_growth  1961    NaN
2103   North Macedonia          MKD  gdp_growth  1962    NaN
2971   North Macedonia          MKD  gdp_growth  1963    NaN
3839   North Macedonia          MKD  gdp_growth  1964    NaN
4707   North Macedonia          MKD  gdp_growth  1965    NaN
5575   North Macedonia          MKD  gdp_growth  1966    NaN
6443   North Macedonia          MKD  gdp_growth  1967    NaN
7311   North Macedonia          MKD  gdp_growth  1968    NaN
8179   North Macedonia          MKD  gdp_growth  1969    NaN
9047   North Macedonia          MKD  gdp_growth  1970    NaN
9915   North Macedonia          MKD  gdp_growth  1971    NaN
10783  North Macedonia          MKD  gdp_growth  1972    NaN
11651  North Macedonia          MKD  gdp_growth  1973    NaN
12519  North Macedonia          MKD  gdp_growth  1974    NaN
13387  North Macedonia  

In [71]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Slovak Republic']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

               country country_code   indicator  year  value
390    Slovak Republic          SVK  gdp_growth  1960    NaN
1258   Slovak Republic          SVK  gdp_growth  1961    NaN
2126   Slovak Republic          SVK  gdp_growth  1962    NaN
2994   Slovak Republic          SVK  gdp_growth  1963    NaN
3862   Slovak Republic          SVK  gdp_growth  1964    NaN
4730   Slovak Republic          SVK  gdp_growth  1965    NaN
5598   Slovak Republic          SVK  gdp_growth  1966    NaN
6466   Slovak Republic          SVK  gdp_growth  1967    NaN
7334   Slovak Republic          SVK  gdp_growth  1968    NaN
8202   Slovak Republic          SVK  gdp_growth  1969    NaN
9070   Slovak Republic          SVK  gdp_growth  1970    NaN
9938   Slovak Republic          SVK  gdp_growth  1971    NaN
10806  Slovak Republic          SVK  gdp_growth  1972    NaN
11674  Slovak Republic          SVK  gdp_growth  1973    NaN
12542  Slovak Republic          SVK  gdp_growth  1974    NaN
13410  Slovak Republic  

In [69]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Bosnia and Herzegovina']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

                      country country_code   indicator  year  value
266    Bosnia and Herzegovina          BIH  gdp_growth  1960    NaN
1134   Bosnia and Herzegovina          BIH  gdp_growth  1961    NaN
2002   Bosnia and Herzegovina          BIH  gdp_growth  1962    NaN
2870   Bosnia and Herzegovina          BIH  gdp_growth  1963    NaN
3738   Bosnia and Herzegovina          BIH  gdp_growth  1964    NaN
4606   Bosnia and Herzegovina          BIH  gdp_growth  1965    NaN
5474   Bosnia and Herzegovina          BIH  gdp_growth  1966    NaN
6342   Bosnia and Herzegovina          BIH  gdp_growth  1967    NaN
7210   Bosnia and Herzegovina          BIH  gdp_growth  1968    NaN
8078   Bosnia and Herzegovina          BIH  gdp_growth  1969    NaN
8946   Bosnia and Herzegovina          BIH  gdp_growth  1970    NaN
9814   Bosnia and Herzegovina          BIH  gdp_growth  1971    NaN
10682  Bosnia and Herzegovina          BIH  gdp_growth  1972    NaN
11550  Bosnia and Herzegovina          BIH  gdp_

In [70]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Croatia']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

       country country_code   indicator  year  value
222    Croatia          HRV  gdp_growth  1960    NaN
1090   Croatia          HRV  gdp_growth  1961    NaN
1958   Croatia          HRV  gdp_growth  1962    NaN
2826   Croatia          HRV  gdp_growth  1963    NaN
3694   Croatia          HRV  gdp_growth  1964    NaN
4562   Croatia          HRV  gdp_growth  1965    NaN
5430   Croatia          HRV  gdp_growth  1966    NaN
6298   Croatia          HRV  gdp_growth  1967    NaN
7166   Croatia          HRV  gdp_growth  1968    NaN
8034   Croatia          HRV  gdp_growth  1969    NaN
8902   Croatia          HRV  gdp_growth  1970    NaN
9770   Croatia          HRV  gdp_growth  1971    NaN
10638  Croatia          HRV  gdp_growth  1972    NaN
11506  Croatia          HRV  gdp_growth  1973    NaN
12374  Croatia          HRV  gdp_growth  1974    NaN
13242  Croatia          HRV  gdp_growth  1975    NaN
14110  Croatia          HRV  gdp_growth  1976    NaN
14978  Croatia          HRV  gdp_growth  1977 

In [72]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Serbia']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

      country country_code   indicator  year  value
385    Serbia          SRB  gdp_growth  1960    NaN
1253   Serbia          SRB  gdp_growth  1961    NaN
2121   Serbia          SRB  gdp_growth  1962    NaN
2989   Serbia          SRB  gdp_growth  1963    NaN
3857   Serbia          SRB  gdp_growth  1964    NaN
4725   Serbia          SRB  gdp_growth  1965    NaN
5593   Serbia          SRB  gdp_growth  1966    NaN
6461   Serbia          SRB  gdp_growth  1967    NaN
7329   Serbia          SRB  gdp_growth  1968    NaN
8197   Serbia          SRB  gdp_growth  1969    NaN
9065   Serbia          SRB  gdp_growth  1970    NaN
9933   Serbia          SRB  gdp_growth  1971    NaN
10801  Serbia          SRB  gdp_growth  1972    NaN
11669  Serbia          SRB  gdp_growth  1973    NaN
12537  Serbia          SRB  gdp_growth  1974    NaN
13405  Serbia          SRB  gdp_growth  1975    NaN
14273  Serbia          SRB  gdp_growth  1976    NaN
15141  Serbia          SRB  gdp_growth  1977    NaN
16009  Serbi

In [73]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Slovenia']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

        country country_code   indicator  year  value
391    Slovenia          SVN  gdp_growth  1960    NaN
1259   Slovenia          SVN  gdp_growth  1961    NaN
2127   Slovenia          SVN  gdp_growth  1962    NaN
2995   Slovenia          SVN  gdp_growth  1963    NaN
3863   Slovenia          SVN  gdp_growth  1964    NaN
4731   Slovenia          SVN  gdp_growth  1965    NaN
5599   Slovenia          SVN  gdp_growth  1966    NaN
6467   Slovenia          SVN  gdp_growth  1967    NaN
7335   Slovenia          SVN  gdp_growth  1968    NaN
8203   Slovenia          SVN  gdp_growth  1969    NaN
9071   Slovenia          SVN  gdp_growth  1970    NaN
9939   Slovenia          SVN  gdp_growth  1971    NaN
10807  Slovenia          SVN  gdp_growth  1972    NaN
11675  Slovenia          SVN  gdp_growth  1973    NaN
12543  Slovenia          SVN  gdp_growth  1974    NaN
13411  Slovenia          SVN  gdp_growth  1975    NaN
14279  Slovenia          SVN  gdp_growth  1976    NaN
15147  Slovenia          SVN

In [74]:
df_alb1 = df2[df2.indicator=='gdp_growth']
df_alb2 = df_alb1[df_alb1.country=='Montenegro']

with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df_alb2)

          country country_code   indicator  year  value
239    Montenegro          MNE  gdp_growth  1960    NaN
1107   Montenegro          MNE  gdp_growth  1961    NaN
1975   Montenegro          MNE  gdp_growth  1962    NaN
2843   Montenegro          MNE  gdp_growth  1963    NaN
3711   Montenegro          MNE  gdp_growth  1964    NaN
4579   Montenegro          MNE  gdp_growth  1965    NaN
5447   Montenegro          MNE  gdp_growth  1966    NaN
6315   Montenegro          MNE  gdp_growth  1967    NaN
7183   Montenegro          MNE  gdp_growth  1968    NaN
8051   Montenegro          MNE  gdp_growth  1969    NaN
8919   Montenegro          MNE  gdp_growth  1970    NaN
9787   Montenegro          MNE  gdp_growth  1971    NaN
10655  Montenegro          MNE  gdp_growth  1972    NaN
11523  Montenegro          MNE  gdp_growth  1973    NaN
12391  Montenegro          MNE  gdp_growth  1974    NaN
13259  Montenegro          MNE  gdp_growth  1975    NaN
14127  Montenegro          MNE  gdp_growth  1976

In [91]:
# eliminando primieras linhas (título do doc)
df_i = pd.read_csv("gdp_iugoslavia.csv", delimiter=';')

In [92]:
df_i = df_i[['year','gdp_growth']]

In [93]:
df_i

Unnamed: 0,year,gdp_growth
0,1970,5.6
1,1971,8.1
2,1972,4.2
3,1973,5.0
4,1974,8.5
5,1975,3.6
6,1976,3.9
7,1977,8.0
8,1978,6.9
9,1979,7.0


In [97]:
for i in range(len(df2)):
    if df2['country'][i] == 'Albania':
        if df2['indicator'] == 'gdp_growth':
            if df2['year'][i] == 1970:
                df2['value'] == df_i[0]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

## **B) Web Scraping de eventos**

### **Import packages**

In [10]:
import warnings
warnings.filterwarnings('ignore')

# for performing your HTTP requests
import requests  
# for xml & html scrapping 
from bs4 import BeautifulSoup 
# for table analysis
import pandas as pd
# write to csv
import csv
# Time
import time
from datetime import datetime
#Visuals
import matplotlib.pyplot as plt

### **B.1) Web Scraping**

### Input URLs

In [12]:
# lista de URLs
url_pt = "https://en.wikipedia.org/wiki/Timeline_of_Portuguese_history"
url_de = "https://en.wikipedia.org/wiki/Timeline_of_German_history"
url_su = "https://en.wikipedia.org/wiki/Timeline_of_Swiss_history"
url_sw = "https://en.wikipedia.org/wiki/Timeline_of_Swedish_history"
url_au = "https://en.wikipedia.org/wiki/Timeline_of_Austrian_history"
url_ru = "https://en.wikipedia.org/wiki/Timeline_of_Russian_history"
url_sp = "https://en.wikipedia.org/wiki/Timeline_of_Spanish_history"
url_fr = "https://en.wikipedia.org/wiki/Timeline_of_French_history"
url_al = "https://en.wikipedia.org/wiki/Timeline_of_Albanian_history"
url_cy = "https://en.wikipedia.org/wiki/Timeline_of_Cypriot_history"
url_es = "https://en.wikipedia.org/wiki/Timeline_of_Estonian_history"
url_sl = "https://en.wikipedia.org/wiki/Timeline_of_Slovenian_history"
url_lt = "https://en.wikipedia.org/wiki/Timeline_of_Latvian_history"
url_sb = "https://en.wikipedia.org/wiki/Timeline_of_Serbian_history"
url_it = "https://en.wikipedia.org/wiki/Timeline_of_Italian_history"
url_ic = "https://en.wikipedia.org/wiki/Timeline_of_Icelandic_history"
url_ir = "https://en.wikipedia.org/wiki/Timeline_of_Irish_history"
url_be = "https://en.wikipedia.org/wiki/Timeline_of_Belgian_history"
url_po = "https://en.wikipedia.org/wiki/Timeline_of_Polish_history"
url_ro = "https://en.wikipedia.org/wiki/Timeline_of_Romanian_history"
url_uk = "https://en.wikipedia.org/wiki/Timeline_of_British_history"
url_cr = "https://en.wikipedia.org/wiki/Timeline_of_Croatian_history"
url_fi = "https://en.wikipedia.org/wiki/Timeline_of_Finnish_history"
url_bu = "https://en.wikipedia.org/wiki/Timeline_of_Bulgarian_history"
url_ma = "https://en.wikipedia.org/wiki/Timeline_of_Maltese_history"

### Request & Response

In [13]:
# Session helps to object allows you to persist certain parameters across requests
# By default, request will keep waiting for a response indefinitely. Therefore, it is advised to set the timeout parameter.
# If the request was successful, you should see the reponse output as '200'.
s = requests.Session()

response_pt = s.get(url_pt, timeout=10)
response_de = s.get(url_de, timeout=10)
response_sw = s.get(url_sw, timeout=10)
response_su = s.get(url_su, timeout=10)
response_au = s.get(url_au, timeout=10)
response_ru = s.get(url_ru, timeout=10)
response_sp = s.get(url_sp, timeout=10)
response_fr = s.get(url_fr, timeout=10)
response_al = s.get(url_al, timeout=10)
response_cy = s.get(url_cy, timeout=10)
response_es = s.get(url_es, timeout=10)
response_sl = s.get(url_sl, timeout=10)
response_lt = s.get(url_lt, timeout=10)
response_sb = s.get(url_sb, timeout=10)
response_it = s.get(url_it, timeout=10)
response_ic = s.get(url_ic, timeout=10)
response_ir = s.get(url_ir, timeout=10)
response_be = s.get(url_be, timeout=10)
response_po = s.get(url_po, timeout=10)
response_ro = s.get(url_ro, timeout=10)
response_uk = s.get(url_uk, timeout=10)
response_cr = s.get(url_cr, timeout=10)
response_fi = s.get(url_fi, timeout=10)
response_bu = s.get(url_bu, timeout=10)
response_ma = s.get(url_ma, timeout=10)

### Wrangling HTML With BeautifulSoup

In [14]:
# parse response content to html
soup_pt = BeautifulSoup(response_pt.content, 'html.parser')
soup_de = BeautifulSoup(response_de.content, 'html.parser')
soup_sw = BeautifulSoup(response_sw.content, 'html.parser')
soup_su = BeautifulSoup(response_su.content, 'html.parser')
soup_au = BeautifulSoup(response_au.content, 'html.parser')
soup_ru = BeautifulSoup(response_ru.content, 'html.parser')
soup_sp = BeautifulSoup(response_sp.content, 'html.parser')
soup_fr = BeautifulSoup(response_fr.content, 'html.parser')
soup_al = BeautifulSoup(response_al.content, 'html.parser')
soup_cy = BeautifulSoup(response_cy.content, 'html.parser')
soup_es = BeautifulSoup(response_es.content, 'html.parser')
soup_sl = BeautifulSoup(response_sl.content, 'html.parser')
soup_lt = BeautifulSoup(response_lt.content, 'html.parser')
soup_sb = BeautifulSoup(response_sb.content, 'html.parser')
soup_it = BeautifulSoup(response_it.content, 'html.parser')
soup_ic = BeautifulSoup(response_ic.content, 'html.parser')
soup_ir = BeautifulSoup(response_ir.content, 'html.parser')
soup_be = BeautifulSoup(response_be.content, 'html.parser')
soup_po = BeautifulSoup(response_po.content, 'html.parser')
soup_ro = BeautifulSoup(response_ro.content, 'html.parser')
soup_uk = BeautifulSoup(response_uk.content, 'html.parser')
soup_cr = BeautifulSoup(response_cr.content, 'html.parser')
soup_fi = BeautifulSoup(response_fi.content, 'html.parser')
soup_bu = BeautifulSoup(response_bu.content, 'html.parser')
soup_ma = BeautifulSoup(response_ma.content, 'html.parser')

# to view the content in html format
pretty_soup_pt = soup_pt.prettify()
pretty_soup_de = soup_de.prettify()
pretty_soup_sw = soup_sw.prettify()
pretty_soup_su = soup_su.prettify()
pretty_soup_au = soup_au.prettify()
pretty_soup_ru = soup_ru.prettify()
pretty_soup_sp = soup_sp.prettify()
pretty_soup_fr = soup_fr.prettify()
pretty_soup_al = soup_al.prettify()
pretty_soup_cy = soup_cy.prettify()
pretty_soup_es = soup_es.prettify()
pretty_soup_sl = soup_sl.prettify()
pretty_soup_lt = soup_lt.prettify()
pretty_soup_sb = soup_sb.prettify()
pretty_soup_it = soup_it.prettify()
pretty_soup_ic = soup_ic.prettify()
pretty_soup_ir = soup_it.prettify()
pretty_soup_be = soup_be.prettify()
pretty_soup_po = soup_po.prettify()
pretty_soup_ro = soup_ro.prettify()
pretty_soup_uk = soup_uk.prettify()
pretty_soup_cr = soup_cr.prettify()
pretty_soup_fi = soup_fi.prettify()
pretty_soup_bu = soup_bu.prettify()
pretty_soup_ma = soup_ma.prettify()

### Managing two last tables (20th and 21st centuries)

In [15]:
# last table
last_tables_pt = soup_pt.find_all('table')[-1]
last_tables_de = soup_de.find_all('table')[-8]
last_tables_sw = soup_sw.find_all('table')[-2]
last_tables_su = soup_su.find_all('table')[-7]
last_tables_au = soup_au.find_all('table')[-1]
last_tables_ru = soup_ru.find_all('table')[-7]
last_tables_sp = soup_sp.find_all('table')[-1]
last_tables_fr = soup_fr.find_all('table')[-2]
last_tables_al = soup_al.find_all('table')[-17]
last_tables_cy = soup_cy.find_all('table')[-2]
last_tables_es = soup_es.find_all('table')[-3]
last_tables_sl = soup_sl.find_all('table')[-2]
last_tables_lt = soup_lt.find_all('table')[-2]
last_tables_sb = soup_sb.find_all('table')[-3]
last_tables_it = soup_it.find_all('table')[-9]
last_tables_ic = soup_ic.find_all('table')[-2]
last_tables_ir = soup_ir.find_all('table')[-11]
last_tables_be = soup_be.find_all('table')[-9]
last_tables_po = soup_po.find_all('table')[-8]
last_tables_ro = soup_ro.find_all('table')[-5]
last_tables_uk = soup_uk.find_all('table')[-1]
last_tables_cr = soup_cr.find_all('table')[-5]
last_tables_fi = soup_fi.find_all('table')[-1]
last_tables_ma = soup_bu.find_all('table')[-3]
last_tables_bu = soup_bu.find_all('table')[-3]

# penultimate table
pen_tables_pt = soup_pt.find_all('table')[-2]
pen_tables_de = soup_de.find_all('table')[-9]
pen_tables_sw = soup_sw.find_all('table')[-3]
pen_tables_su = soup_su.find_all('table')[-8]
pen_tables_au = soup_au.find_all('table')[-2]
pen_tables_ru = soup_ru.find_all('table')[-8]
pen_tables_sp = soup_sp.find_all('table')[-2]
pen_tables_fr = soup_fr.find_all('table')[-3]
pen_tables_al = soup_al.find_all('table')[-18]
pen_tables_cy = soup_cy.find_all('table')[-3]
pen_tables_es = soup_es.find_all('table')[-4]
pen_tables_sl = soup_sl.find_all('table')[-3]
pen_tables_lt = soup_lt.find_all('table')[-3]
pen_tables_sb = soup_sb.find_all('table')[-4]
pen_tables_it = soup_it.find_all('table')[-10]
pen_tables_ic = soup_ic.find_all('table')[-3]
pen_tables_ir = soup_ir.find_all('table')[-12]
pen_tables_be = soup_be.find_all('table')[-10]
pen_tables_po = soup_po.find_all('table')[-9]
pen_tables_ro = soup_ro.find_all('table')[-6]
pen_tables_uk = soup_uk.find_all('table')[-2]
pen_tables_cr = soup_cr.find_all('table')[-6]
pen_tables_fi = soup_fi.find_all('table')[-2]
pen_tables_bu = soup_bu.find_all('table')[-1]
pen_tables_ma = soup_ma.find_all('table')[-7]
pen_tables_bu = soup_bu.find_all('table')[-4]

In [16]:
# capturing headers - last
rows_pt = last_tables_pt.findAll("tr")
rows_de = last_tables_de.findAll("tr")
rows_sw = last_tables_sw.findAll("tr")
rows_su = last_tables_su.findAll("tr")
rows_au = last_tables_au.findAll("tr")
rows_ru = last_tables_ru.findAll("tr")
rows_sp = last_tables_sp.findAll("tr")
rows_fr = last_tables_fr.findAll("tr")
rows_al = last_tables_al.findAll("tr")
rows_cy = last_tables_cy.findAll("tr")
rows_es = last_tables_es.findAll("tr")
rows_sl = last_tables_sl.findAll("tr")
rows_lt = last_tables_lt.findAll("tr")
rows_sb = last_tables_sb.findAll("tr")
rows_it = last_tables_it.findAll("tr")
rows_ic = last_tables_ic.findAll("tr")
rows_ir = last_tables_ir.findAll("tr")
rows_be = last_tables_be.findAll("tr")
rows_po = last_tables_po.findAll("tr")
rows_ro = last_tables_ro.findAll("tr")
rows_uk = last_tables_uk.findAll("tr")
rows_cr = last_tables_cr.findAll("tr")
rows_fi = last_tables_fi.findAll("tr")
rows_bu = last_tables_bu.findAll("tr")
rows_ma = last_tables_ma.findAll("tr")

# capturing - penultimate
rows2_pt = pen_tables_pt.findAll("tr")
rows2_de = pen_tables_de.findAll("tr")
rows2_sw = pen_tables_sw.findAll("tr")
rows2_su = pen_tables_su.findAll("tr")
rows2_au = pen_tables_au.findAll("tr")
rows2_ru = pen_tables_ru.findAll("tr")
rows2_sp = pen_tables_sp.findAll("tr")
rows2_fr = pen_tables_fr.findAll("tr")
rows2_al = pen_tables_al.findAll("tr")
rows2_cy = pen_tables_cy.findAll("tr")
rows2_es = pen_tables_es.findAll("tr")
rows2_sl = pen_tables_sl.findAll("tr")
rows2_lt = pen_tables_lt.findAll("tr")
rows2_sb = pen_tables_sb.findAll("tr")
rows2_it = pen_tables_it.findAll("tr")
rows2_ic = pen_tables_ic.findAll("tr")
rows2_ir = pen_tables_ir.findAll("tr")
rows2_be = pen_tables_be.findAll("tr")
rows2_po = pen_tables_po.findAll("tr")
rows2_ro = pen_tables_ro.findAll("tr")
rows2_uk = pen_tables_uk.findAll("tr")
rows2_cr = pen_tables_cr.findAll("tr")
rows2_fi = pen_tables_fi.findAll("tr")
rows2_bu = pen_tables_bu.findAll("tr")
rows2_ma = pen_tables_ma.findAll("tr")

In [17]:
# creating headers list - last
header_pt = [th.text.rstrip() for th in rows_pt[0].find_all('th')]
header_de = [ti.text.rstrip() for ti in rows_de[0].find_all('th')]
header_sw = [tj.text.rstrip() for tj in rows_sw[0].find_all('th')]
header_su = [tk.text.rstrip() for tk in rows_su[0].find_all('th')]
header_au = [tl.text.rstrip() for tl in rows_au[0].find_all('th')]
header_ru = [tm.text.rstrip() for tm in rows_ru[0].find_all('th')]
header_sp = [tn.text.rstrip() for tn in rows_sp[0].find_all('th')]
header_fr = [to.text.rstrip() for to in rows_fr[0].find_all('th')]
header_al = [tp.text.rstrip() for tp in rows_al[0].find_all('th')]
header_cy = [tq.text.rstrip() for tq in rows_cy[0].find_all('th')]
header_es = [tr.text.rstrip() for tr in rows_es[0].find_all('th')]
header_sl = [ts.text.rstrip() for ts in rows_sl[0].find_all('th')]
header_lt = [tt.text.rstrip() for tt in rows_lt[0].find_all('th')]
header_sb = [tu.text.rstrip() for tu in rows_sb[0].find_all('th')]
header_it = [tv.text.rstrip() for tv in rows_it[0].find_all('th')]
header_ic = [tw.text.rstrip() for tw in rows_ic[0].find_all('th')]
header_ir = [tx.text.rstrip() for tx in rows_ir[0].find_all('th')]
header_be = [ty.text.rstrip() for ty in rows_be[0].find_all('th')]
header_po = [tz.text.rstrip() for tz in rows_po[0].find_all('th')]
header_ro = [ua.text.rstrip() for ua in rows_ro[0].find_all('th')]
header_uk = [ub.text.rstrip() for ub in rows_uk[0].find_all('th')]
header_cr = [uc.text.rstrip() for uc in rows_cr[0].find_all('th')]
header_fi = [ud.text.rstrip() for ud in rows_fi[0].find_all('th')]
header_bu = [ue.text.rstrip() for ue in rows_bu[0].find_all('th')]
header_ma = [uf.text.rstrip() for uf in rows_ma[0].find_all('th')]

# creating headers list - penultimate
header2_pt = [th.text.rstrip() for th in rows2_pt[0].find_all('th')]
header2_de = [ti.text.rstrip() for ti in rows2_de[0].find_all('th')]
header2_sw = [tj.text.rstrip() for tj in rows2_sw[0].find_all('th')]
header2_su = [tk.text.rstrip() for tk in rows2_su[0].find_all('th')]
header2_au = [tl.text.rstrip() for tl in rows2_au[0].find_all('th')]
header2_ru = [tm.text.rstrip() for tm in rows2_ru[0].find_all('th')]
header2_sp = [tn.text.rstrip() for tn in rows2_sp[0].find_all('th')]
header2_fr = [to.text.rstrip() for to in rows2_fr[0].find_all('th')]
header2_al = [tp.text.rstrip() for tp in rows2_al[0].find_all('th')]
header2_cy = [tq.text.rstrip() for tq in rows2_cy[0].find_all('th')]
header2_es = [tr.text.rstrip() for tr in rows2_es[0].find_all('th')]
header2_sl = [ts.text.rstrip() for ts in rows2_sl[0].find_all('th')]
header2_lt = [tt.text.rstrip() for tt in rows2_lt[0].find_all('th')]
header2_sb = [tu.text.rstrip() for tu in rows2_sb[0].find_all('th')]
header2_it = [tv.text.rstrip() for tv in rows2_it[0].find_all('th')]
header2_ic = [tw.text.rstrip() for tw in rows2_ic[0].find_all('th')]
header2_ir = [tx.text.rstrip() for tx in rows2_ir[0].find_all('th')]
header2_be = [ty.text.rstrip() for ty in rows2_be[0].find_all('th')]
header2_po = [tz.text.rstrip() for tz in rows2_po[0].find_all('th')]
header2_ro = [ua.text.rstrip() for ua in rows2_ro[0].find_all('th')]
header2_uk = [ub.text.rstrip() for ub in rows2_uk[0].find_all('th')]
header2_cr = [uc.text.rstrip() for uc in rows2_cr[0].find_all('th')]
header2_fi = [ud.text.rstrip() for ud in rows2_fi[0].find_all('th')]
header2_bu = [ue.text.rstrip() for ue in rows2_bu[0].find_all('th')]
header2_ma = [uf.text.rstrip() for uf in rows2_ma[0].find_all('th')]

### **B.2) Criando dataframe para exportar**

### Creating data

In [18]:
# creating tables - last
lst_data_pt = []
lst_data_de = []
lst_data_sw = []
lst_data_su = []
lst_data_au = []
lst_data_ru = []
lst_data_sp = []
lst_data_fr = []
lst_data_al = []
lst_data_cy = []
lst_data_es = []
lst_data_sl = []
lst_data_lt = []
lst_data_sb = []
lst_data_it = []
lst_data_ic = []
lst_data_ir = []
lst_data_be = []
lst_data_po = []
lst_data_ro = []
lst_data_uk = []
lst_data_cr = []
lst_data_fi = []
lst_data_bu = []
lst_data_ma = []

for i1 in rows_pt[1:]:
    data_pt = [a.text.rstrip() for a in i1.find_all('td')]
    lst_data_pt.append(data_pt)
for j1 in rows_de[1:]:
    data_de = [b.text.rstrip() for b in j1.find_all('td')]
    lst_data_de.append(data_de)
for k1 in rows_sw[1:]:
    data_sw = [c.text.rstrip() for c in k1.find_all('td')]
    lst_data_sw.append(data_sw)
for l1 in rows_su[1:]:
    data_su = [d.text.rstrip() for d in l1.find_all('td')]
    lst_data_su.append(data_su)
for m1 in rows_au[1:]:
    data_au = [e.text.rstrip() for e in m1.find_all('td')]
    lst_data_au.append(data_au)
for n1 in rows_ru[1:]:
    data_ru = [f.text.rstrip() for f in n1.find_all('td')]
    lst_data_ru.append(data_ru)
for o1 in rows_sp[1:]:
    data_sp = [g.text.rstrip() for g in o1.find_all('td')]
    lst_data_sp.append(data_sp)
for p1 in rows_fr[1:]:
    data_fr = [h.text.rstrip() for h in p1.find_all('td')]
    lst_data_fr.append(data_fr)
for q1 in rows_al[1:]:
    data_al = [i.text.rstrip() for i in q1.find_all('td')]
    lst_data_al.append(data_al)
for r1 in rows_cy[1:]:
    data_cy = [j.text.rstrip() for j in r1.find_all('td')]
    lst_data_cy.append(data_cy)
for t1 in rows_es[1:]:
    data_es = [k.text.rstrip() for k in t1.find_all('td')]
    lst_data_es.append(data_es)
for u1 in rows_sl[1:]:
    data_sl = [l.text.rstrip() for l in u1.find_all('td')]
    lst_data_sl.append(data_sl)
for v1 in rows_lt[1:]:
    data_lt = [m.text.rstrip() for m in v1.find_all('td')]
    lst_data_lt.append(data_lt)
for w1 in rows_sb[1:]:
    data_sb = [n.text.rstrip() for n in w1.find_all('td')]
    lst_data_sb.append(data_sb)
for x1 in rows_it[1:]:
    data_it = [o.text.rstrip() for o in x1.find_all('td')]
    lst_data_it.append(data_it)
for y1 in rows_ic[1:]:
    data_ic = [p.text.rstrip() for p in y1.find_all('td')]
    lst_data_ic.append(data_ic)
for z1 in rows_ir[1:]:
    data_ir = [q.text.rstrip() for q in z1.find_all('td')]
    lst_data_ir.append(data_ir)
for a2 in rows_be[1:]:
    data_be = [r.text.rstrip() for r in a2.find_all('td')]
    lst_data_be.append(data_be)
for b2 in rows_po[1:]:
    data_po = [s.text.rstrip() for s in b2.find_all('td')]
    lst_data_po.append(data_po)
for c2 in rows_ro[1:]:
    data_ro = [t.text.rstrip() for t in c2.find_all('td')]
    lst_data_ro.append(data_ro)
for d2 in rows_uk[1:]:
    data_uk = [u.text.rstrip() for u in d2.find_all('td')]
    lst_data_uk.append(data_uk)
for e2 in rows_cr[1:]:
    data_cr = [v.text.rstrip() for v in e2.find_all('td')]
    lst_data_cr.append(data_cr)
for f2 in rows_fi[1:]:
    data_fi = [w.text.rstrip() for w in f2.find_all('td')]
    lst_data_fi.append(data_fi)
for g2 in rows_bu[1:]:
    data_bu = [x.text.rstrip() for x in g2.find_all('td')]
    lst_data_bu.append(data_bu)
for h2 in rows_ma[1:]:
    data_ma = [y.text.rstrip() for y in h2.find_all('td')]
    lst_data_ma.append(data_ma)


# creating tables - penultimate
pen_data_pt = []
pen_data_de = []
pen_data_sw = []
pen_data_su = []
pen_data_au = []
pen_data_ru = []
pen_data_sp = []
pen_data_fr = []
pen_data_al = []
pen_data_cy = []
pen_data_es = []
pen_data_sl = []
pen_data_lt = []
pen_data_sb = []
pen_data_it = []
pen_data_ic = []
pen_data_ir = []
pen_data_be = []
pen_data_po = []
pen_data_ro = []
pen_data_uk = []
pen_data_cr = []
pen_data_fi = []
pen_data_bu = []
pen_data_ma = []

for i1 in rows2_pt[1:]:
    data2_pt = [a.text.rstrip() for a in i1.find_all('td')]
    pen_data_pt.append(data2_pt)
for j1 in rows2_de[1:]:
    data2_de = [b.text.rstrip() for b in j1.find_all('td')]
    pen_data_de.append(data2_de)
for k1 in rows2_sw[1:]:
    data2_sw = [c.text.rstrip() for c in k1.find_all('td')]
    pen_data_sw.append(data2_sw)
for l1 in rows2_su[1:]:
    data2_su = [d.text.rstrip() for d in l1.find_all('td')]
    pen_data_su.append(data2_su)
for m1 in rows2_au[1:]:
    data2_au = [e.text.rstrip() for e in m1.find_all('td')]
    pen_data_au.append(data2_au)
for n1 in rows2_ru[1:]:
    data2_ru = [f.text.rstrip() for f in n1.find_all('td')]
    pen_data_ru.append(data2_ru)
for o1 in rows2_sp[1:]:
    data2_sp = [g.text.rstrip() for g in o1.find_all('td')]
    pen_data_sp.append(data2_sp)
for p1 in rows2_fr[1:]:
    data2_fr = [h.text.rstrip() for h in p1.find_all('td')]
    pen_data_fr.append(data2_fr)
for q1 in rows2_al[1:]:
    data2_al = [i.text.rstrip() for i in q1.find_all('td')]
    pen_data_al.append(data2_al)
for r1 in rows2_cy[1:]:
    data2_cy = [j.text.rstrip() for j in r1.find_all('td')]
    pen_data_cy.append(data2_cy)
for t1 in rows2_es[1:]:
    data2_es = [k.text.rstrip() for k in t1.find_all('td')]
    pen_data_es.append(data2_es)
for u1 in rows2_sl[1:]:
    data2_sl = [l.text.rstrip() for l in u1.find_all('td')]
    pen_data_sl.append(data2_sl)
for v1 in rows2_lt[1:]:
    data2_lt = [m.text.rstrip() for m in v1.find_all('td')]
    pen_data_lt.append(data2_lt)
for w1 in rows2_sb[1:]:
    data2_sb = [n.text.rstrip() for n in w1.find_all('td')]
    pen_data_sb.append(data2_sb)
for x1 in rows2_it[1:]:
    data2_it = [o.text.rstrip() for o in x1.find_all('td')]
    pen_data_it.append(data2_it)
for y1 in rows2_ic[1:]:
    data2_ic = [p.text.rstrip() for p in y1.find_all('td')]
    pen_data_ic.append(data2_ic)
for z1 in rows2_ir[1:]:
    data2_ir = [q.text.rstrip() for q in z1.find_all('td')]
    pen_data_ir.append(data2_ir)
for a2 in rows2_be[1:]:
    data2_be = [r.text.rstrip() for r in a2.find_all('td')]
    pen_data_be.append(data2_be)
for b2 in rows2_po[1:]:
    data2_po = [s.text.rstrip() for s in b2.find_all('td')]
    pen_data_po.append(data2_po)
for c2 in rows2_ro[1:]:
    data2_ro = [t.text.rstrip() for t in c2.find_all('td')]
    pen_data_ro.append(data2_ro)
for d2 in rows2_uk[1:]:
    data2_uk = [u.text.rstrip() for u in d2.find_all('td')]
    pen_data_uk.append(data2_uk)
for e2 in rows2_cr[1:]:
    data2_cr = [v.text.rstrip() for v in e2.find_all('td')]
    pen_data_cr.append(data2_cr)
for f2 in rows2_fi[1:]:
    data2_fi = [w.text.rstrip() for w in f2.find_all('td')]
    pen_data_fi.append(data2_fi)
for g2 in rows2_bu[1:]:
    data2_bu = [x.text.rstrip() for x in g2.find_all('td')]
    pen_data_bu.append(data2_bu)
for h2 in rows2_ma[1:]:
    data2_ma = [y.text.rstrip() for y in h2.find_all('td')]
    pen_data_ma.append(data2_ma)    

In [19]:
pd.set_option('max_colwidth', None)

In [20]:
## 21th CENTURY
# portugal
lst_data_pt = pd.DataFrame(lst_data_pt, columns=header_pt)
df_pt = lst_data_pt.copy()
df_pt1 = df_pt.dropna()
df_pt1.dropna()
df_pt1 = df_pt1.drop(columns='Date')
df_pt1['Country'] = "Portugal"
df_pt2 = df_pt1[df_pt1['Year'].astype(int)<datetime.now().year-1]
df_pt2 = df_pt2[['Year','Country','Event']]
df_pt2

# germany
lst_data_de = pd.DataFrame(lst_data_de, columns=header_de)
df_de = lst_data_de.copy()
df_de1 = df_de.dropna()
df_de1.dropna()
df_de1 = df_de1.drop(columns=['Date','Source'])
df_de1['Country'] = "Germany"
df_de2 = df_de1[df_de1['Year'].astype(int)<datetime.now().year-1]
df_de2 = df_de2[['Year','Country','Event']]
df_de2

# sweden
lst_data_sw = pd.DataFrame(lst_data_sw, columns=header_sw)
df_sw = lst_data_sw.copy()
df_sw1 = df_sw.dropna()
df_sw1.dropna()
df_sw1 = df_sw1.drop(columns='Date')
df_sw1['Country'] = "Sweden"
df_sw2 = df_sw1[df_sw1['Year'].astype(int)<datetime.now().year-1]
df_sw2 = df_sw2[['Year','Country','Event']]
df_sw2

# switzerland
lst_data_su = pd.DataFrame(lst_data_su, columns=header_su)
df_su = lst_data_su.copy()
df_su1 = df_su.dropna()
df_su1.dropna()
df_su1 = df_su1.drop(columns='Date')
df_su1['Country'] = "Switzerland"
df_su2 = df_su1[df_su1['Year'].astype(int)<datetime.now().year-1]
df_su2 = df_su2[['Year','Country','Event']]
df_su2

# austria
lst_data_au = pd.DataFrame(lst_data_au, columns=header_au)
df_au = lst_data_au.copy()
df_au1 = df_au.dropna()
df_au1.dropna()
df_au1 = df_au1.drop(columns='Date')
df_au1['Country'] = "Austria"
df_au2 = df_au1[df_au1['Year'].astype(int)<datetime.now().year-1]
df_au2 = df_au2[['Year','Country','Event']]
df_au2

# russia
lst_data_ru = pd.DataFrame(lst_data_ru, columns=header_ru)
df_ru = lst_data_ru.copy()
df_ru1 = df_ru.dropna()
df_ru1.dropna()
df_ru1 = df_ru1.drop(columns='Date')
df_ru1['Country'] = "Russia"
df_ru2 = df_ru1[df_ru1['Year'].astype(int)<datetime.now().year-1]
df_ru2 = df_ru2[['Year','Country','Event']]
df_ru2

# spain
lst_data_sp = pd.DataFrame(lst_data_sp, columns=header_sp)
df_sp = lst_data_sp.copy()
df_sp1 = df_sp.dropna()
df_sp1.dropna()
df_sp1 = df_sp1.drop(columns='Date')
df_sp1['Country'] = "Spain"
df_sp2 = df_sp1[df_sp1['Year'].astype(int)<datetime.now().year-1]
df_sp2 = df_sp2[['Year','Country','Event']]
df_sp2

# france
lst_data_fr = pd.DataFrame(lst_data_fr, columns=header_fr)
df_fr = lst_data_fr.copy()
df_fr1 = df_fr.dropna()
df_fr1.dropna()
df_fr1 = df_fr1.drop(columns='Date')
df_fr1['Country'] = "France"
df_fr2 = df_fr1[df_fr1['Year'].astype(int)<datetime.now().year-1]
df_fr2 = df_fr2[['Year','Country','Event']]
df_fr2

# albania
lst_data_al = pd.DataFrame(lst_data_al, columns=header_al)
df_al = lst_data_al.copy()
df_al1 = df_al.dropna()
df_al1.dropna()
df_al1 = df_al1.drop(columns='Date')
df_al1['Country'] = "Albania"
df_al2 = df_al1[df_al1['Year'].astype(int)<datetime.now().year-1]
df_al2 = df_al2[['Year','Country','Event']]
df_al2

# cyprus
lst_data_cy = pd.DataFrame(lst_data_cy, columns=header_cy)
df_cy = lst_data_cy.copy()
df_cy1 = df_cy.dropna()
df_cy1.dropna()
df_cy1 = df_cy1.drop(columns='Date')
df_cy1['Country'] = "Cyprus"
df_cy2 = df_cy1[df_cy1['Year'].astype(int)<datetime.now().year-1]
df_cy2 = df_cy2[['Year','Country','Event']]
df_cy2

# estonia
lst_data_es = pd.DataFrame(lst_data_es, columns=header_es)
df_es = lst_data_es.copy()
df_es1 = df_es.dropna()
df_es1.dropna()
df_es1 = df_es1.drop(columns='Date')
df_es1['Country'] = "Estonia"
df_es2 = df_es1[df_es1['Year'].astype(int)<datetime.now().year-1]
df_es2 = df_es2[['Year','Country','Event']]
df_es2

# slovenia
lst_data_sl = pd.DataFrame(lst_data_sl, columns=header_sl)
df_sl = lst_data_sl.copy()
df_sl1 = df_sl.dropna()
df_sl1.dropna()
df_sl1 = df_sl1.drop(columns='Date')
df_sl1['Country'] = "Slovenia"
df_sl2 = df_sl1[df_sl1['Year'].astype(int)<datetime.now().year-1]
df_sl2 = df_sl2[['Year','Country','Event']]
df_sl2

# latvia
lst_data_lt = pd.DataFrame(lst_data_lt, columns=header_lt)
df_lt = lst_data_lt.copy()
df_lt1 = df_lt.dropna()
df_lt1.dropna()
df_lt1 = df_lt1.drop(columns='Date')
df_lt1['Country'] = "Latvia"
df_lt2 = df_lt1[df_lt1['Year'].astype(int)<datetime.now().year-1]
df_lt2 = df_lt2[['Year','Country','Event']]
df_lt2

# serbia
lst_data_sb = pd.DataFrame(lst_data_sb, columns=header_sb)
df_sb = lst_data_sb.copy()
df_sb1 = df_sb.dropna()
df_sb1.dropna()
df_sb1 = df_sb1.drop(columns='Date')
df_sb1['Country'] = "Serbia"
df_sb2 = df_sb1[df_sb1['Year'].astype(int)<datetime.now().year-1]
df_sb2 = df_sb2[['Year','Country','Event']]
df_sb2

# italy
lst_data_it = pd.DataFrame(lst_data_it, columns=header_it)
df_it = lst_data_it.copy()
df_it1 = df_it.dropna()
df_it1.dropna()
df_it1 = df_it1.drop(columns='Date')
df_it1['Country'] = "Italy"
df_it2 = df_it1[df_it1['Year'].astype(int)<datetime.now().year-1]
df_it2 = df_it2[['Year','Country','Event']]
df_it2

# iceland
lst_data_ic = pd.DataFrame(lst_data_ic, columns=header_ic)
df_ic = lst_data_ic.copy()
df_ic1 = df_ic.dropna()
df_ic1.dropna()
df_ic1 = df_ic1.drop(columns='Date')
df_ic1['Country'] = "Iceland"
df_ic2 = df_ic1[df_ic1['Year'].astype(int)<datetime.now().year-1]
df_ic2 = df_ic2[['Year','Country','Event']]
df_ic2

# ireland
lst_data_ir = pd.DataFrame(lst_data_ir, columns=header_ir)
df_ir = lst_data_ir.copy()
df_ir1 = df_ir.dropna()
df_ir1.dropna()
df_ir1 = df_ir1.drop(columns='Date')
df_ir1['Country'] = "Ireland"
df_ir2 = df_ir1[df_ir1['Year'].astype(int)<datetime.now().year-1]
df_ir2 = df_ir2[['Year','Country','Event']]
df_ir2

# belgium
lst_data_be = pd.DataFrame(lst_data_be, columns=header_be)
df_be = lst_data_be.copy()
df_be1 = df_be[df_be['Year']!=""]
df_be1['Country'] = "Belgium"
df_be1 = df_be1[['Year','Country','Event']]
df_be2 = df_be1[df_be1['Year'].astype(int)<datetime.now().year-1]
df_be2

# poland
lst_data_po = pd.DataFrame(lst_data_po, columns=header_po)
df_po = lst_data_po.copy()
df_po1 = df_po.dropna()
df_po1.dropna()
df_po1 = df_po1.drop(columns='Date')
df_po1['Country'] = "Poland"
df_po2 = df_po1[df_po1['Year'].astype(int)<datetime.now().year-1]
df_po2 = df_po2[['Year','Country','Event']]
df_po2

# romania
lst_data_ro = pd.DataFrame(lst_data_ro, columns=header_ro)
df_ro = lst_data_ro.copy()
df_ro1 = df_ro.dropna()
df_ro1.dropna()
df_ro1 = df_ro1.drop(columns='Date')
df_ro1['Country'] = "Romania"
df_ro2 = df_ro1[df_ro1['Year'].astype(int)<datetime.now().year-1]
df_ro2 = df_ro2[['Year','Country','Event']]
df_ro2

# uk
lst_data_uk = pd.DataFrame(lst_data_uk, columns=header_uk)
df_uk = lst_data_uk.copy()
df_uk1 = df_uk.dropna()
df_uk1.dropna()
df_uk1 = df_uk1.drop(columns='Date')
df_uk1['Country'] = "United Kingdom"
df_uk2 = df_uk1[df_uk1['Year'].astype(int)<datetime.now().year-1]
df_uk2 = df_uk2[['Year','Country','Event']]
df_uk2

# croatia
lst_data_cr = pd.DataFrame(lst_data_cr, columns=header_cr)
df_cr = lst_data_cr.copy()
df_cr1 = df_cr.dropna()
df_cr1.dropna()
df_cr1 = df_cr1.drop(columns='Date')
df_cr1['Country'] = "Croatia"
df_cr1['Year'][16] = "2019"
df_cr2 = df_cr1[df_cr1['Year'].astype(int)<datetime.now().year-1]
df_cr2 = df_cr2[['Year','Country','Event']]
df_cr2

# finland
lst_data_fi = pd.DataFrame(lst_data_fi, columns=header_fi)
df_fi = lst_data_fi.copy()
df_fi1 = df_fi.dropna()
df_fi1.dropna()
df_fi1 = df_fi1.drop(columns='Date')
df_fi1['Country'] = "Finland"
df_fi2 = df_fi1[df_fi1['Year'].astype(int)<datetime.now().year-1]
df_fi2 = df_fi2[['Year','Country','Event']]
df_fi2

# bulgary
lst_data_bu = pd.DataFrame(lst_data_bu, columns=header_bu)
df_bu = lst_data_bu.copy()
df_bu1 = df_bu.dropna()
df_bu1.dropna()
df_bu1 = df_bu1.drop(columns='Date')
df_bu1['Country'] = "Bulgary"
df_bu2 = df_bu1[df_bu1['Year'].astype(int)<datetime.now().year-1]
df_bu2 = df_bu2[['Year','Country','Event']]
df_bu2

# bulgary
lst_data_ma = pd.DataFrame(lst_data_ma, columns=header_ma)
df_ma = lst_data_ma.copy()
df_ma1 = df_ma.dropna()
df_ma1.dropna()
df_ma1 = df_ma1.drop(columns='Date')
df_ma1['Country'] = "Malta"
df_ma2 = df_ma1[df_ma1['Year'].astype(int)<datetime.now().year-1]
df_ma2 = df_ma2[['Year','Country','Event']]
#df_ma2

In [21]:
## 20th CENTURY
# portugal
pen_data_pt = pd.DataFrame(pen_data_pt, columns=header_pt)
df_pt3 = pen_data_pt.copy()
df_pt4 = df_pt3.dropna()
df_pt4.dropna()
df_pt4 = df_pt4.drop(columns='Date')
df_pt4['Country'] = "Portugal"
df_pt5 = df_pt4[df_pt4['Year'].astype(int)>1945]
df_pt5 = df_pt5[['Year','Country','Event']]
df_pt5

# germany
pen_data_de = pd.DataFrame(pen_data_de, columns=header_de)
df_de3 = pen_data_de.copy()
df_de4 = df_de3.dropna()
df_de4.dropna()
df_de4 = df_de4.drop(columns=['Date','Source'])
df_de4['Country'] = "Germany"
df_de5 = df_de4[df_de4['Year'].astype(int)>1945]
df_de5 = df_de5[['Year','Country','Event']]
df_de5

# sweden
pen_data_sw = pd.DataFrame(pen_data_sw, columns=header_sw)
df_sw3 = pen_data_sw.copy()
df_sw4 = df_sw3.dropna()
df_sw4.dropna()
df_sw4 = df_sw4.drop(columns='Date')
df_sw4['Country'] = "Sweden"
df_sw5 = df_sw4[df_sw4['Year'].astype(int)>1945]
df_sw5 = df_sw5[['Year','Country','Event']]
df_sw5

# switzerland
pen_data_su = pd.DataFrame(pen_data_su, columns=header_su)
df_su3 = pen_data_su.copy()
df_su4 = df_su3.dropna()
df_su4.dropna()
df_su4 = df_su4.drop(columns='Date')
df_su4['Country'] = "Switzerland"
df_su5 = df_su4[df_su4['Year'].astype(int)>1945]
df_su5 = df_su5[['Year','Country','Event']]
df_su5

# austria
pen_data_au = pd.DataFrame(pen_data_au, columns=header_au)
df_au3 = pen_data_au.copy()
df_au4 = df_au3.dropna()
df_au4.dropna()
df_au4 = df_au4.drop(columns='Date')
df_au4['Country'] = "Austria"
df_au5 = df_au4[df_au4['Year'].astype(int)>1945]
df_au5 = df_au5[['Year','Country','Event']]
df_au5

# russia
pen_data_ru = pd.DataFrame(pen_data_ru, columns=header_ru)
df_ru3 = pen_data_ru.copy()
df_ru4 = df_ru3.dropna()
df_ru4.dropna()
df_ru4 = df_ru4.drop(columns='Date')
df_ru4['Country'] = "Russia"
df_ru5 = df_ru4[df_ru4['Year'].astype(int)>1945]
df_ru5 = df_ru5[['Year','Country','Event']]
df_ru5

# spain
pen_data_sp = pd.DataFrame(pen_data_sp, columns=header_sp)
df_sp3 = pen_data_sp.copy()
df_sp4 = df_sp3.dropna()
df_sp4.dropna()
df_sp4 = df_sp4.drop(columns='Date')
df_sp4['Country'] = "Spain"
df_sp5 = df_sp4[df_sp4['Year'].astype(int)>1945]
df_sp5 = df_sp5[['Year','Country','Event']]
df_sp5

# france
pen_data_fr = pd.DataFrame(pen_data_fr, columns=header_fr)
df_fr3 = pen_data_fr.copy()
df_fr4 = df_fr3.dropna()
df_fr4.dropna()
df_fr4 = df_fr4.drop(columns='Date')
df_fr4['Country'] = "France"
df_fr5 = df_fr4[df_fr4['Year'].astype(int)>1945]
df_fr5 = df_fr5[['Year','Country','Event']]
df_fr5

# albania
pen_data_al = pd.DataFrame(pen_data_al, columns=header_al)
df_al3 = pen_data_al.copy()
df_al4 = df_al3.dropna()
df_al4.dropna()
df_al4 = df_al4.drop(columns='Date')
df_al4['Country'] = "Albania"
df_al5 = df_al4[df_al4['Year'].astype(int)>1945]
df_al5 = df_al5[['Year','Country','Event']]
df_al5

# cyprus
pen_data_cy = pd.DataFrame(pen_data_cy, columns=header_cy)
df_cy3 = pen_data_cy.copy()
df_cy4 = df_cy3.dropna()
df_cy4.dropna()
df_cy4 = df_cy4.drop(columns='Date')
df_cy4['Country'] = "Cyprus"
df_cy5 = df_cy4[df_cy4['Year'].astype(int)>1945]
df_cy5 = df_cy5[['Year','Country','Event']]
df_cy5

# estonia
pen_data_es = pd.DataFrame(pen_data_es, columns=header_es)
df_es3 = pen_data_es.copy()
df_es4 = df_es3.dropna()
df_es4.dropna()
df_es4 = df_es4.drop(columns='Date')
df_es4['Country'] = "Estonia"
df_es5 = df_es4[df_es4['Year'].astype(int)>1945]
df_es5 = df_es5[['Year','Country','Event']]
df_es5

# slovenia
pen_data_sl = pd.DataFrame(pen_data_sl, columns=header_sl)
df_sl3 = pen_data_sl.copy()
df_sl4 = df_sl3.dropna()
df_sl4.dropna()
df_sl4 = df_sl4.drop(columns='Date')
df_sl4['Country'] = "Slovenia"
df_sl5 = df_sl4[df_sl4['Year'].astype(int)>1945]
df_sl5 = df_sl5[['Year','Country','Event']]
df_sl5

# latvia
pen_data_lt = pd.DataFrame(pen_data_lt, columns=header_lt)
df_lt3 = pen_data_lt.copy()
df_lt4 = df_lt3.dropna()
df_lt4.dropna()
df_lt4 = df_lt4.drop(columns='Date')
df_lt4['Country'] = "Latvia"
df_lt5 = df_lt4[df_lt4['Year'].astype(int)>1945]
df_lt5 = df_lt5[['Year','Country','Event']]
df_lt5

# serbia
pen_data_sb = pd.DataFrame(pen_data_sb, columns=header_sb)
df_sb3 = pen_data_sb.copy()
df_sb4 = df_sb3.dropna()
df_sb4.dropna()
df_sb4 = df_sb4.drop(columns='Date')
df_sb4['Country'] = "Serbia"
df_sb5 = df_sb4[df_sb4['Year'].astype(int)>1945]
df_sb5 = df_sb5[['Year','Country','Event']]
df_sb5

# italy
pen_data_it = pd.DataFrame(pen_data_it, columns=header_it)
df_it3 = pen_data_it.copy()
df_it4 = df_it3.dropna()
df_it4.dropna()
df_it4 = df_it4.drop(columns='Date')
df_it4['Country'] = "Italy"
df_it5 = df_it4[df_it4['Year'].astype(int)>1945]
df_it5 = df_it5[['Year','Country','Event']]
df_it5

# iceland
pen_data_ic = pd.DataFrame(pen_data_ic, columns=header_ic)
df_ic3 = pen_data_ic.copy()
df_ic4 = df_ic3.dropna()
df_ic4.dropna()
df_ic4 = df_ic4.drop(columns='Date')
df_ic4['Country'] = "Iceland"
df_ic5 = df_ic4[df_ic4['Year'].astype(int)>1945]
df_ic5 = df_ic5[['Year','Country','Event']]
df_ic5

# ireland
pen_data_ir = pd.DataFrame(pen_data_ir, columns=header_ir)
df_ir3 = pen_data_ir.copy()
df_ir4 = df_ir3.dropna()
df_ir4.dropna()
df_ir4 = df_ir4.drop(columns='Date')
df_ir4['Country'] = "Ireland"
df_ir5 = df_ir4[df_ir4['Year'].astype(int)>1945]
df_ir5 = df_ir5[['Year','Country','Event']]
df_ir5

# belgium
pen_data_be = pd.DataFrame(pen_data_be, columns=header_be)
df_be3 = pen_data_be.copy()
df_be4 = df_be3[df_be3['Year']!=""]
df_be4['Country'] = "Belgium"
df_be4 = df_be4[['Year','Country','Event']]
df_be5 = df_be4[df_be4['Year'].astype(int)>1945]
df_be5

# poland
pen_data_po = pd.DataFrame(pen_data_po, columns=header_po)
df_po3 = pen_data_po.copy()
df_po4 = df_po3.dropna()
df_po4.dropna()
df_po4 = df_po4.drop(columns='Date')
df_po4['Country'] = "Poland"
df_po5 = df_po4[df_po4['Year'].astype(int)>1945]
df_po5 = df_po5[['Year','Country','Event']]
df_po5

# romania
pen_data_ro = pd.DataFrame(pen_data_ro, columns=header_ro)
df_ro3 = pen_data_ro.copy()
df_ro4 = df_ro3.dropna()
df_ro4.dropna()
df_ro4 = df_ro4.drop(columns='Date')
df_ro4['Country'] = "Romania"
df_ro4['Year'].iloc[22] = "1942"
df_ro5 = df_ro4[df_ro4['Year'].astype(int)>1945]
df_ro5 = df_ro5[['Year','Country','Event']]
df_ro5

# uk
pen_data_uk = pd.DataFrame(pen_data_uk, columns=header_uk)
df_uk3 = pen_data_uk.copy()
df_uk4 = df_uk3.dropna()
df_uk4.dropna()
df_uk4 = df_uk4.drop(columns='Date')
df_uk4['Country'] = "United Kingdom"
df_uk4['Year'].iloc[59] = "1984"
df_uk5 = df_uk4[df_uk4['Year'].astype(int)>1945]
df_uk5 = df_uk5[['Year','Country','Event']]
df_uk5

# croatia
pen_data_cr = pd.DataFrame(pen_data_cr, columns=header_cr)
df_cr3 = pen_data_cr.copy()
df_cr4 = df_cr3.dropna()
df_cr4.dropna()
df_cr4 = df_cr4.drop(columns='Date')
df_cr4['Country'] = "Croatia"
df_cr5 = df_cr4[df_cr4['Year'].astype(int)>1945]
df_cr5 = df_cr5[['Year','Country','Event']]
df_cr5

# finland
pen_data_fi = pd.DataFrame(pen_data_fi, columns=header_fi)
df_fi3 = pen_data_fi.copy()
df_fi4 = df_fi3.dropna()
df_fi4.dropna()
df_fi4 = df_fi4.drop(columns='Date')
df_fi4['Country'] = "Finland"
df_fi5 = df_fi4[df_fi4['Year'].astype(int)>1945]
df_fi5 = df_fi5[['Year','Country','Event']]
df_fi5

# bulgary
pen_data_bu = pd.DataFrame(pen_data_bu, columns=header_bu)
df_bu3 = pen_data_bu.copy()
df_bu4 = df_bu3.dropna()
df_bu4.dropna()
df_bu4 = df_bu4.drop(columns='Date')
df_bu4['Country'] = "Bulgary"
df_bu5 = df_bu4[df_bu4['Year'].astype(int)>1945]
df_bu5 = df_bu5[['Year','Country','Event']]
df_bu5

# malta
pen_data_ma = pd.DataFrame(pen_data_ma, columns=header_ma)
df_ma3 = pen_data_ma.copy()
df_ma4 = df_ma3.dropna()
df_ma4.dropna()
df_ma4 = df_ma4.drop(columns='Date')
df_ma4['Country'] = "Malta"
df_ma5 = df_ma4[16:]
df_ma5 = df_ma5[['Year','Country','Event']]
#df_ma5

### Final table

In [22]:
final_df = pd.concat([df_pt2, df_de2, df_sw2, df_su2, df_au2, df_ru2, df_sp2, df_fr2, df_al2,
                      df_cy2, df_es2, df_sl2, df_lt2, df_sb2, df_it2, df_ic2, df_ir2, df_be2,
                      df_po2, df_ro2, df_uk2, df_cr2, df_fi2, df_bu2, df_ma2,
                      df_pt5, df_de5, df_sw5, df_su5, df_au5, df_ru5, df_sp5, df_fr5, df_al5,
                      df_cy5, df_es5, df_sl5, df_lt5, df_sb5, df_it5, df_ic5, df_ir5, df_be5,
                      df_po5, df_ro5, df_uk5, df_cr5, df_fi5, df_bu5, df_ma5])

In [23]:
final_df = final_df.sort_values(by=['Year','Country'])

In [24]:
# renomeando colunas para retirar espaços
final_df1 = final_df.rename(columns={"Year": "year_event",
                                     "Country": "country_event",
                                     "Event": "event"})

In [39]:
df_manu = pd.read_csv('eventos_manuais.csv')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 134: invalid start byte