## Ruling political parties

In [2]:
import requests
import pandas as pd
import numpy as np
# website_url = requests.get('https://en.wikipedia.org/wiki/List_of_current_heads_of_state_and_government').text
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_ruling_political_parties_by_country').text

In [2]:
# !pip install lxml

In [76]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(website_url,'lxml')

In [79]:
# List of Tuples
table_list = [ ]
for table in soup.find_all('table', class_='wikitable'):
    for row in table.find_all('tr')[2:]:
        data = row.find_all(['th','td'])
        if data[1].a.text == 'United Arab Emirates':
            country = data[1].a.text
            party = 'Federal National Council'
            party_link = '/wiki/Politics_of_the_United_Arab_Emirates'
            table_list.append((country,party,party_link))
        elif len(data) == 8:
            try:
                country = data[1].a.text
                party = data[6].a.text
                party_link = data[6].a['href']
            except IndexError:pass
            except AttributeError:pass
            if country != '':
                table_list.append((country,party,party_link))
#Create a DataFrame object
countries = pd.DataFrame(table_list, columns=['Country', 'Ruling Party', 'Link'])

In [80]:
countries.tail(20)

Unnamed: 0,Country,Ruling Party,Link
199,Turkey,Justice and Development Party,/wiki/Justice_and_Development_Party_(Turkey)
200,Turkmenistan,Democratic Party of Turkmenistan,/wiki/Democratic_Party_of_Turkmenistan
201,Turks and Caicos,People's Democratic Movement,/wiki/People%27s_Democratic_Movement_(Turks_an...
202,Uganda,National Resistance Movement,/wiki/National_Resistance_Movement
203,Ukraine,Servant of the People,/wiki/Servant_of_the_People_(political_party)
204,United Arab Emirates,Federal National Council,/wiki/Politics_of_the_United_Arab_Emirates
205,United Kingdom,Conservative Party,/wiki/Conservative_Party_(UK)
206,Northern Ireland,DUP,/wiki/Democratic_Unionist_Party
207,United States,Republican Party,/wiki/Republican_Party_(United_States)
208,Uruguay,National Party,/wiki/National_Party_(Uruguay)


In [81]:
politics = {}
for index, country in countries.iterrows():
    link = country['Link']
    party_url = requests.get(f'https://en.wikipedia.org{link}').text
    party_soup = BeautifulSoup(party_url,'lxml')
    table = party_soup.find('table', class_='infobox vcard')
    if table != None:
        for row in table.find_all('tr'):
            try:
                if row.a.text == 'Political\xa0position':
                    data = row.find_all(['th','td'])
                    try:
                        spectrum = data[1].find_all('a')
                        pp = []
                        for word in spectrum:
                            if word.text[0] != '[' and word.text != 'citation needed':
                                pp.append(word.text)
                        political_position = pp
                    except IndexError:pass
                    except AttributeError:pass
                    politics[country['Country']] = political_position
                    break
            except:
                pass

In [82]:
not_found = set(countries['Country']) - set(politics.keys()) # to be scraped independently/manually
not_found

{'Afghanistan',
 'Anguilla',
 'Antigua and Barbuda',
 'British Virgin Islands',
 'Cameroon',
 'Central African Republic',
 'Chad',
 'China',
 'Comoros',
 'Cook Islands',
 'Equatorial Guinea',
 'French Guiana',
 'Gambia',
 'Guatemala',
 'Guyana',
 'Hong Kong',
 'Italy',
 'Kiribati',
 'Lesotho',
 'Liberia',
 'Libya',
 'Macau',
 'Madagascar',
 'Marshall Islands',
 'Montserrat',
 'Morocco',
 'New Caledonia',
 'Papua New Guinea',
 'Russia',
 'Saint Kitts and Nevis',
 'Sint Maarten',
 'Solomon Islands',
 'South Sudan',
 'Sudan',
 'Tajikistan',
 'United Arab Emirates',
 'United States',
 'Vanuatu',
 'Virgin Islands of the United States',
 'Wallis and Futuna'}

In [83]:
politics['Italy'] = ['Centre-left'] # (rough average of parties) source:https://en.wikipedia.org/wiki/Conte_II_Cabinet
politics['United States'] = ['Centre-right','right-wing'] # (position of European affiliation) source:https://en.wikipedia.org/wiki/Republican_Party_(United_States)
# politics['Russia'] = ['Big tent'] # (parliament) source: https://en.wikipedia.org/wiki/United_Russia
politics['Cameroon'] = ['Big tent'] # https://en.wikipedia.org/wiki/Cameroon_People%27s_Democratic_Movement
politics['Jamaica'] = ['Centre-right'] # https://en.wikipedia.org/wiki/Jamaica_Labour_Party

In [84]:
# Include list of dictatorships as a political position on their own:
dictatorship_url = requests.get('https://worldpopulationreview.com/countries/dictatorship-countries/').text

In [85]:
soup = BeautifulSoup(dictatorship_url,'lxml')

In [86]:
dictatorship_countries = [country.text for country in soup.find_all('ul')[2].find_all('a')]
dictatorship_countries.extend(['Algeria', 'Equatorial Guinea', 'North Korea', 'Sudan']) 
# should maybe add Brazil, Hungary and Poland
len(dictatorship_countries)

49

In [87]:
len(politics.keys())

182

In [88]:
for state in dictatorship_countries:
    politics[state] = ['Dictatorship']
len(politics.keys())

202

In [90]:
countries['Political Position']= countries['Country'].map(politics)

In [91]:
countries.tail(20)

Unnamed: 0,Country,Ruling Party,Link,Political Position
199,Turkey,Justice and Development Party,/wiki/Justice_and_Development_Party_(Turkey),[Dictatorship]
200,Turkmenistan,Democratic Party of Turkmenistan,/wiki/Democratic_Party_of_Turkmenistan,[Dictatorship]
201,Turks and Caicos,People's Democratic Movement,/wiki/People%27s_Democratic_Movement_(Turks_an...,[Centre]
202,Uganda,National Resistance Movement,/wiki/National_Resistance_Movement,[Dictatorship]
203,Ukraine,Servant of the People,/wiki/Servant_of_the_People_(political_party),[Big tent]
204,United Arab Emirates,Federal National Council,/wiki/Politics_of_the_United_Arab_Emirates,[Dictatorship]
205,United Kingdom,Conservative Party,/wiki/Conservative_Party_(UK),[Centre-right]
206,Northern Ireland,DUP,/wiki/Democratic_Unionist_Party,"[Centre-right, right-wing]"
207,United States,Republican Party,/wiki/Republican_Party_(United_States),"[Centre-right, right-wing]"
208,Uruguay,National Party,/wiki/National_Party_(Uruguay),"[Centre-right, centrist]"


In [92]:
countries.dropna(inplace=True)
countries.shape

(193, 4)

In [None]:
# countries.set_index('Country')

In [82]:
# print(countries['Political Position'].nunique())
# countries['Political Position'].drop_duplicates()

In [93]:
set([item for sublist in list(countries['Political Position'].values) for item in sublist]) # unique values

{'Big tent',
 'Center',
 'Center-left',
 'Center-right',
 'Centre',
 'Centre-left',
 'Centre-right',
 'Dictatorship',
 'Far-left',
 'Left-wing',
 'Right-wing',
 'centre-left',
 'centre-right',
 'centrist',
 'far-left',
 'far-right',
 'left-wing',
 'right-wing'}

In [22]:
poli_to_number = {'Dictatorship': -1,
                 'Big tent': 0,
                 'Far-left': 1,
                 'far-left': 1,
                 'Left-wing': 2,
                 'Left wing': 2,
                 'left-wing': 2,
                 'Centre-left': 3,
                 'Center-left': 3,
                 'centre-left': 3,
                 'Center': 4,
                 'centrist': 4,
                 'Centre': 4,
                 'Centre-right': 5,
                 'Center-right': 5,
                 'centre-right': 5,
                 'Right-wing': 6,
                 'Right wing': 6,
                 'right-wing': 6,
                 'far-right': 7 }

In [95]:
def turn_list_to_number(poli_list):
    numbers = []
    for item in poli_list:
        if item != 'Big tent': # how to account for Big tent in average?
            numbers.append(poli_to_number[item])
    if len(numbers) == 0: # if Big tent is the only party
        return 0
    return sum(numbers) / len(numbers)

In [96]:
countries['poli_numbers'] = countries['Political Position'].apply(lambda x: turn_list_to_number(x))

In [97]:
countries

Unnamed: 0,Country,Ruling Party,Link,Political Position,poli_numbers
0,Afghanistan,Independent,/wiki/Independent_politician,[Dictatorship],-1.0
1,Åland,Liberals of Åland,/wiki/Liberals_of_%C3%85land,[Centre],4.0
2,Albania,Socialist Party of Albania,/wiki/Socialist_Party_of_Albania,"[Centre-left, left-wing]",2.5
3,Algeria,National Liberation Front,/wiki/National_Liberation_Front_(Algeria),[Dictatorship],-1.0
4,Andorra,Democrats for Andorra,/wiki/Democrats_for_Andorra,[Centre-right],5.0
...,...,...,...,...,...
212,Vietnam,Communist Party of Vietnam,/wiki/Communist_Party_of_Vietnam,[Dictatorship],-1.0
215,Western Sahara,Polisario Front,/wiki/Polisario_Front,[Dictatorship],-1.0
216,Yemen,General People's Congress,/wiki/General_People%27s_Congress_(Yemen),[Dictatorship],-1.0
217,Zambia,Patriotic Front,/wiki/Patriotic_Front_(Zambia),[Centre-left],3.0


In [98]:
# countries.to_csv('filtered_countries.csv') # Last saved: 10 May 20

In [3]:
countries = pd.read_csv('filtered_countries.csv', index_col=0)

## Updated covid data

In [4]:
covid = pd.read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv", na_values = "", encoding = "utf-8")

In [7]:
covid

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018,continentExp
0,31/05/2020,31,5,2020,866,3,Afghanistan,AF,AFG,37172386.0,Asia
1,30/05/2020,30,5,2020,623,11,Afghanistan,AF,AFG,37172386.0,Asia
2,29/05/2020,29,5,2020,580,8,Afghanistan,AF,AFG,37172386.0,Asia
3,28/05/2020,28,5,2020,625,7,Afghanistan,AF,AFG,37172386.0,Asia
4,27/05/2020,27,5,2020,658,1,Afghanistan,AF,AFG,37172386.0,Asia
...,...,...,...,...,...,...,...,...,...,...,...
20497,25/03/2020,25,3,2020,0,0,Zimbabwe,ZW,ZWE,14439018.0,Africa
20498,24/03/2020,24,3,2020,0,1,Zimbabwe,ZW,ZWE,14439018.0,Africa
20499,23/03/2020,23,3,2020,0,0,Zimbabwe,ZW,ZWE,14439018.0,Africa
20500,22/03/2020,22,3,2020,1,0,Zimbabwe,ZW,ZWE,14439018.0,Africa


In [5]:
covid.groupby('countriesAndTerritories').sum()[['cases', 'deaths']].loc['Italy'] # sanity check

cases     232664
deaths     33340
Name: Italy, dtype: int64

In [6]:
covid.groupby('countriesAndTerritories').sum()[['cases', 'deaths']].loc['United_Kingdom'] # sanity check

cases     272826
deaths     38376
Name: United_Kingdom, dtype: int64

In [8]:
cases_deaths = covid.groupby('countriesAndTerritories').sum()[['cases', 'deaths']]

In [9]:
population = covid[['countriesAndTerritories', 'popData2018', 'continentExp']
                  ].drop_duplicates().set_index('countriesAndTerritories')
population.columns = ['population2018', 'continent']

In [10]:
cov_table = pd.concat([cases_deaths, population], axis=1)

In [11]:
cov_table.dropna(inplace=True)
cov_table.shape

(205, 4)

In [12]:
cov_table['%cases'] = cov_table['cases']/cov_table['population2018']
cov_table['%deaths'] = cov_table['deaths']/cov_table['population2018']
cov_table['mortality'] = cov_table['deaths']/cov_table['cases']

In [13]:
spaced_names = [' '.join((name).split('_')) for name in cov_table.index]
cov_table.index = spaced_names
cov_table.tail(10)

Unnamed: 0,cases,deaths,population2018,continent,%cases,%deaths,mortality
United Republic of Tanzania,509,21,56318348.0,Africa,9e-06,3.728803e-07,0.041257
United States Virgin Islands,69,6,106977.0,America,0.000645,5.608682e-05,0.086957
United States of America,1770384,103781,327167434.0,America,0.005411,0.0003172107,0.058621
Uruguay,821,22,3449299.0,America,0.000238,6.378108e-06,0.026797
Uzbekistan,3554,14,32955400.0,Asia,0.000108,4.248166e-07,0.003939
Venezuela,1459,14,28870195.0,America,5.1e-05,4.849292e-07,0.009596
Vietnam,327,0,95540395.0,Asia,3e-06,0.0,0.0
Yemen,310,77,28498687.0,Asia,1.1e-05,2.701879e-06,0.248387
Zambia,1057,7,17351822.0,Africa,6.1e-05,4.034158e-07,0.006623
Zimbabwe,174,4,14439018.0,Africa,1.2e-05,2.770271e-07,0.022989


In [14]:
cov_table.rename(index={'United States of America':'United States', 'Czechia':'Czech Republic', 
                        'Guinea Bissau': 'Guinea-Bissau', "Cote dIvoire": "Côte d'Ivoire" },inplace=True)
cov_table.tail(10)

Unnamed: 0,cases,deaths,population2018,continent,%cases,%deaths,mortality
United Republic of Tanzania,509,21,56318348.0,Africa,9e-06,3.728803e-07,0.041257
United States Virgin Islands,69,6,106977.0,America,0.000645,5.608682e-05,0.086957
United States,1770384,103781,327167434.0,America,0.005411,0.0003172107,0.058621
Uruguay,821,22,3449299.0,America,0.000238,6.378108e-06,0.026797
Uzbekistan,3554,14,32955400.0,Asia,0.000108,4.248166e-07,0.003939
Venezuela,1459,14,28870195.0,America,5.1e-05,4.849292e-07,0.009596
Vietnam,327,0,95540395.0,Asia,3e-06,0.0,0.0
Yemen,310,77,28498687.0,Asia,1.1e-05,2.701879e-06,0.248387
Zambia,1057,7,17351822.0,Africa,6.1e-05,4.034158e-07,0.006623
Zimbabwe,174,4,14439018.0,Africa,1.2e-05,2.770271e-07,0.022989


In [82]:
# !conda install -c bokeh bokeh -y

## Putting it all together

In [15]:
country_parties = countries.set_index('Country')[['Ruling Party', 'poli_numbers']]
country_parties.tail()

Unnamed: 0_level_0,Ruling Party,poli_numbers
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Vietnam,Communist Party of Vietnam,-1.0
Western Sahara,Polisario Front,-1.0
Yemen,General People's Congress,-1.0
Zambia,Patriotic Front,3.0
Zimbabwe,Zimbabwe African National Union – Patriotic Front,2.0


In [16]:
cov_table.index.rename('Country', inplace=True)
cov_table.tail()

Unnamed: 0_level_0,cases,deaths,population2018,continent,%cases,%deaths,mortality
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Venezuela,1459,14,28870195.0,America,5.1e-05,4.849292e-07,0.009596
Vietnam,327,0,95540395.0,Asia,3e-06,0.0,0.0
Yemen,310,77,28498687.0,Asia,1.1e-05,2.701879e-06,0.248387
Zambia,1057,7,17351822.0,Africa,6.1e-05,4.034158e-07,0.006623
Zimbabwe,174,4,14439018.0,Africa,1.2e-05,2.770271e-07,0.022989


In [17]:
full_table = pd.concat([country_parties, cov_table], axis=1, join='inner')
full_table.tail()

Unnamed: 0_level_0,Ruling Party,poli_numbers,cases,deaths,population2018,continent,%cases,%deaths,mortality
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Venezuela,United Socialist Party of Venezuela,-1.0,1459,14,28870195.0,America,5.1e-05,4.849292e-07,0.009596
Vietnam,Communist Party of Vietnam,-1.0,327,0,95540395.0,Asia,3e-06,0.0,0.0
Yemen,General People's Congress,-1.0,310,77,28498687.0,Asia,1.1e-05,2.701879e-06,0.248387
Zambia,Patriotic Front,3.0,1057,7,17351822.0,Africa,6.1e-05,4.034158e-07,0.006623
Zimbabwe,Zimbabwe African National Union – Patriotic Front,2.0,174,4,14439018.0,Africa,1.2e-05,2.770271e-07,0.022989


In [18]:
country_parties.shape

(193, 2)

In [19]:
full_table.shape

(169, 9)

In [103]:
set(country_parties.index) - set(full_table.index) # checking non matching countries (covid data unavailable)

{'Artsakh',
 'Bonaire',
 'Burma',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'East Timor',
 'Eritrea',
 'European Union',
 'Guadeloupe',
 'Mayotte',
 'Nauru',
 'North Korea',
 'Northern Cyprus',
 'Northern Ireland',
 'Saint Helena',
 'Saint Pierre and Miquelon',
 'Samoa',
 'São Tomé and Príncipe',
 'Tanzania',
 'Transnistria',
 'Turkmenistan',
 'Turks and Caicos',
 'Western Sahara',
 'Åland'}

In [104]:
set(cov_table.index) - set(full_table.index) # non matching countries (political data unavailable)

{'Antigua and Barbuda',
 'Bahrain',
 'Belarus',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Cases on an international conveyance Japan',
 'Comoros',
 'Congo',
 'Democratic Republic of the Congo',
 'Eswatini',
 'Gambia',
 'Guam',
 'Guatemala',
 'Guernsey',
 'Guyana',
 'Holy See',
 'Kuwait',
 'Liberia',
 'Madagascar',
 'Montserrat',
 'Morocco',
 'Myanmar',
 'New Caledonia',
 'Northern Mariana Islands',
 'Oman',
 'Papua New Guinea',
 'Qatar',
 'Saint Kitts and Nevis',
 'Sao Tome and Principe',
 'Saudi Arabia',
 'Sint Maarten',
 'Timor Leste',
 'Turks and Caicos islands',
 'United Republic of Tanzania',
 'United States Virgin Islands'}

In [20]:
full_table.poli_numbers.max()

6.5

In [23]:
numbers_to_poli = {v: (k[0].upper() + k[1:]) for k,v in poli_to_number.items()}
numbers_to_poli

{-1: 'Dictatorship',
 0: 'Big tent',
 1: 'Far-left',
 2: 'Left-wing',
 3: 'Centre-left',
 4: 'Centre',
 5: 'Centre-right',
 6: 'Right-wing',
 7: 'Far-right'}

In [24]:
full_table.rename(columns={'Ruling Party': 'ruling_party'}, inplace=True)

In [50]:
# Bokeh Libraries
from bokeh.io import output_file, output_notebook, export_png
from bokeh.plotting import figure, show
from bokeh.models import HoverTool
from bokeh.models import ColumnDataSource, NumeralTickFormatter
from bokeh.layouts import gridplot

# output_file('filename.html')  # Render to static HTML, or 
output_notebook()  # Render inline in a Jupyter Notebook

def scatter_plot(y_axis_column: str = '%cases'):
    for i in range(2,4):
        if full_table[y_axis_column].values.max().round(decimals=i) > 0:
            if full_table[y_axis_column].values.max() < full_table[y_axis_column].values.max().round(decimals=i):
                max_y = full_table[y_axis_column].values.max().round(decimals=i)
            else:
                max_y = full_table[y_axis_column].values.max().round(decimals=i) + 0.5*10**(-i)
            break
    # Store the data in a ColumnDataSource
    covid_data = ColumnDataSource(full_table)

    # output_file(f'{y_axis_column} table.html', title=f'{y_axis_column} table')
    
    if y_axis_column == 'mortality':
        title_string = f'{y_axis_column} by ruling political party'
    else:
        title_string = f'% of {y_axis_column[1:]} by ruling political party'
    # Create a figure with no toolbar and axis ranges of [0,3]
    fig = figure(title=title_string,
                 plot_height=800, plot_width=800,
                 x_range=(-1.5, 7), y_range=(0, max_y),
                 x_axis_label='Political Position')

    # Format the tooltip
    tooltips = [
                ('Country','@Country'),
                ('Ruling Party', '@ruling_party')
        # TODO add numeric value
               ]

    # Draw the coordinates as circles
    fig.circle(x='poli_numbers', y=y_axis_column, size=10, alpha=0.5, source=covid_data)


    # Add the HoverTool to the figure
    fig.add_tools(HoverTool(tooltips=tooltips))
    
    # Format the y-axis tick labels
    fig.yaxis[0].formatter = NumeralTickFormatter(format='0.0000')
    
    # Custom labels for xaxis
    fig.xaxis.ticker = list(numbers_to_poli.keys())
    fig.xaxis.major_label_overrides = numbers_to_poli
    # Show plot
#     show(fig)
    return fig

In [44]:
cases = scatter_plot()

In [45]:
deaths = scatter_plot('%deaths')

In [46]:
mortality = scatter_plot('mortality')

In [53]:
# !conda install selenium -y

In [56]:
output_file("covid-by-country.html", title="Covid visualization by country")

output_notebook()
gplot=gridplot([cases, deaths, mortality], sizing_mode='stretch_both', ncols=3)#, plot_width=800, plot_height=600)
show(gplot)  # open a browser
# export_png(gplot, filename="covid-by-country.png")

## Statistical Analysis
Work in progress

In [29]:
sample_sizes = full_table.groupby('poli_numbers').count()['ruling_party']
keep_samples = sample_sizes[sample_sizes >= 10].rename('sample sizes')

In [30]:
keep_samples.index

Float64Index([-1.0, 2.5, 3.0, 4.0, 4.5, 5.0, 5.5, 6.0], dtype='float64', name='poli_numbers')

In [31]:
subsample_table = full_table[full_table['poli_numbers'].isin(keep_samples.index)]

In [32]:
test_column = 'mortality'
overall_mean = subsample_table[test_column].mean()
def compute_squares(value, mean):
    return (value-mean)**2

In [33]:
SST = subsample_table[test_column].apply(lambda x: compute_squares(x, overall_mean)).sum()
dof = subsample_table.shape[0]

In [34]:
group_means = subsample_table.groupby('poli_numbers').mean()[test_column]
ssw = []
ssb = []
for party_number in keep_samples.index:
    ssw.append(subsample_table[subsample_table['poli_numbers'] == party_number][test_column].apply(lambda x: 
                                                                compute_squares(x, group_means[party_number])).sum())
    ssb.append(subsample_table[subsample_table['poli_numbers'] == party_number][test_column].apply(lambda x: 
                                                                compute_squares(group_means[party_number], overall_mean)).sum())
SSW = sum(ssw)
SSB = sum(ssb)

In [35]:
dofb = len(keep_samples.index) - 1
dofw = 0
for i in keep_samples:
    dofw = dofw + i-1

In [36]:
keep_samples.index

Float64Index([-1.0, 2.5, 3.0, 4.0, 4.5, 5.0, 5.5, 6.0], dtype='float64', name='poli_numbers')

In [37]:
SSB

0.010074959118830797

In [38]:
SSW

0.22315768784072057

In [39]:
SSB + SSW

0.23323264695955137

In [40]:
# Null Hp: means are the same
# Alternative Hp: means are different
# alpha = 0.05
F_stat = (SSB/dofb)/(SSW/dofw)

In [41]:
Fc = 2.08 # automate this (scipy)

In [42]:
F_stat

0.9351933324213613