**Authors**: <br>
    Iryna Savchuk | 20211310 <br>
    Cátia Parrinha | 20201320 <br>
    Gueu...

**Table of Contents** <br>
* [Import Libraries](#importlibraries)
* [Age](#age)
* [Gender](#gender)
* [Category](#category)
* [Age by Gender](#agebygender)
* [Cetegory by Gender](#categorybygender)
* [Top winners by Country, Region and Country Income](#topwinners)

<hr>
<a class="anchor" id="importlibraries">
    
# Import libraries
    
</a>

In [1]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.figure_factory as ff

import geojson
import country_converter as coco # to convert and match country names

In [2]:
# Load the merged dataset that the plotting cells below read from.
# The path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/merged.csv')

<hr>
<a class="anchor" id="age">
    
# Age
    
</a>

In [3]:
# Overall distribution of the `prizeAge` column, drawn as a single histogram.
x = df['prizeAge']

# Trace and layout are described with plain dicts; plotly builds the
# corresponding objects when the figure is created.
hist_data = {
    'type': 'histogram',
    'x': x,
    'marker': {'color': 'silver'},
}
layout = {'title': {'text': 'Ages Distribution'}}

# Assemble the figure and open it with the 'browser' renderer.
fig_1 = go.Figure(data=hist_data, layout=layout)
fig_1.show(renderer='browser')

<hr>
<a class="anchor" id="gender">
    
# Gender
    
</a>

In [4]:
# Inspect the distinct values present in the `gender` column.
pd.unique(df['gender'])

array(['male', 'female'], dtype=object)

In [5]:
# Donut chart with the percentage of rows belonging to each gender.
#
# `value_counts()` returns counts ordered by frequency (NaN dropped), whereas
# `df['gender'].unique()` returns values in order of first appearance (NaN
# included). Pairing the two as labels/values can attach a label to the wrong
# slice, so the labels are taken from the index of the counts, which is
# aligned with the values by construction.
gender_labels = df['gender'].value_counts()  # counts per gender (index = gender value)
gender_values = (gender_labels / gender_labels.sum()) * 100  # share of each gender, in percent
unique_genders = gender_labels.index.to_numpy()  # labels in the same order as gender_values

gender_data = dict(type='pie',
                   labels=unique_genders,
                   values=gender_values,
                   textposition='inside',
                   hole=0.6,  # hole > 0 turns the pie into a donut
                   marker=dict(colors=['#333F44', '#37AA9C'])  # '#94F3E4'
                   )

gender_layout = dict(title=dict(text='Prizes by Gender'))

gender = go.Figure(data=[gender_data], layout=gender_layout)

gender.show(renderer='browser')

<hr>
<a class="anchor" id="category">
    
# Category 
    
</a>

In [6]:
# Bar chart with the percentage of rows belonging to each prize category.
#
# `value_counts()` orders categories by frequency, while
# `df['category'].unique()` orders them by first appearance in the data.
# Using the latter for x and the former for y can put a percentage under the
# wrong category name, so x is taken from the index of the counts, which is
# aligned with the y values by construction.
category_labels = df['category'].value_counts()  # counts per category (index = category name)
category_values = (category_labels / category_labels.sum()) * 100  # share of each category, in percent
unique_category = category_labels.index.to_numpy()  # names in the same order as category_values

bar_category_data = dict(type='bar',
                         x=unique_category,
                         y=category_values,
                         # One color per bar, applied in order.
                         marker=dict(color=['#333F44', '#4D5A64', '#68757E', '#828F99', '#9DA9B3',
                                            '#B7C4CE', '#D2DEE8', '#A9EDE2', '#93d9f2', '#94F3E4']))

bar_category_layout = dict(title=dict(text='Prizes by Category'),
                           xaxis=dict(title='category'),
                           yaxis=dict(title='Percentage'),
                           # Transparent paper and plot backgrounds.
                           paper_bgcolor='rgba(0,0,0,0)',
                           plot_bgcolor='rgba(0,0,0,0)')

bar_fig = go.Figure(data=[bar_category_data], layout=bar_category_layout)

bar_fig.show(renderer='browser')

<hr>
<a class="anchor" id="agebygender">
    
# Age by Gender
    
</a>

In [7]:
# Split the `prizeAge` column into one series per gender.
male_data = df.loc[df['gender'].eq('male'), 'prizeAge']
female_data = df.loc[df['gender'].eq('female'), 'prizeAge']

# One histogram trace per gender, each with its own color.
male_hist = go.Histogram(name='Male', x=male_data, marker_color='#333F44')
female_hist = go.Histogram(name='Female', x=female_data, marker_color='#37AA9C')

# Title plus transparent paper/plot backgrounds.
layout = go.Layout(
    title={'text': 'Ages Distribution by Gender'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Combine both traces in one figure and open it with the 'browser' renderer.
fig_1 = go.Figure(data=[male_hist, female_hist], layout=layout)
fig_1.show(renderer='browser')

In [8]:
# Plotly Express version: `prizeAge` histogram colored by gender, with a
# marginal box plot above the main panel.
fig = px.histogram(
    data_frame=df,
    x='prizeAge',
    color='gender',
    color_discrete_sequence=['#333F44', '#37AA9C'],
    hover_data=df.columns,  # expose every column in the hover tooltip
    marginal='box',         # other options: 'violin', 'rug'
)

# Transparent plot background.
fig.update_layout({'plot_bgcolor': 'rgba(0,0,0,0)'})

fig.show(renderer='browser')

In [9]:
# Overlaid histograms of `male_data` / `female_data` (both must already be
# defined by a previously executed cell).
# Opacity is lowered on both traces so the overlapping bars remain visible.
fig = go.Figure(data=[
    go.Histogram(x=male_data, name='Male', marker_color='#333F44', opacity=0.75),
    go.Histogram(x=female_data, name='Female', marker_color='#37AA9C', opacity=0.75),
])

# Draw the histograms on top of each other and use a transparent plot background.
fig.update_layout(barmode='overlay', plot_bgcolor='rgba(0,0,0,0)')
fig.show(renderer='browser')

In [10]:
# Select the `prizeAge` values for each gender.
male_data = df.loc[df['gender'].eq('male'), 'prizeAge']
female_data = df.loc[df['gender'].eq('female'), 'prizeAge']

# Inputs for the distplot: one data series, one label and one color per group,
# all listed in the same order.
hist_data = [male_data, female_data]
group_labels = ['Male', 'Female']
colors = ['#333F44', '#37AA9C']

# Distribution plot with the histogram bars switched off (show_hist=False);
# no custom bin size is passed, so the library default applies.
fig = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    colors=colors,
    show_hist=False,
)

# Title and transparent paper/plot backgrounds.
fig.update_layout(
    title={'text': 'Ages Distribution by Gender'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)
fig.show(renderer='browser')

<hr>
<a class="anchor" id="categorybygender">
    
# Category by gender
    
</a>

In [11]:
# Split the full frame (all columns) into one subset per gender.
# Note: this rebinds `male_data` / `female_data`, which earlier cells used for
# the `prizeAge` series only.
male_data = df[df['gender'] == 'male']
female_data = df[df['gender'] == 'female']

# Number of rows per category within each gender subset.
male_category_labels = male_data['category'].value_counts()
female_category_labels = female_data['category'].value_counts()

# One bar trace per gender: categories on x, counts on y.
male_bar = go.Bar(
    name='Male',
    x=male_category_labels.index,
    y=male_category_labels.values,
    marker_color='#333F44',
)
female_bar = go.Bar(
    name='Female',
    x=female_category_labels.index,
    y=female_category_labels.values,
    marker_color='#37AA9C',
)

# Titles plus transparent paper/plot backgrounds.
layout = go.Layout(
    title={'text': 'Prizes by Category'},
    xaxis={'title': 'category'},
    yaxis={'title': 'Count'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Combine both traces in one figure and open it with the 'browser' renderer.
fig = go.Figure(data=[male_bar, female_bar], layout=layout)
fig.show(renderer='browser')

In [12]:
# `prizeAge` histogram colored by gender, with one facet column per category
# and a marginal box plot.
fig = px.histogram(
    data_frame=df,
    x='prizeAge',
    color='gender',
    facet_col='category',   # one panel per prize category
    color_discrete_sequence=['#333F44', '#37AA9C'],
    hover_data=df.columns,  # expose every column in the hover tooltip
    marginal='box',         # other options: 'violin', 'rug'
)

# Transparent plot background.
fig.update_layout({'plot_bgcolor': 'rgba(0,0,0,0)'})

fig.show(renderer='browser')

<hr>
<a class="anchor" id="topwinners">
    
# Top winners by Country, Region and Country Income
    
</a>

In [13]:
# Number of rows per value of `bornCountry`, most frequent first.
country_counts = df['bornCountry'].value_counts().sort_values(ascending=False)

# Keep only the ten most frequent countries.
top_countries = country_counts.head(10)

# The first (largest) bar gets a lighter shade; the remaining bars share the dark color.
country_bar_colors = ['#9DA9B3'] + ['#333F44'] * (len(top_countries) - 1)
bar_country_trace = go.Bar(
    x=top_countries.index,
    y=top_countries.values,
    marker_color=country_bar_colors,
)

# Titles plus transparent paper/plot backgrounds.
layout = go.Layout(
    title={'text': 'Counts by Country of Birth'},
    xaxis={'title': 'Country'},
    yaxis={'title': 'Count'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Build the figure and open it with the 'browser' renderer.
fig_countries = go.Figure(data=[bar_country_trace], layout=layout)
fig_countries.show(renderer='browser')

In [14]:
# Number of rows per value of `region`, most frequent first.
region_counts = df['region'].value_counts().sort_values(ascending=False)

# Keep at most the ten most frequent regions.
top_regions = region_counts.head(10)

# The first (largest) bar gets a lighter shade; the remaining bars share the dark color.
region_bar_colors = ['#9DA9B3'] + ['#333F44'] * (len(top_regions) - 1)
bar_region_trace = go.Bar(
    x=top_regions.index,
    y=top_regions.values,
    marker_color=region_bar_colors,
)

# Titles plus transparent paper/plot backgrounds.
layout = go.Layout(
    title={'text': 'Counts by Regions of Birth'},
    xaxis={'title': 'Region'},
    yaxis={'title': 'Count'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Build the figure and open it with the 'browser' renderer.
fig_regions = go.Figure(data=[bar_region_trace], layout=layout)
fig_regions.show(renderer='browser')

In [15]:
# Number of rows per value of `income`, most frequent first.
income_counts = df['income'].value_counts().sort_values(ascending=False)

# Keep at most the ten most frequent income groups.
top_income = income_counts.iloc[:10]

# Bar trace: the first (largest) bar gets a lighter shade, the rest share the dark color.
bar_income_trace = go.Bar(x=top_income.index,
                          y=top_income.values,
                          marker=dict(color=['#9DA9B3'] + ['#333F44']*(len(top_income)-1)))

# Layout. The title previously read 'County Income' (typo) and the x axis was
# labelled 'Region' (copied from the regions chart) although it shows income groups.
layout = go.Layout(title=dict(text='Counts by Country Income'),
                   xaxis=dict(title='Income'),
                   yaxis=dict(title='Count'),
                   paper_bgcolor='rgba(0,0,0,0)',
                   plot_bgcolor='rgba(0,0,0,0)')

# Build the figure and open it with the 'browser' renderer.
fig_income = go.Figure(data=[bar_income_trace], layout=layout)
fig_income.show(renderer='browser')

In [16]:
# Show the three bar charts built in the previous cells (country, region,
# income) side by side in a single row of subplots.
# Requires `bar_country_trace`, `bar_region_trace` and `bar_income_trace`
# to have been created already.

subplot_titles = ['Country', 'Region', 'Income']

sub_1 = make_subplots(
    rows=1,
    cols=3,
    specs=[[{'type': 'xy'}, {'type': 'xy'}, {'type': 'xy'}]],
    subplot_titles=subplot_titles,
)

# Place each trace in its own column, left to right.
for col_number, bar_trace in enumerate(
        (bar_country_trace, bar_region_trace, bar_income_trace), start=1):
    sub_1.add_trace(bar_trace, row=1, col=col_number)

sub_1.show(renderer='browser')