# Data Visualization tasks

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

## Article: Nuclear Power Gets a Fresh Look as Nations Chase Climate Goals

Some of the concepts from the article that can be visualized with data are:
 * Amount of nuclear energy generation (TWh)
 * What share of primary energy comes from nuclear?
 * What share of electricity comes from nuclear?

### Load data

In [2]:
# LOAD DATA
# One CSV per metric, read in this order:
#   df1 -> nuclear energy generation (TWh)
#   df2 -> share of primary energy coming from nuclear
#   df3 -> share of electricity coming from nuclear
csv_paths = [
    "./../../data/nuclear-energy-generation.csv",
    "./../../data/nuclear-primary-energy.csv",
    "./../../data/share-electricity-nuclear.csv",
]
df1, df2, df3 = (pd.read_csv(csv_path) for csv_path in csv_paths)

### Data wrangling and cleansing 

In [3]:
# REMOVE NAN VALUES
# Drop, in place, every row that has a missing value in any column of the
# three loaded frames.
for frame in (df1, df2, df3):
    frame.dropna(inplace=True)

#### Top 20 (during 2020)

In [4]:
# Filter by year
# Keep only the 2020 rows of each frame (boolean mask on the 'Year' column).
df1 = df1[df1['Year'] == 2020]
df2 = df2[df2['Year'] == 2020]
df3 = df3[df3['Year'] == 2020]

**Countries that generate the most nuclear energy (TWh)**

In [5]:
# Top 20 entities by nuclear generation (TWh), excluding the 'World' aggregate.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 raises
# a TypeError on non-numeric columns (e.g. a country code, if df1 has one)
# instead of silently dropping them as older versions did.
(
    df1.query("Entity != 'World'")
       .groupby('Entity')
       .mean(numeric_only=True)
       .sort_values('Electricity from nuclear (TWh)', ascending=False)
       .head(20)
       .reset_index()
)

Unnamed: 0,Entity,Year,Electricity from nuclear (TWh)
0,United States,2020.0,789.88
1,China,2020.0,366.2
2,France,2020.0,353.83
3,Russia,2020.0,215.74
4,South Korea,2020.0,152.33
5,Canada,2020.0,92.65
6,Ukraine,2020.0,76.2
7,Germany,2020.0,64.38
8,Spain,2020.0,58.3
9,United Kingdom,2020.0,50.84


**Countries and their share of primary energy that comes from nuclear (%)**

In [6]:
# Top 20 entities by share of primary energy coming from nuclear, excluding
# the 'World' aggregate.
# numeric_only=True restricts the mean to numeric columns: df2 also carries a
# text 'Code' column, and pandas >= 2.0 raises a TypeError on it instead of
# silently dropping it as older versions did.
(
    df2.query("Entity != 'World'")
       .groupby('Entity')
       .mean(numeric_only=True)
       .sort_values('Nuclear (% sub energy)', ascending=False)
       .head(20)
       .reset_index()
)

Unnamed: 0,Entity,Year,Nuclear (% sub energy)
0,France,2020.0,36.105
1,Sweden,2020.0,21.738
2,Slovakia,2020.0,21.61
3,Bulgaria,2020.0,21.413
4,Slovenia,2020.0,21.029
5,Ukraine,2020.0,20.423
6,Finland,2020.0,19.067
7,Switzerland,2020.0,18.943
8,Czechia,2020.0,17.354
9,Hungary,2020.0,14.732


**Countries and their share of electricity that comes from nuclear energy (%)**

In [7]:
# Top 20 entities by share of electricity coming from nuclear, excluding the
# 'World' aggregate.
# numeric_only=True restricts the mean to numeric columns: df3 also carries a
# text 'Code' column, and pandas >= 2.0 raises a TypeError on it instead of
# silently dropping it as older versions did.
(
    df3.query("Entity != 'World'")
       .groupby('Entity')
       .mean(numeric_only=True)
       .sort_values('Nuclear (% electricity)', ascending=False)
       .head(20)
       .reset_index()
)

Unnamed: 0,Entity,Year,Nuclear (% electricity)
0,France,2020.0,67.175
1,Slovakia,2020.0,53.986
2,Ukraine,2020.0,51.396
3,Hungary,2020.0,46.176
4,Bulgaria,2020.0,41.337
5,Belgium,2020.0,39.196
6,Slovenia,2020.0,37.596
7,Czechia,2020.0,37.503
8,Armenia,2020.0,34.836
9,Finland,2020.0,33.891


The countries to be visualized are the 20 with the highest nuclear power production in 2020.

In [8]:
# Top 20 entities by nuclear generation (TWh), in descending order, excluding
# the 'World' aggregate.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 raises
# a TypeError on non-numeric columns instead of silently dropping them, which
# is what the rest of this cell relies on (the result keeps only 'Entity',
# 'Year' and the generation column).
df = (
    df1.query("Entity != 'World'")
       .groupby('Entity')
       .mean(numeric_only=True)
       .sort_values('Electricity from nuclear (TWh)', ascending=False)
       .head(20)
       .reset_index()
)

# Join with the other data frames
# Left joins keep exactly the selected entities. Both sides of the first merge
# have a 'Year' column, so pandas suffixes them as Year_x / Year_y; both are
# dropped because every row is already restricted to a single year.
df = pd.merge(left=df, right=df2, on='Entity', how='left')\
     .drop(columns=['Year_x', 'Year_y'])

# After the second merge the 'Code' column coming from df2 is suffixed Code_x
# and the one from df3 Code_y; only the latter is kept, renamed back to 'Code'.
df = pd.merge(left=df, right=df3, on='Entity', how='left')\
     .drop(columns=['Code_x', 'Year'])\
     .rename(columns={'Code_y': 'Code'})

#### Rename columns

In [9]:
# Map the source column headers to snake_case names used by the rest of the
# notebook.
column_aliases = {
    'Entity': 'country',
    'Electricity from nuclear (TWh)': 'nuclear_energy_generation_TWh',
    'Nuclear (% sub energy)': 'share_primary_energy_nuclear_percentage',
    'Nuclear (% electricity)': 'share_electricity_from_nuclear_percentage',
}
df = df.rename(columns=column_aliases)

#### Create new categories

To create the visualization, the countries need to be ranked in descending order on at least two fields.

In [10]:
# Add a 1-based rank ("position") column for each metric.
#
# Each rank is assigned right after sorting by its own metric, so the cell no
# longer depends on the row order left behind by earlier cells and can be
# re-run safely. The range is derived from len(df) instead of a hard-coded 20.
# Rows with a missing metric are sorted last (pandas default) and therefore
# receive the lowest positions.
#
# The loop order matters: df ends up sorted by share of electricity from
# nuclear, exactly as before.
ranking_columns = [
    # (metric column, position column)
    ('nuclear_energy_generation_TWh', 'position_nuclear_energy_generation'),
    ('share_primary_energy_nuclear_percentage', 'position_share_primary_energy'),
    ('share_electricity_from_nuclear_percentage', 'position_share_electricity_nuclear'),
]

for value_column, position_column in ranking_columns:
    df = df.sort_values(value_column, ascending=False).reset_index(drop=True)
    df[position_column] = np.arange(1, len(df) + 1)

In [11]:
# Create a category for the continent
# NOTE(review): the labels are matched to rows purely by position (index
# 0..19), so they are only correct for the exact row order df has when this
# cell runs; any change in the data or in the preceding sort would silently
# mislabel countries.
continent_by_row = (
    ['Europe'] * 9
    + ['Asia']
    + ['Europe'] * 2
    + ['The Americas', 'Europe', 'The Americas', 'Europe']
    + ['Asia'] * 4
)
df['Continent'] = pd.Series(continent_by_row)

### Visualize the data

In [12]:
# Credits rendered as a three-line text mark at a fixed pixel position.
# The chart is fed a single empty record so that exactly one mark is drawn.
caption_lines = ['Data: Our World in Data', 'Visualization by Isaac Arroyo', '(@unisaacarroyov)']

caption = (
    alt.Chart({'values': [{}]})
    .mark_text(align='left', font='Georgia', fontStyle='italic', fontSize=10)
    .encode(
        x=alt.value(480),
        y=alt.value(650),
        text=alt.value(caption_lines),
    )
)

In [13]:
# Shared encodings for every layer: rank by generation on X, rank by share of
# primary energy on Y (reversed scale), country name as text, and a tooltip
# exposing the underlying values.
x_position = alt.X(
    'position_nuclear_energy_generation',
    scale=alt.Scale(reverse=False),
    axis=alt.Axis(title="Nuclear Energy Generation (Position)"),
)

y_position = alt.Y(
    'position_share_primary_energy',
    scale=alt.Scale(reverse=True),
    # One word per line so the un-rotated axis title stays narrow.
    axis=alt.Axis(title=["Primary", "Energy", "Coming", "From", "Nuclear", "Sources", "(Position)"]),
)

# (field, label shown in the tooltip)
tooltip_fields = [
    ('country', 'Country'),
    ('nuclear_energy_generation_TWh', 'Nuclear Energy Generation (TWh)'),
    ('share_primary_energy_nuclear_percentage', 'Primary Energy Coming From Nuclear Sources (%)'),
    ('share_electricity_from_nuclear_percentage', 'Electricity That Comes From Nuclear Energy (%)'),
]

base = alt.Chart(data=df).encode(
    x=x_position,
    y=y_position,
    text=alt.Text('country'),
    tooltip=[alt.Tooltip(field, title=label) for field, label in tooltip_fields],
)

In [14]:
# Circle layer: point size encodes the share of electricity coming from
# nuclear, point color encodes the continent. Both legends sit above the plot.
size_legend = alt.Legend(
    orient="top",
    direction="horizontal",
    title=["Percentage Of Electricity", "That Comes From", "Nuclear Energy"],
    titleFontSize=15,
    labelFontSize=13,
)
size_encoding = alt.Size(
    'share_electricity_from_nuclear_percentage',
    scale=alt.Scale(range=[60, 1200]),
    legend=size_legend,
)

continent_legend = alt.Legend(
    orient='top',
    title=["Continent"],
    titleFontSize=15,
    labelFontSize=13,
    columns=1,
)
color_encoding = alt.Color(
    'Continent',
    scale=alt.Scale(range=['#fab255', '#0f7ba2', '#dd5129']),
    legend=continent_legend,
)

circle_chart = base.mark_circle(opacity=1).encode(
    size=size_encoding,
    color=color_encoding,
)

In [15]:
# Country-name labels, drawn from the shared `text` encoding of `base`;
# dy offsets each label vertically from its point.
text_chart = base.mark_text(
    fontSize=13,
    fontWeight='bold',
    dy=-23,
)

In [16]:
# Explanatory subtitle, one list item per rendered line.
subtitle_lines = [
    "In 2020, the world generated more than 2600 TWh of nuclear energy, it accounted for just 4% of primary",
    "energy consumption; and 10% of electricity came from it.",
    "The visualization shows the 20 countries that generated the most nuclear power in 2020. Each point",
    "represents a country and the color represents the country's continent. On the X-axis is the country's",
    "position regarding their nuclear generation; on the Y-axis is the country’s position regarding the",
    "share of primary energy that comes from nuclear. Finally, the size represents the percentage of electricity",
    "that comes from nuclear power.",
]

chart_title = alt.TitleParams(
    text='Nuclear Energy: Nations producing and using it the most',
    fontSize=30,
    offset=20,
    anchor='middle',
    subtitle=subtitle_lines,
    subtitleFontSize=15,
    subtitlePadding=10,
)

# Layer the circles, the country labels and the credits, then apply the
# size/title properties and the global styling configuration.
final_chart = circle_chart + text_chart + caption
final_chart = (
    final_chart
    .properties(width=580, height=580, title=chart_title)
    .configure(font='Optima', background='white')
    .configure_view(stroke=None)
    .configure_axisY(titleAngle=0, titlePadding=-70, titleFontSize=20, labelPadding=20)
    .configure_axisX(titlePadding=10, titleFontSize=20, labelPadding=20)
    .configure_axis(
        ticks=False,
        domain=False,
        gridOpacity=0.1,
        gridColor='black',
        gridDash=[5],
        labelFontSize=13,
    )
)
final_chart

In [17]:
# Export the chart as html
# final_chart.save("./task_bloomberg_chart_01.html")

In [18]:
# Export the data (just in case)
# df.to_csv("./../../data/top20_countries_nuclear_energy.csv", index=False)

## Article: Use of online platforms, apps varies – sometimes widely – by demographic group

![](https://www.pewresearch.org/internet/wp-content/uploads/sites/9/2021/04/PI_2021.04.07_social-media_0-03.png?w=640)

In [19]:
# List of values
list_values = [81,82,80,79,84,85,95,91,83,49,75,83,79,90,70,86,89,84,81,74,
          69,61,77,67,74,72, 70,77,73,50,70,76,61,70,64,71,73,70,70,67,
          40,36,44,35,49,52,71,48,29,13,35,45,39,47,30,44,49,45,41,25,
          31,16,46,34,35,18,32,34,38,18,21,33,29,40,22,36,37,30,32,34,
          28,31,26,29,27,19,30,36,33,11,12,21,21,50,10,28,51,30,33,15,
          25,22,28,23,26,31,65,24,12,2,25,27,29,28,21,32,23,28,25,18,
          23,25,22,22,29,23,42,27,18,7,12,29,22,34,14,26,33,27,23,18,
          23,26,21,16,23,46,24,40,23,10,23,20,19,29,20,16,33,28,23,9,
          21,17,24,18,30,31,48,22,14,4,22,29,29,29,21,24,19,24,20,16,
          18,23,12,17,17,14,36,22,10,3,10,17,20,26,9,20,26,18,21,10,
          13,10,16,15,10,8,5,17,16,8,6,11,12,20,4,12,24,17,14,2
         ]


list_platform = ['Youtube'] * 20 + ['Facebook']*20 + ['Instagram']*20 +\
                ['Pinterest']*20 + ['LinkedIn']*20 + ['Snapchat']*20 + ['Twitter']*20 +\
                ['WhatsApp']*20 + ['TikTok']*20 + ['Reddit']*20 + ['Nextdoor']*20

list_group = ['Total', 'Men', 'Women','White','Black','Hispanic',
              '18-29 years old', '30-49 years old', '50-64 years old', '65+ years old',
              '<$30K','$30K-$49,999','$50K-74,999','$75K+',
              'High School or less','Some college','College+',
              'Urban','Suburban','Rural'] * 11

In [20]:
# Create data
df02 = pd.DataFrame({'group':list_group,
              'platform': list_platform,
              'values':list_values
             })

In [21]:
#df02.to_csv("./../../data/platforms_demographic_groups.csv", index=False)