# Caso Práctico - Visualización de Datos
---
Visualización de Datos del Máster Universitario en Ingeniería y Ciencia de Datos (2023/2024)

Alumno: **Javier Orive Soto**


# 1. Importación de dependencias

In [1]:
import pandas as pd
import panel as pn
pn.extension('vega')

import numpy as np
import altair as alt

import datetime as dt

# 2. Importación de datos

## 2.1 Datos de generación

### 2.1.1. Grafico de generacion por tecnología (días)

In [2]:
# Cargar el archivo Excel
#file_path = 'Datos/Economico/PrecioMedioHorarioFinal_2023_d.xls'
file_path = 'Datos/Generacion/GeneracionTotal_2023_h.csv'
#df = pd.read_excel(file_path)
df_G = pd.read_csv(file_path, delimiter=';')

# Seleccionar solo las columnas de interés
#df = df[['name', 'value', 'datetime']]

In [3]:
# Convertir la columna datetime a tipo datetime
df_G['datetime'] = pd.to_datetime(df_G['datetime'], utc = True) #, utc = True

# Convertir la columna value a tipo float
df_G['value'] = df_G['value'].astype(float)

# Convertir la columna name a tipo categoría
df_G['name'] = df_G['name'].astype('category')

# Elimino las columnas vacías
df_G = df_G.drop(['geoid', 'geoname'], axis=1)

In [4]:
df_G = df_G.drop(df_G[df_G['id']== 10195].index)
df_G['name'] = df_G['name'].str.slice(18)
df_G['date'] = df_G['datetime'].dt.date

df_G['year'] = df_G['datetime'].dt.year
df_G['month'] = df_G['datetime'].dt.month
df_G['day'] = df_G['datetime'].dt.day
df_G['hour'] = df_G['datetime'].dt.hour

In [5]:
df_G.head()

Unnamed: 0,id,name,value,datetime,date
4321,1169,Biogás,83.937,2022-12-31 23:00:00+00:00,2022-12-31
4322,1169,Biogás,81.609,2023-01-01 00:00:00+00:00,2023-01-01
4323,1169,Biogás,80.939,2023-01-01 01:00:00+00:00,2023-01-01
4324,1169,Biogás,80.873,2023-01-01 02:00:00+00:00,2023-01-01
4325,1169,Biogás,80.765,2023-01-01 03:00:00+00:00,2023-01-01


In [6]:
df_G.info()

<class 'pandas.core.frame.DataFrame'>
Index: 155473 entries, 4321 to 164210
Data columns (total 5 columns):
 #   Column    Non-Null Count   Dtype              
---  ------    --------------   -----              
 0   id        155473 non-null  int64              
 1   name      155473 non-null  object             
 2   value     155473 non-null  float64            
 3   datetime  155473 non-null  datetime64[ns, UTC]
 4   date      155473 non-null  object             
dtypes: datetime64[ns, UTC](1), float64(1), int64(1), object(2)
memory usage: 7.1+ MB


In [7]:
# https://towardsdatascience.com/how-to-build-a-time-series-dashboard-in-python-with-panel-altair-and-a-jupyter-notebook-c0ed40f02289
date_slider = pn.widgets.DateSlider(name='Date Slider', start=dt.datetime(2023, 1, 1), end=dt.datetime(2023, 12, 31), value=dt.datetime(2023, 1, 1))

date_slider

BokehModel(combine_events=True, render_bundle={'docs_json': {'787e628e-6469-45f1-9d0f-cfe7ddf1fbe4': {'version…

In [8]:
@pn.depends(date_slider.param.value)
def get_plot(date):
     # Load and format the data
     df = df_G # define df
     ##df[‘date’] = pd.to_datetime(df[‘date’])
     # create date filter using values from the range slider
     # store the first and last date range slider value in a var
     start_date = date_slider.value
     # create filter mask for the dataframe
     mask = (df['date'] == start_date)
     df = df.loc[mask] # filter the dataframe

     selection = alt.selection_point(fields=['name'], bind='legend')

     # create the Altair chart object
     chart = alt.Chart(df).mark_area(interpolate='step').encode(
               x=alt.X('datetime:T', title='Hora del día'),  # Eje X: Hora del día
               y=alt.Y('sum(value):Q', title='Potencia eléctrica (MW)', scale=alt.Scale(domain=[0, 42000])),  # Suma de potencia
               color=alt.Color('name:N', title='Tecnología'),  # Diferenciar por tecnología
               opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
               ).properties(
               title='Generación eléctrica por tecnología',
               width=700,
               height=400
               ).add_params(selection)

     return chart

In [9]:
title = '## Desglose de generación eléctrica'
subtitle = 'Este dashboard permite al usuario seleccionar\n el día para el que se quieren visualizar los datos.\n\n Además, también se puede seleccionar la tecnología\n para resaltarla en el gráfico.'

dashboard = pn.Row(get_plot,
                   pn.Column(title, subtitle, date_slider)
)

In [10]:
dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'b4af3fc8-51a4-4177-b047-50e6d577bea2': {'version…

### 2.1.2. Grafico de media anual generacion por tecnología

In [82]:
df_aux = df_G.groupby('name')['value'].sum().sort_values(ascending=False).reset_index()

# Calcular la suma total de la columna 'value'
total_sum = df_aux['value'].sum()

# Normalizar la columna 'value' a un 100%
df_aux['value_normalized'] = (df_aux['value'] / total_sum)

In [83]:
df_aux

Unnamed: 0,name,value,value_normalized
0,Eólica terrestre,61080760.0,0.2419517
1,Nuclear,54124150.0,0.2143953
2,Ciclo combinado,39201300.0,0.1552832
3,Solar fotovoltaica,36483000.0,0.1445156
4,Hidráulica UGH,20687120.0,0.08194533
5,Gas Natural Cogeneración,16532100.0,0.06548655
6,Turbinación bombeo,5184742.0,0.02053767
7,Solar térmica,4673006.0,0.0185106
8,Hidráulica no UGH,4547275.0,0.01801255
9,Hulla antracita,2933010.0,0.01161817


In [88]:
selection = alt.selection_point(fields=['name'], bind='legend')

# Base del gráfico
base = alt.Chart(df_aux).transform_window(
    rank='rank(value)',
    sort=[alt.SortField('value', order='descending')]
).encode(
    alt.Theta("value:Q", sort=df_aux['name'].to_list()).stack(True),
    alt.Radius("value").scale(type="sqrt", zero=True),
    color=alt.Color("name:N", title='Tecnología'),
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_params(selection).properties(
    title='Generación eléctrica anual por tecnología',
    width=400,
    height=400
)

# Gráfico de arco
c1 = base.mark_arc(innerRadius=20, stroke="#fff")

# Gráfico de texto con formato
c2 = base.mark_text(radiusOffset=10).encode(
    text=alt.Text("value_normalized:Q", format=".0%")
    ).transform_filter(
    alt.datum.rank <= 6
)

# Mostrar el gráfico
c1 + c2

In [76]:
selection = alt.selection_point(fields=['name'], bind='legend')

base = alt.Chart(df_aux).encode(
    alt.Theta("value:Q", sort=df_aux['name'].to_list()).stack(True),
    alt.Radius("value").scale(type="sqrt", zero=True),
    color=alt.Color("name:N", title='Tecnología'),
    #color="name:N",
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_params(selection
).properties(
title='Generación eléctrica por tecnología',
width=400,
height=300)

c1 = base.mark_arc(innerRadius=20, stroke="#fff")

c2 = base.mark_text(radiusOffset=10).encode(text="value_normalized")

c1 + c2

## 2.2 Datos económicos

### 2.2.1. Desglose de precio

In [32]:
# Cargar el archivo Excel
file_path = 'Datos/Economico/PrecioMedioHorarioFinal_2023_h.xlsx'
df_E_es = pd.read_excel(file_path)

# Seleccionar solo las columnas de interés
df_E_es = df_E_es[['id', 'name', 'value', 'datetime']]

In [33]:
# Convertir la columna datetime a tipo datetime
df_E_es['datetime'] = pd.to_datetime(df_E_es['datetime'], utc = True) #, utc = True

# Convertir la columna name a tipo categoría
df_E_es['name'] = df_E_es['name'].astype('category')

In [34]:
df_E_es = df_E_es.drop(df_E_es[df_E_es['id']== 10211].index)
df_E_es['name'] = df_E_es['name'].str.slice(32).str.capitalize()
df_E_es['date'] = df_E_es['datetime'].dt.date

In [35]:
df_E_es#['name'].unique()

Unnamed: 0,id,name,value,datetime,date
8760,811,Banda secundaria,4.64,2022-12-31 23:00:00+00:00,2022-12-31
8761,811,Banda secundaria,6.25,2023-01-01 00:00:00+00:00,2023-01-01
8762,811,Banda secundaria,6.98,2023-01-01 01:00:00+00:00,2023-01-01
8763,811,Banda secundaria,5.26,2023-01-01 02:00:00+00:00,2023-01-01
8764,811,Banda secundaria,2.80,2023-01-01 03:00:00+00:00,2023-01-01
...,...,...,...,...,...
148915,1277,Servicio de interrumpibilidad,0.00,2023-12-31 18:00:00+00:00,2023-12-31
148916,1277,Servicio de interrumpibilidad,0.00,2023-12-31 19:00:00+00:00,2023-12-31
148917,1277,Servicio de interrumpibilidad,0.00,2023-12-31 20:00:00+00:00,2023-12-31
148918,1277,Servicio de interrumpibilidad,0.00,2023-12-31 21:00:00+00:00,2023-12-31


In [51]:
@pn.depends(date_slider.param.value)
def get_plot_price(date):
     # Load and format the data
     df = df_E_es # define df
     ##df[‘date’] = pd.to_datetime(df[‘date’])
     # create date filter using values from the range slider
     # store the first and last date range slider value in a var
     start_date = date_slider.value
     # create filter mask for the dataframe
     mask = (df['date'] == start_date)
     df = df.loc[mask] # filter the dataframe

     selection = alt.selection_point(fields=['name'], bind='legend')

     # create the Altair chart object
     chart = alt.Chart(df).mark_area(interpolate='step').encode(
               x=alt.X('datetime:T', title='Hora del día'),  # Eje X: Hora del día
               y=alt.Y('value:Q', title='Precio por MWh (€/MWh)', scale=alt.Scale(domain=[-10, 250])),
               color=alt.Color('name:N', title='Concepto'),  # Diferenciar por tecnología
               opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
               order=alt.Order('sum(value):Q', sort='descending')
               ).properties(
               title='Precio electricidad',
               width=700,
               height=400
               ).add_params(selection)

     return chart

In [52]:
title = '## Desglose del precio de la electricidad'
subtitle = 'Este dashboard permite al usuario seleccionar\n el día para el que se quieren visualizar los datos.\n\n Además, también se puede seleccionar la tecnología\n para resaltarla en el gráfico.'

dashboard = pn.Row(get_plot_price,
                   pn.Column(title, subtitle, date_slider))

  super().__init__(**params)


In [53]:
dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'ee7b3250-1723-4463-ab44-b759db82786a': {'version…

In [61]:

def prices(df):
    
    # Crear una máscara de filtro para la fecha seleccionada
    #mask = (df['date'] == date)
    #df_filtered = df.loc[mask]

    df_filtered = df.groupby('datetime')['value'].sum().reset_index()


    print(df_filtered['value'].describe())

prices(df_E_es)

count    8760.000000
mean       97.723726
std        40.100838
min         6.190000
25%        74.765000
50%       101.265000
75%       124.850000
max       251.510000
Name: value, dtype: float64


In [76]:
# Cargar el archivo Excel
file_path = 'Datos/Economico/PrecioEuropa_2023_h.csv'
#df = pd.read_excel(file_path)
df_E_eu = pd.read_csv(file_path, delimiter=';')

In [77]:
df_E_eu['name'].unique()

array(['Precio mercado SPOT Diario Portugal',
       'Precio mercado SPOT Diario Francia',
       'Precio mercado SPOT Diario España',
       'Precio mercado SPOT Diario Reino Unido',
       'Precio mercado SPOT Diario Italia',
       'Precio mercado SPOT Diario Alemania',
       'Precio mercado SPOT Diario Bélgica',
       'Precio mercado SPOT Diario Países Bajos',
       'Término de facturación de energía activa del PVPC 2.0TD Península',
       'Término de facturación de energía activa del PVPC 2.0TD Canarias',
       'Término de facturación de energía activa del PVPC 2.0TD Baleares',
       'Término de facturación de energía activa del PVPC 2.0TD Ceuta',
       'Término de facturación de energía activa del PVPC 2.0TD Melilla'],
      dtype=object)

In [78]:
# Convertir la columna datetime a tipo datetime
df_E_eu['datetime'] = pd.to_datetime(df_E_eu['datetime'], utc = True) #, utc = True

# Convertir la columna name a tipo categoría
df_E_eu['name'] = df_E_eu['name'].astype('category')


df_E_eu = df_E_eu.drop(df_E_eu[df_E_eu['id']== 1001].index)
df_E_eu['name'] = df_E_eu['name'].str.slice(27).str.capitalize()
df_E_eu['date'] = df_E_eu['datetime'].dt.date

In [123]:
@pn.depends(date_slider.param.value)
def get_plot_price_pais(date):
     # Load and format the data
     df = df_E_eu # define df
     ##df[‘date’] = pd.to_datetime(df[‘date’])
     # create date filter using values from the range slider
     # store the first and last date range slider value in a var
     start_date = date_slider.value
     # create filter mask for the dataframe
     mask = (df['date'] == start_date)
     df = df.loc[mask] # filter the dataframe
    
     df['datetime'] = df['datetime'] + dt.timedelta(hours=0, minutes=30)
     
     order = ['Alemania','Bélgica','España','Francia','Italia','Países bajos','Portugal','Reino unido']
     # Convertir la columna 'category' a un tipo categórico con el orden definido
     df['name'] = pd.Categorical(df['name'], categories=order, ordered=True)
     
     # Ordenar el DataFrame según la columna 'category'
     df = df.sort_values('name')


     selection = alt.selection_point(fields=['name'], bind='legend')

     # create the Altair chart object
     """
     chart = alt.Chart(df).mark_line(interpolate='step').encode(
               x=alt.X('datetime:T', title='Hora del día'),  # Eje X: Hora del día
               y=alt.Y('value:Q', title='Precio por MWh (€/MWh)', scale=alt.Scale(domain=[-10, 250])),
               color=alt.Color('name:N', title='País'), 
               opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
               ).properties(
               title='Precio electricidad por paises',
               width=700,
               height=400
               ).add_params(selection)
     """
     # Create a selection that chooses the nearest point & selects based on x-value
     nearest = alt.selection_point(nearest=True, on="pointerover",
                              fields=["datetime"], empty=False)
     
     # The basic line
     line = alt.Chart(df).mark_line(interpolate='step').encode(
               x=alt.X('datetime:T', title='Hora del día'),  # Eje X: Hora del día
               y=alt.Y('value:Q', title='Precio por MWh (€/MWh)', scale=alt.Scale(domain=[-10, 250])),
               color=alt.Color('name:N', title='País'), 
               opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
               ).properties(
               title='Precio electricidad por paises',
               width=700,
               height=400
               ).add_params(selection)

     # Draw points on the line, and highlight based on selection
     points = line.mark_point().encode(
          opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
     
     # Draw a rule at the location of the selection
     rules = alt.Chart(df).transform_pivot(
        "name",
        value="value",
        groupby=["datetime"]).mark_rule(color="gray").encode(
         x="datetime",
         opacity=alt.condition(nearest, alt.value(0.3), alt.value(0)),
         tooltip=[alt.Tooltip(c, type="quantitative") for c in order],
     ).add_params(nearest)

     # Put the five layers into a chart and bind the data
     chart = alt.layer(
         line, points, rules
     ).properties(
         width=600, height=300
     )

     return chart

In [124]:
title = '## Desglose del precio de la electricidad'
subtitle = 'Este dashboard permite al usuario seleccionar\n el día para el que se quieren visualizar los datos.\n\n Además, también se puede seleccionar la tecnología\n para resaltarla en el gráfico.'

dashboard = pn.Row(get_plot_price_pais,
                   pn.Column(title, subtitle, date_slider))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['datetime'] = df['datetime'] + dt.timedelta(hours=0, minutes=30)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['name'] = pd.Categorical(df['name'], categories=order, ordered=True)
  super().__init__(**params)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['datetime'] = df['datetime'] + dt

In [125]:
dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'fcf716f4-6d8b-40bd-ab52-8712938156e3': {'version…

In [62]:
import altair as alt
from vega_datasets import data

counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url

alt.Chart(counties).mark_geoshape().encode(
    color='rate:Q'
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'id', ['rate'])
).project(
    type='albersUsa'
).properties(
    width=500,
    height=300
)

In [63]:
counties

UrlData({
  format: TopoDataFormat({
    feature: 'counties',
    type: 'topojson'
  }),
  url: 'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json'
})

## 2.3 Datos de emisiones

### 2.3.1. 

In [16]:
# Cargar el archivo Excel
file_path = 'Datos/Emisiones/output/CI_bottom_up_method.csv'
#df = pd.read_excel(file_path)
df_emis = pd.read_csv(file_path, delimiter=',')


In [17]:
# Convertir la columna datetime a tipo datetime
df_emis['datetime'] = pd.to_datetime(df_emis['datetime'], utc = True) #, utc = True
df_emis['date'] = df_emis['datetime'].dt.date

In [19]:
df_emis = df_emis[['datetime','date','BE','DE','ES','FR','GB','IT','NL','PT']]

In [20]:
df_emis = pd.melt(df_emis, id_vars=['datetime', 'date'], var_name='name', value_name='value')


In [21]:
df_emis_avg = df_emis.copy().groupby(['date','name']).agg({'value': 'mean'}).reset_index()

In [22]:
df_emis_avg['date'] = pd.to_datetime(df_emis_avg['date'])
df_emis_avg

Unnamed: 0,date,name,value
0,2023-01-01,BE,117.320993
1,2023-01-01,DE,235.083593
2,2023-01-01,ES,118.545061
3,2023-01-01,FR,20.797483
4,2023-01-01,GB,232.981818
...,...,...,...
2915,2023-12-31,FR,32.469587
2916,2023-12-31,GB,209.400385
2917,2023-12-31,IT,331.116052
2918,2023-12-31,NL,507.534704


In [29]:
# df de potencias
df_pot = pd.read_csv('Datos/Emisiones/archive/combined.csv', delimiter=',')
df_pot['date'] = pd.to_datetime(df_pot['date'])

In [30]:
df_pot

Unnamed: 0,date,power,name
0,2023-01-01,8330.635417,BE
1,2023-01-02,9918.520833,BE
2,2023-01-03,10352.572917,BE
3,2023-01-04,10825.218750,BE
4,2023-01-05,10826.447917,BE
...,...,...,...
2915,2023-12-27,5949.500000,PT
2916,2023-12-28,5763.125000,PT
2917,2023-12-29,5412.125000,PT
2918,2023-12-30,5184.250000,PT


In [31]:
df_pot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2920 entries, 0 to 2919
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2920 non-null   datetime64[ns]
 1   power   2920 non-null   float64       
 2   name    2920 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 68.6+ KB


In [32]:
result_df = pd.merge(df_emis_avg, df_pot, on=['date', 'name'], how='inner')
result_df

Unnamed: 0,date,name,value,power
0,2023-01-01,BE,117.320993,8330.635417
1,2023-01-01,DE,235.083593,44306.927083
2,2023-01-01,ES,118.545061,22456.916667
3,2023-01-01,FR,20.797483,54591.791667
4,2023-01-01,GB,232.981818,33337.500000
...,...,...,...,...
2915,2023-12-31,FR,32.469587,62116.125000
2916,2023-12-31,GB,209.400385,32225.125000
2917,2023-12-31,IT,331.116052,27507.708333
2918,2023-12-31,NL,507.534704,12909.125000


In [180]:
alt.Chart(df_emis_avg[df_emis_avg['name']=='ES'], title="Daily Max Temperatures (C) in Seattle, WA").mark_rect().encode(
    alt.X("date(date):O").title("Day").axis(format="%e", labelAngle=0),
    alt.Y("month(date):O").title("Month"),
    alt.Color("value").title(None),
    tooltip=[
        alt.Tooltip("monthdate(date)", title="Date"),
        alt.Tooltip("value", title="Max Temp"),
    ],
).configure_view(
    step=13,
    strokeWidth=0
).configure_axis(
    domain=False)

In [181]:
alt.Chart(df_emis_avg[df_emis_avg['name']=='DE'], title="Daily Max Temperatures (C) in Seattle, WA").mark_rect().encode(
    alt.X("date(date):O").title("Day").axis(format="%e", labelAngle=0),
    alt.Y("month(date):O").title("Month"),
    alt.Color("value").title(None),
    tooltip=[
        alt.Tooltip("monthdate(date)", title="Date"),
        alt.Tooltip("value", title="Max Temp"),
    ],
).configure_view(
    step=13,
    strokeWidth=0
).configure_axis(
    domain=False)

In [None]:
import altair as alt
from vega_datasets import data 
source = data.iris()

base = alt.Chart(result_df)
base_bar = base.mark_bar(opacity=0.3, binSpacing=0)

xscale = alt.Scale(domain=(4.0, 8.0))
yscale = alt.Scale(domain=(1.9, 4.55))

points = base.mark_circle().encode(
    alt.X("value"),
    alt.Y("power"),
    color="name",
)

top_hist = (
    base_bar
    .encode(
        alt.X("value:Q")
            # when using bins, the axis scale is set through
            # the bin extent, so we do not specify the scale here
            # (which would be ignored anyway)
            .bin(maxbins=20).stack(None).title(""),
        alt.Y("count()").stack(None).title(""),
        alt.Color("name:N"),
    )
    .properties(height=60)
)

right_hist = (
    base_bar
    .encode(
        alt.Y("power:Q")
            .bin(maxbins=20)
            .stack(None)
            .title(""),
        alt.X("count()").stack(None).title(""),
        alt.Color("name:N"),
    )
    .properties(width=60)
)

top_hist & (points | right_hist)

In [73]:
id_mapping = {
    'BE': 56,
    'DE': 276,
    'ES': 724,
    'FR': 250,
    'GB': 826,
    'IT': 380,
    'NL': 528,
    'PT': 620
}

# Filtrar los datos para el 2023-01-01
df_filtered = result_df[result_df['date'] == '2023-01-01']

# Crear la nueva columna 'id' utilizando el método `map()`
df_filtered['id'] = df_filtered['name'].map(id_mapping)

# Mapa de Europa (GeoJSON), aquí se está utilizando un mapa base de países para combinar con los datos
countries = alt.topo_feature(data.world_110m.url, 'countries')

# Crear un gráfico que muestra el valor
chart = alt.Chart(countries).mark_geoshape().encode(
    color=alt.Color('value:Q', title='Value', scale=alt.Scale(scheme='viridis')),
    tooltip=['name:N', 'value:Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(df_filtered, 'id', ['name','value'])
).properties(
    title='Value by Country for January 1, 2023',
    width=300,
    height=250
).project(
    type= 'naturalEarth1',
    scale= 450,                          # Magnify
    center= [5,47],                     # [lon, lat]
    clipExtent= [[-0, 0], [400, 400]])    # [[left, top], [right, bottom]])


# Mostrar el gráfico
chart.display()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['id'] = df_filtered['name'].map(id_mapping)
