In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Importar dataset y revisar datos
boardgames_data = pd.read_csv('../datasets/boardgames_ranks.csv')
boardgames_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151733 entries, 0 to 151732
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   id                   151733 non-null  int64  
 1   name                 151733 non-null  object 
 2   yearpublished        151733 non-null  int64  
 3   rank                 151733 non-null  int64  
 4   bayesaverage         151733 non-null  float64
 5   average              151733 non-null  float64
 6   usersrated           151733 non-null  int64  
 7   abstracts_rank       1408 non-null    float64
 8   cgs_rank             347 non-null     float64
 9   childrensgames_rank  1044 non-null    float64
 10  familygames_rank     3114 non-null    float64
 11  partygames_rank      878 non-null     float64
 12  strategygames_rank   2867 non-null    float64
 13  thematic_rank        1590 non-null    float64
 14  wargames_rank        4176 non-null    float64
dtypes: float64(10), i

In [3]:
boardgames_data.sample(10)

Unnamed: 0,id,name,yearpublished,rank,bayesaverage,average,usersrated,abstracts_rank,cgs_rank,childrensgames_rank,familygames_rank,partygames_rank,strategygames_rank,thematic_rank,wargames_rank
37212,19741,Blue Moon: The Pillar,2005,0,5.93685,7.21418,684,,,,,,,,
7825,27306,Venedig,2007,7826,5.61099,6.27135,356,,,,,,,,
28544,7438,"Adventures with Clifford, the Big Red Dog",1992,0,0.0,4.0,1,,,,,,,,
3159,6866,Mus,1745,3160,5.94719,7.94853,495,,,,882.0,,,,
35862,17968,N30: We Are Winning!,2004,0,0.0,6.75,2,,,,,,,,
65598,97470,Jarmial,2010,0,0.0,6.41429,7,,,,,,,,
109994,273920,Djinns of the High Desert,2019,0,0.0,7.4375,8,,,,,,,,
115382,294777,"Black Powder: Glory, Hallelujah!",2016,0,0.0,6.61111,9,,,,,,,,
42258,26225,Caroti,2006,0,0.0,4.7,3,,,,,,,,
147388,399751,Maailmanpelastuspeli,2019,0,0.0,6.0,2,,,,,,,,


In [4]:
# Contar cuantos juegos no posee año de publicación
bg_no_year_count = boardgames_data[boardgames_data['yearpublished'] == 0]['id'].count()
print(f'Hay un total de {bg_no_year_count} juegos sin año de publicación')

Hay un total de 11325 juegos sin año de publicación


## Preparación de datos
Existen varias filas que no presentan Año de lanzamiento ni ranking, para este caso, además muchos ranking por categoria son nulos, estos se utilizaran para obtener las categorias de cada juego. Aun que para este analisis de omitiran las filas que no presenten año.

In [5]:

boardgames_data = boardgames_data[((boardgames_data['yearpublished'] != 0))]

In [83]:
boardgames_data.sample(5, random_state=35)

Unnamed: 0,id,name,yearpublished,rank,bayesaverage,average,usersrated,abstracts_rank,cgs_rank,childrensgames_rank,familygames_rank,partygames_rank,strategygames_rank,thematic_rank,wargames_rank,category
135585,358123,Solitaire Legends: Tiger Lily,2022,0,0.0,6.625,8,,,,,,,,,undefined
24563,18574,Dark Force,1994,24564,5.47262,4.43455,55,,328.0,,,,,,,Cartas
26519,3260,Gaukelspiel,1998,0,0.0,6.28571,21,,,,,,,,,undefined
2845,283797,Exit: The Game – The Stormy Flight,2019,2846,5.99892,6.82805,1263,,,,,,,512.0,,Tematico
7191,1202,The Sorcerer's Cave,1978,7192,5.62961,6.48285,393,,,,1916.0,,,1064.0,,Familia


In [7]:
def categorize_game(game):
    category = 'undefined'
    categories = {
        'abstracts_rank' : 'Abstractos',
        'cgs_rank' : 'Cartas',
        'childrensgames_rank' : 'Niños',
        'familygames_rank' : 'Familia',
        'partygames_rank' : 'Fiesta',
        'strategygames_rank' : 'Estrategia',
        'thematic_rank' : 'Tematico',
        'wargames_rank' : 'De guerra'
    }
    rank_cat = 0
    for category_rank in categories.keys():
        if (not pd.isna(game[category_rank]) and category == 'undefined'):
            category = categories[category_rank]
            rank_cat = game[category_rank]
        elif not pd.isna(game[category_rank]):
            if game[category_rank] > rank_cat:
                category = categories[category_rank]
                rank_cat = game[category_rank]
    return category


In [8]:
boardgames_data['category'] = boardgames_data.apply(categorize_game, axis=1)

In [85]:
# Creación histograma
fig = px.histogram(
    boardgames_data[boardgames_data['average'] != 0], 
    x='average', nbins=20, 
    color='category',
    labels=dict(average="Puntucación promedio", category='Categorias')
)
fig.show()





In [86]:
# Diagrama de barras

#Filtrar para eliminar undefined
bg_category_filtered = boardgames_data[boardgames_data['category']!='undefined']['category']
category_counts = bg_category_filtered.value_counts()
fig = px.bar(
    x=category_counts.index, 
    y=category_counts.values, 
    labels={'x':'Categoría', 'y':'Cantidad de juegos'}
)
fig.show()

In [95]:
# Grafico de barras

# Filtrar juegos publicados despues de 1950 cuya categoria no sea indefinido
filtered_data = boardgames_data[(boardgames_data['yearpublished'] >= 1950)]

# Creacion de 
grouped_data = filtered_data.groupby(['yearpublished', 'category']).size().reset_index(name='count')


fig = px.bar(grouped_data, x='yearpublished', y='count', color='category',
             labels={'count': 'Número de Juegos', 'yearpublished': 'Año', 'category':'Categoria'},
             title='Expansión de juegos de mesa en el tiempo')
fig.show()





In [99]:
# Grafico de dispersión
fig = px.scatter(
    boardgames_data, 
    x="average", 
    y="usersrated",
    title='Relación puntuación/numero de votos',
    labels={'usersrated':'Cantidad de votos', 'average':'Puntucación promedio'}
) 
fig.show()

In [104]:
#Filtrar juegos que esten dentro del ranking
filtered_data = boardgames_data[boardgames_data['rank'] != 0]
fig = px.scatter(
    filtered_data, 
    x="rank", 
    y="average", 
    color='category',
    title='Como se relaciona el ranking de un juego con su puntuacion',
    labels={'rank':'Lugar en el ranking', 'average':'Puntuación promedio'})
fig.show()



