## Import packages

In [90]:
# importar paquetes a ocupar
import brawlstats
import pandas as pd
import datetime
import ipywidgets as widgets
# import zipfile
# import os
import api_key as key
import fastparquet

## Import from API

In [91]:
# Create the brawlstats API client, authenticated with the key read from the
# local `api_key` module (imported above as `key`).
client = brawlstats.Client(key.api_key)

In [92]:
# Import the brawler catalogue from the API.
# The raw JSON payload is flattened straight into a DataFrame; there is no need
# to concatenate onto an empty accumulator frame first.
json_brawlers = client.get_brawlers().raw_data
brawlers = pd.json_normalize(json_brawlers)

In [93]:
# Expand the nested `starPowers` and `gadgets` columns into positional columns
# (`starPowers.0`, `starPowers.1`, `gadgets.0`, ...), joined back on the row index.
for nested_col in ('starPowers', 'gadgets'):
    expanded = pd.json_normalize(brawlers[nested_col]).add_prefix(nested_col + '.')
    brawlers = brawlers.merge(expanded, left_index=True, right_index=True)

# The raw nested columns are no longer needed once expanded.
brawlers = brawlers.drop(columns=['starPowers', 'gadgets'])

In [94]:
# Export the brawler dataset to parquet (the DataFrame index is not written).
brawlers.to_parquet('datasets/brawlers/brawlers.parquet', index=False, engine='fastparquet', compression='gzip')

# `DataFrame.info()` prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a trailing "None").
brawlers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            64 non-null     int64 
 1   name          64 non-null     object
 2   starPowers.0  64 non-null     object
 3   starPowers.1  64 non-null     object
 4   gadgets.0     64 non-null     object
 5   gadgets.1     64 non-null     object
dtypes: int64(1), object(5)
memory usage: 3.1+ KB
None


In [95]:
# Load the additional per-brawler information from the local CSV;
# the first CSV column is used as the DataFrame index.
brawlers_classification = pd.read_csv('datasets/brawlers/brawlers_classification.csv', index_col=0)

In [96]:
# Merge the API brawler data with the classification data on the shared `id` column.
brawlerStats = pd.merge(brawlers, brawlers_classification, on='id')

# `info()` prints directly and returns None; wrapping it in print() would add a stray "None".
brawlerStats.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 0 to 63
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              64 non-null     int64  
 1   name            64 non-null     object 
 2   starPowers.0    64 non-null     object 
 3   starPowers.1    64 non-null     object 
 4   gadgets.0       64 non-null     object 
 5   gadgets.1       64 non-null     object 
 6   Brawler         64 non-null     object 
 7   Health          64 non-null     float64
 8   Rarity          64 non-null     object 
 9   Class           64 non-null     object 
 10  Movement_Speed  64 non-null     object 
 11  Range           63 non-null     object 
 12  Reload          63 non-null     object 
 13  Speed_Num       64 non-null     int64  
 14  Range_Num       64 non-null     float64
 15  Reload_Num      63 non-null     float64
 16  Role            64 non-null     object 
dtypes: float64(3), int64(2), object(12)
m

In [97]:
# Export the final merged brawler DataFrame and report its dimensions.
brawlerStats.to_parquet(
    'datasets/brawlers/brawlers_stats.parquet',
    index=False,
    engine='fastparquet',
    compression='gzip',
)

print(f'dimensiones brawlerStats: {brawlerStats.shape}')

dimensiones brawlerStats: (64, 17)


In [98]:
# Region codes whose player leaderboards are queried below.
# The codes after the trailing `#` are currently disabled.
countryCode = ['US','MX','BR','GB','CA','DE','FR','ES','IT','RU','TR','AR','PL','CO','IN','ID','UA','AU','NL','JP','KR','CZ','CH','PH','MY']#,'VN','IE','TH','IL','NO','FI','PT','AT','GR','HU','SG','SA','AE','SE','DK','BZ','CR','GT','HN','NI','PA','SV','BO','CL','EC']
# Number of active region codes.
len(countryCode)

25

In [99]:
# Collect the player tags that appear on the global and regional player leaderboards.
unique_tags = set()

# Global leaderboard.
for ranked_player in client.get_rankings(ranking='players'):
    unique_tags.add(ranked_player.tag)

# Regional leaderboards, one request per code in `countryCode`.
# Distinct loop variable names are used so the inner loop no longer shadows the outer one.
for region in countryCode:
    for ranked_player in client.get_rankings(ranking='players', region=region):
        unique_tags.add(ranked_player.tag)

# A player can appear on several leaderboards; the set removes duplicates and
# sorting makes the order (and the row keys built from it later) reproducible.
topplayer_tag = sorted(unique_tags)

print(f'cantidad top player tag: {len(topplayer_tag)}')

cantidad top player tag: 5032


In [100]:
# Download the battle log of every player in `topplayer_tag` from the API.

# Progress widgets: a percentage bar and a counter of processed players.
progress = widgets.IntProgress(
    value=0,
    min=0,
    max=100,
    description='cargando:',
    style={'bar_color': 'maroon'})

processed = widgets.BoundedFloatText(
    value=0,
    min=0,
    max=len(topplayer_tag),
    description='procesado:',
    disabled=False
)

display(progress)
display(processed)

# Maps "<player position>-<battle position>" to the raw battle record.
data = {}
total_players = len(topplayer_tag)

for i, playertag in enumerate(topplayer_tag):
    try:
        json_battlelog = client.get_battle_logs(playertag).raw_data
    except Exception as exc:
        # Best effort: skip players whose log cannot be fetched, but report why.
        # `Exception` (rather than a bare except) keeps Ctrl-C / kernel interrupt working.
        print("No se pudo recuperar battlelog de tag " + playertag + ": " + repr(exc))
        json_battlelog = []

    for k, loaded_json in enumerate(json_battlelog):
        # Remember which player's log each battle came from.
        loaded_json['playertag'] = playertag
        data[str(i) + '-' + str(k)] = loaded_json

    # Update once per player, including players with an empty or failed log.
    progress.value = ((i + 1) / total_players) * 100
    processed.value = i + 1

battlelog = pd.DataFrame.from_dict(data, orient='index').reset_index(drop=True)

IntProgress(value=0, description='cargando:', style=ProgressStyle(bar_color='maroon'))

BoundedFloatText(value=0.0, description='procesado:', max=5032.0)

No se pudo recuperar battlelog de tag #2JU2UQYJP


In [101]:
# creación del dataframe
#battlelog = pd.DataFrame()

#def split_json(s):
#	# separa un string por el item "
#	s = str(s).split('"')
#
#	return s

#def clean_json(jsonitem):
#	# para una lista de artiuclos de json, separa en " y limpia los elementos impares de la lista resultante 
#	for i in range(len(jsonitem)):
#		splitted = split_json(jsonitem[i])
#		for j in range(len(splitted)):
#			splitted[j] = str(splitted[j]).replace('None','"None"').replace('False','"False"').replace('True','"True"')
#			if j % 2 == 1 and len(splitted[j]) < 20:
#				splitted[j] = '"' + str(splitted[j]).replace("'",'`') + '"'
#			else:
#				splitted[j] = str(splitted[j]).replace("'",'"')
#		jsonitem[i] = ''.join(splitted)
#	
#	return jsonitem

#def cleanjson(val):
#	result = str(val).replace('"',"`").replace(': `', ": '").replace('`,',"',").replace('`}',"'}").replace("e's",'e`s').replace('"ll','`ll').replace("I'm", 'I`m').replace("i'm", 'i`m').replace("' ","` ").replace("t's","t`s").replace('None',"'None'").replace("'",'"')
#	return result

# progress = widgets.IntProgress(
# 	value=0, 
# 	min=0, 
# 	max=100, 
# 	description='cargando:',
# 	style={'bar_color': 'maroon'}) 

# processed = widgets.BoundedFloatText(
#     value=0,
#     min=0,
#     max=len(topplayer_tag),
#     description='procesado:',
#     disabled=False
# )

# display(progress)
# display(processed)

# for i in range(len(topplayer_tag)):

# 	json_battlelog = {}
# 	playertag = topplayer_tag[i]
# 	try:
# 		json_battlelog = client.get_battle_logs(playertag).raw_data
# 	except:
# 		print("No se pudo recuperar battlelog de tag " + playertag)

# 	for k in range(len(json_battlelog)):
# 		loaded_json = json_battlelog[k]
# 		json_battlelog[k]['playertag'] = playertag
# 		try:
# 			battlelog = battlelog.append(pd.json_normalize(loaded_json), ignore_index=True)
# 		except:
# 			print("no se pudo importar " + playertag + " battlelog numero " + str(k))

# 	progress.value = ((i+1) / len(topplayer_tag)) * 100
# 	processed.value = i


## Data Cleaning

In [102]:
# Break the nested `event` and `battle` columns out into prefixed flat columns,
# joined back onto the battle log by row index.
event_columns = pd.json_normalize(battlelog['event']).add_prefix('event.')
battlelog = battlelog.merge(event_columns, left_index=True, right_index=True)

battle_columns = pd.json_normalize(battlelog['battle']).add_prefix('battle.')
battlelog = battlelog.merge(battle_columns, left_index=True, right_index=True)

In [103]:
# Drop the raw nested columns now that they have been expanded, then report the shape.
battlelog = battlelog.drop(columns=['event', 'battle'])
print(f'dimensiones battlelog: {battlelog.shape}')

dimensiones battlelog: (121831, 28)


In [104]:
# Inspect column names, non-null counts and dtypes of the flattened battle log.
battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121831 entries, 0 to 121830
Data columns (total 28 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   battleTime                          121831 non-null  object 
 1   playertag                           121831 non-null  object 
 2   event.id                            121831 non-null  int64  
 3   event.mode                          111965 non-null  object 
 4   event.map                           111965 non-null  object 
 5   battle.mode                         121831 non-null  object 
 6   battle.type                         113147 non-null  object 
 7   battle.result                       83691 non-null   object 
 8   battle.duration                     85555 non-null   float64
 9   battle.trophyChange                 76524 non-null   float64
 10  battle.teams                        99482 non-null   object 
 11  battle.starPlayer.tag     

In [105]:
# Count battles per battle type.
battlelog['battle.type'].value_counts()

ranked                   73809
teamRanked               13766
championshipChallenge    10229
soloRanked                9310
friendly                  5507
challenge                  378
tournament                 148
Name: battle.type, dtype: int64

In [106]:
# Remove friendly battles (rows with a missing battle type are kept).
is_friendly = battlelog['battle.type'] == "friendly"
battlelog = battlelog.loc[~is_friendly]

In [107]:
# Count battles per game mode.
battlelog['battle.mode'].value_counts()

brawlBall       26758
duoShowdown     15850
gemGrab         13185
soloShowdown    12577
knockout        11205
hotZone         10949
heist            9987
bigGame          8526
bounty           7129
bossFight         133
roboRumble         25
Name: battle.mode, dtype: int64

In [108]:
# Remove the alternative game modes and the showdown modes.
modos_alt = ['bossFight','roboRumble','bigGame','soloShowdown','duoShowdown']
is_alt_mode = battlelog['battle.mode'].isin(modos_alt)
battlelog = battlelog.loc[~is_alt_mode]

In [109]:
# Remove battles without an event mode (missing, or the literal 'unknown').
# A single boolean mask replaces the previous fillna-then-filter, which assigned
# a column on a filtered slice and triggered pandas' SettingWithCopyWarning.
event_mode = battlelog['event.mode']
battlelog = battlelog.loc[event_mode.notna() & (event_mode != "unknown")]

In [110]:
# Remove battles without an event map (missing, or the literal 'unknown').
# A single boolean mask replaces the previous fillna-then-filter, which assigned
# a column on a filtered slice and triggered pandas' SettingWithCopyWarning.
event_map = battlelog['event.map']
battlelog = battlelog.loc[event_map.notna() & (event_map != "unknown")]

In [111]:
# Renumber the rows 0..n-1 after filtering; the later per-row team expansion
# addresses rows by this position.
battlelog = battlelog.reset_index(drop=True)

print(f'dimensiones battlelog: {battlelog.shape}')

dimensiones battlelog: (74259, 28)


In [112]:
# crear listado de players en battelog

#playerlist = pd.DataFrame(pd.concat([
#	battlelog['playertag'], 
#	battlelog['battle.starPlayer.tag'], 
#	battlelog['battle.team1.player1.tag'], 
#	battlelog['battle.team1.player2.tag'], 
#	battlelog['battle.team1.player3.tag'], 
#	battlelog['battle.team2.player1.tag'], 
#	battlelog['battle.team2.player2.tag'], 
#	battlelog['battle.team2.player3.tag']
#	], ignore_index=True).drop_duplicates().reset_index(drop=True))
#
#playerlist.shape

In [113]:
# crear listado nuevos playersplayers_hist
#playerlist_merge = pd.merge(playerlist, players_hist['tag'], left_on=0, right_on='tag', how='left').drop_duplicates().reset_index(drop=True)
#
#playerlist_final = playerlist_merge[0][playerlist_merge['tag'].isna()].drop_duplicates().reset_index(drop=True)
#
#playerlist_final.shape

In [114]:
# agregar datos de player
#players = pd.DataFrame()
#
#for i in range(len(playerlist_final)):
#
#	json_player = []
#	playertag = playerlist_final.loc[i]
#	try:
#		json_player = client.get_profile(playerlist_final.loc[i]).raw_data
#		del json_player['brawlers']
#	except:
#		print("No se pudo recuperar player de tag " + str(playertag))
#
#	try:
#		players = pd.concat([players, pd.json_normalize(json_player)])
#	except:
#		print("no se pudo importar player tag " + str(playertag))

# progress = widgets.IntProgress(
# 	value=0, 
# 	min=0, 
# 	max=100, 
# 	description='cargando:',
# 	style={'bar_color': 'maroon'}) 

# processed = widgets.BoundedFloatText(
#     value=0,
#     min=0,
#     max=len(topplayer_tag),
#     description='procesado:',
#     disabled=False
# )

# display(progress)
# display(processed)

# timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# player = {}
# json_player = []

# for i, playertag in enumerate(topplayer_tag):
#     try:
#         json_player = client.get_profile(playertag).raw_data
#         del json_player['brawlers']
#         json_player['import_date'] = timestamp
#         player[str(i)] = json_player
#     except:
#         print("No se pudo recuperar profile de tag " + str(playertag))

#     progress.value = ((i+1) / len(topplayer_tag)) * 100
#     processed.value = i+1

# players = pd.DataFrame.from_dict(player, orient='index').reset_index(drop=True)

In [115]:
# players_hist = pd.read_csv('datasets/players/players.csv', index_col=0)
# players_hist.shape

In [116]:
# concatenar las bases
# players = pd.concat([players, players_hist], ignore_index=True).drop_duplicates(['tag']).reset_index(drop=True)

# print('dimensiones players: ' + str(players.shape))

In [117]:
# players.to_csv('datasets/players/players.csv')

In [118]:
# descomponer la columna teams

# def battle_delimiter(i, j):
# 	result = pd.json_normalize(pd.json_normalize(pd.json_normalize(pd.json_normalize(rawdata['battle'])['teams'])[i])[j])
# 	return result

# Per-player fields copied out of each team entry into flat battle-log columns.
TEAM_PLAYER_FIELDS = ('tag', 'name', 'brawler.id', 'brawler.name', 'brawler.power', 'brawler.trophies')


def normalize_to_df(i, t, p):
    """Copy one player's fields into row `i` of the global `battlelog`.

    Reads the module-level `normalized` frame, which the loop below rebuilds for
    every battle, and writes one `battle.team<t>.player<p>.<field>` column per
    entry in `TEAM_PLAYER_FIELDS`.

    Args:
        i: Row label in `battlelog` to write to.
        t: 1-based team number (selects column `t - 1` of `normalized`).
        p: 1-based player number within the team (selects row `p - 1`).

    Raises:
        KeyError / TypeError: if the team or player slot does not exist in
            `normalized`, or the player entry lacks one of the fields.
    """
    player = normalized[t - 1][p - 1]
    prefix = 'battle.team' + str(t) + '.player' + str(p) + '.'
    for field in TEAM_PLAYER_FIELDS:
        battlelog.loc[i, prefix + field] = player[field]


normalized = pd.DataFrame()

# NOTE(review): `i` is a position from enumerate() but is used as a row label,
# which relies on `battlelog` having a fresh 0..n-1 index (reset in an earlier cell).
for i, team in enumerate(battlelog['battle.teams']):
    if team is None:
        continue
    try:
        # After the transpose, columns are teams and rows are players.
        # Presumably each team is a list of player dicts - TODO confirm against the API payload.
        normalized = pd.json_normalize(team, errors='ignore').transpose()
        for t in (1, 2):
            for p in (1, 2, 3):
                normalize_to_df(i, t, p)
    except Exception as exc:
        # Best effort: battles that do not fit the 2 teams x 3 players layout are
        # skipped (fields already written for that row are left in place).
        # Report which row failed and why; `Exception` keeps Ctrl-C working.
        print(f"no se pudo transponer (fila {i}): {exc!r}")

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74259 entries, 0 to 74258
Data columns (total 64 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   battleTime                             74259 non-null  object 
 1   playertag                              74259 non-null  object 
 2   event.id                               74259 non-null  int64  
 3   event.mode                             74259 non-null  object 
 4   event.map                              74259 non-null  object 
 5   battle.mode                            74259 non-null  object 
 6   battle.type                            74259 non-null  object 
 7   battle.result                          74259 non-null  object 
 8   battle.duration                        74259 non-null  float64
 9   battle.trophyChange                    53065 non-null  float64
 10  battle.teams                           74259 non-null  object 
 11  ba

In [119]:
# Replace the dots in the column names with underscores.
# The dot must be matched literally: with regex=True, pandas >= 2.0 treats '.'
# as "any character" and would turn every column name into underscores only
# (pandas 1.x special-cased single-character patterns as literals).
battlelog.columns = battlelog.columns.str.replace('.', '_', regex=False)

In [120]:
# Keep only the battle-level columns plus the six per-player fields
# for each of the 2 teams x 3 players, in team/player/field order.
base_columns = [
    'battleTime',
    'playertag',
    'event_mode',
    'event_map',
    'battle_type',
    'battle_result',
    'battle_duration',
    'battle_trophyChange',
]
player_fields = ['tag', 'name', 'brawler_id', 'brawler_name', 'brawler_power', 'brawler_trophies']
player_columns = [
    f'battle_team{team}_player{player}_{field}'
    for team in (1, 2)
    for player in (1, 2, 3)
    for field in player_fields
]

battlelog = battlelog[base_columns + player_columns]

In [121]:
# Load the historical battle log accumulated by previous runs.
# with zipfile.ZipFile('datasets/teams/battlelog_teams.zip', 'r') as zip_ref:
    # zip_ref.extractall('datasets/teams/')

battlelog_hist = pd.read_parquet('datasets/teams/battlelog_teams.parquet')

print(f'dimensiones battlelog hist: {battlelog_hist.shape}')

dimensiones battlelog hist: (605765, 44)


In [122]:
# Stack the newly downloaded battles on top of the historical ones.
# New rows come first, so they are the ones kept by the de-duplication that follows.
frames = [battlelog, battlelog_hist]
battlelog = pd.concat(frames)
print(f'dimensiones battlelog concat: {battlelog.shape}')

dimensiones battlelog concat: (680024, 44)


In [123]:
# Remove duplicated battles: two rows are the same battle when these columns
# all match. The first occurrence is kept and the index is renumbered.
battle_key = [
    'battleTime',
    'event_mode',
    'event_map',
    'battle_type',
    'battle_duration',
    'battle_team1_player1_tag',
]
battlelog = battlelog.drop_duplicates(subset=battle_key, ignore_index=True)

print(f'dimensiones battlelog final: {battlelog.shape}')

dimensiones battlelog final: (661236, 44)


In [124]:
# Inspect the current dtypes before converting them (this cell changes nothing;
# the conversions happen in the following cells).
battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661236 entries, 0 to 661235
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   battleTime                             661236 non-null  object 
 1   playertag                              661236 non-null  object 
 2   event_mode                             661236 non-null  object 
 3   event_map                              661236 non-null  object 
 4   battle_type                            661236 non-null  object 
 5   battle_result                          661236 non-null  object 
 6   battle_duration                        661236 non-null  float64
 7   battle_trophyChange                    489956 non-null  float64
 8   battle_team1_player1_tag               661236 non-null  object 
 9   battle_team1_player1_name              661232 non-null  object 
 10  battle_team1_player1_brawler_id        661236 non-null  

In [127]:
# Assign compact dtypes: a date-only timestamp for battleTime and nullable
# integers (which tolerate missing values) for the numeric columns.
# NOTE(review): the battleTime conversion is kept exactly as written; confirm the
# '%Y-%m-%d' format still parses the raw API timestamps on newer pandas versions.
battlelog['battleTime'] = pd.to_datetime(battlelog['battleTime'], format='%Y-%m-%d').dt.date.astype('datetime64[ns]')

numeric_dtypes = {
    'battle_duration': 'Int16',
    'battle_trophyChange': 'Int8',
}
for team in (1, 2):
    for player in (1, 2, 3):
        brawler_prefix = f'battle_team{team}_player{player}_brawler_'
        numeric_dtypes[brawler_prefix + 'id'] = 'Int32'
        numeric_dtypes[brawler_prefix + 'power'] = 'Int8'
        numeric_dtypes[brawler_prefix + 'trophies'] = 'Int16'

for column, dtype in numeric_dtypes.items():
    battlelog[column] = battlelog[column].astype(dtype)

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661236 entries, 0 to 661235
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype         
---  ------                                 --------------   -----         
 0   battleTime                             661236 non-null  datetime64[ns]
 1   playertag                              661236 non-null  object        
 2   event_mode                             661236 non-null  object        
 3   event_map                              661236 non-null  object        
 4   battle_type                            661236 non-null  object        
 5   battle_result                          661236 non-null  object        
 6   battle_duration                        661236 non-null  Int16         
 7   battle_trophyChange                    489956 non-null  Int8          
 8   battle_team1_player1_tag               661236 non-null  object        
 9   battle_team1_player1_name              661232 no

In [128]:
# Store the repeated string columns as pandas categoricals.
category_columns = [
    f'battle_team{team}_player{player}_brawler_name'
    for team in (1, 2)
    for player in (1, 2, 3)
]
category_columns += ['event_mode', 'event_map', 'battle_type', 'battle_result']

for column in category_columns:
    battlelog[column] = battlelog[column].astype('category')

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661236 entries, 0 to 661235
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype         
---  ------                                 --------------   -----         
 0   battleTime                             661236 non-null  datetime64[ns]
 1   playertag                              661236 non-null  object        
 2   event_mode                             661236 non-null  category      
 3   event_map                              661236 non-null  category      
 4   battle_type                            661236 non-null  category      
 5   battle_result                          661236 non-null  category      
 6   battle_duration                        661236 non-null  Int16         
 7   battle_trophyChange                    489956 non-null  Int8          
 8   battle_team1_player1_tag               661236 non-null  object        
 9   battle_team1_player1_name              661232 no

In [141]:
# Export the complete teams dataset (new battles plus history).
# This overwrites the same parquet file the history was read from earlier in
# the notebook, so the file grows with every run.
battlelog.to_parquet('datasets/teams/battlelog_teams.parquet', index=False, engine='fastparquet', compression='gzip')

# Compress the parquet file (disabled)
# with zipfile.ZipFile('datasets/teams/battlelog_teams.zip', 'w') as zip_file:
#     zip_file.write('datasets/teams/battlelog_teams.parquet', arcname='battlelog_teams.parquet',compress_type=zipfile.ZIP_BZIP2)

# os.remove('datasets/teams/battlelog_teams.parquet')

In [130]:
# Build the list of distinct (mode, map) pairs seen in the battle log and export it.
map_columns = ['event_mode', 'event_map']
maplist = battlelog[map_columns].drop_duplicates()

maplist.to_parquet('datasets/maps/maplist.parquet', index=False)