## Import packages

In [57]:
# importar paquetes a ocupar
import brawlstats
import pandas as pd
import datetime
# import ipywidgets as widgets
# import zipfile
# import os
import api_key as key
import fastparquet
import concurrent.futures as cf
from tqdm.notebook import tqdm

## Import from API

In [58]:
# crear cliente
client = brawlstats.Client(key.api_key)

In [59]:
# importar brawlers
brawlers = pd.DataFrame()
json_brawlers = client.get_brawlers().raw_data
brawlers = pd.concat([brawlers, pd.json_normalize(json_brawlers)])

In [60]:
brawlers = pd.merge(left=brawlers, right=pd.json_normalize(brawlers['starPowers']).add_prefix('starPowers.'), left_index=True, right_index=True)
brawlers = pd.merge(left=brawlers, right=pd.json_normalize(brawlers['gadgets']).add_prefix('gadgets.'), left_index=True, right_index=True)
brawlers = brawlers.drop('starPowers', axis=1)
brawlers = brawlers.drop('gadgets', axis=1)

In [61]:
# reset brawler index y export de dataset
brawlers.to_parquet('datasets/brawlers/brawlers.parquet', index=False, engine='fastparquet', compression='gzip')

print(brawlers.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            64 non-null     int64 
 1   name          64 non-null     object
 2   starPowers.0  64 non-null     object
 3   starPowers.1  64 non-null     object
 4   gadgets.0     64 non-null     object
 5   gadgets.1     64 non-null     object
dtypes: int64(1), object(5)
memory usage: 3.1+ KB
None


In [62]:
# import información adicional de brawlers
brawlers_classification = pd.read_csv('datasets/brawlers/brawlers_classification.csv', index_col=0)

In [63]:
# merge de ambos dataframes
brawlerStats = pd.merge(brawlers, brawlers_classification, on='id')
print(brawlerStats.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 0 to 63
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              64 non-null     int64  
 1   name            64 non-null     object 
 2   starPowers.0    64 non-null     object 
 3   starPowers.1    64 non-null     object 
 4   gadgets.0       64 non-null     object 
 5   gadgets.1       64 non-null     object 
 6   Brawler         64 non-null     object 
 7   Health          64 non-null     float64
 8   Rarity          64 non-null     object 
 9   Class           64 non-null     object 
 10  Movement_Speed  64 non-null     object 
 11  Range           63 non-null     object 
 12  Reload          63 non-null     object 
 13  Speed_Num       64 non-null     int64  
 14  Range_Num       64 non-null     float64
 15  Reload_Num      63 non-null     float64
 16  Role            64 non-null     object 
dtypes: float64(3), int64(2), object(12)
m

In [64]:
# export dataframe final brawlers
brawlerStats.to_parquet('datasets/brawlers/brawlers_stats.parquet', index=False, engine='fastparquet', compression='gzip')

print('dimensiones brawlerStats: ' + str(brawlerStats.shape))

dimensiones brawlerStats: (64, 17)


In [65]:
countryCode = ['US','MX','BR','GB','CA','DE','FR','ES','IT','RU','TR','AR','PL','CO','IN','ID','UA','AU','NL','JP','KR','CZ','CH','PH','MY','VN','IE',]#'TH','IL','NO','FI','PT','AT','GR','HU','SG','SA','AE','SE','DK','BZ','CR','GT','HN','NI','PA','SV','BO','CL','EC']

In [66]:
# sacar el player tag de los top players
top_player = []

# top global
leaderboard = client.get_rankings(ranking='players')
for i in leaderboard:
    top_player.append({'tag': i.tag, 'trophies': i.trophies, 'rank_type': 'global'})

# top por regiones en countryCode
for i, item in enumerate(countryCode):
   leaderboard = client.get_rankings(ranking='players',region=item)
   for k in leaderboard:
    top_player.append({'tag': k.tag, 'trophies': k.trophies, 'rank_type': item})

top_player = pd.DataFrame(top_player).drop_duplicates(subset='tag', keep='first').reset_index(drop=True)

print('cantidad top player tag: ' + str(len(top_player)))

# exportar dataset en parquet
top_player.to_parquet('datasets/players/top_player.parquet', index=False, engine='fastparquet', compression='gzip')

cantidad top player tag: 200


In [67]:
# sacar el club de los top clubs
top_club = []

# top global
leaderboard = client.get_rankings(ranking='clubs')
for i in leaderboard:
    top_club.append({'tag': i.tag, 'trophies': i.trophies, 'rank_type': 'global'})

# top por regiones en countryCode
for i, item in enumerate(countryCode):
   leaderboard = client.get_rankings(ranking='players',region=item)
   for k in leaderboard:
    top_club.append({'tag': k.tag, 'trophies': k.trophies, 'rank_type': 'region'})

top_club = pd.DataFrame(top_club).drop_duplicates(subset='tag', keep='first').reset_index(drop=True)

print('cantidad top club tag: ' + str(len(top_club)))

# exportar dataset en parquet
top_club.to_parquet('datasets/clubs/top_club.parquet', index=False, engine='fastparquet', compression='gzip')

cantidad top club tag: 5600


In [68]:
# importar battelog de api

data = {}

for i, playertag in tqdm(enumerate(top_player['tag']), total = len(top_player['tag']), colour='brown'):
    json_battlelog = {}
    try:
        json_battlelog = client.get_battle_logs(playertag).raw_data
    except:
        print("No se pudo recuperar battlelog de tag " + playertag)
    
    for k in range(len(json_battlelog)):
        loaded_json = json_battlelog[k]
        loaded_json['playertag'] = playertag
        data[str(i) + '-' + str(k)] = loaded_json

battlelog = pd.DataFrame.from_dict(data, orient='index').reset_index(drop=True)

  0%|          | 0/200 [00:00<?, ?it/s]

No se pudo recuperar battlelog de tag #CPYUR8QR


In [69]:
# creación del dataframe
#battlelog = pd.DataFrame()

#def split_json(s):
#	# separa un string por el item "
#	s = str(s).split('"')
#
#	return s

#def clean_json(jsonitem):
#	# para una lista de artiuclos de json, separa en " y limpia los elementos impares de la lista resultante 
#	for i in range(len(jsonitem)):
#		splitted = split_json(jsonitem[i])
#		for j in range(len(splitted)):
#			splitted[j] = str(splitted[j]).replace('None','"None"').replace('False','"False"').replace('True','"True"')
#			if j % 2 == 1 and len(splitted[j]) < 20:
#				splitted[j] = '"' + str(splitted[j]).replace("'",'`') + '"'
#			else:
#				splitted[j] = str(splitted[j]).replace("'",'"')
#		jsonitem[i] = ''.join(splitted)
#	
#	return jsonitem

#def cleanjson(val):
#	result = str(val).replace('"',"`").replace(': `', ": '").replace('`,',"',").replace('`}',"'}").replace("e's",'e`s').replace('"ll','`ll').replace("I'm", 'I`m').replace("i'm", 'i`m').replace("' ","` ").replace("t's","t`s").replace('None',"'None'").replace("'",'"')
#	return result

# progress = widgets.IntProgress(
# 	value=0, 
# 	min=0, 
# 	max=100, 
# 	description='cargando:',
# 	style={'bar_color': 'maroon'}) 

# processed = widgets.BoundedFloatText(
#     value=0,
#     min=0,
#     max=len(topplayer_tag),
#     description='procesado:',
#     disabled=False
# )

# display(progress)
# display(processed)

# for i in range(len(topplayer_tag)):

# 	json_battlelog = {}
# 	playertag = topplayer_tag[i]
# 	try:
# 		json_battlelog = client.get_battle_logs(playertag).raw_data
# 	except:
# 		print("No se pudo recuperar battlelog de tag " + playertag)

# 	for k in range(len(json_battlelog)):
# 		loaded_json = json_battlelog[k]
# 		json_battlelog[k]['playertag'] = playertag
# 		try:
# 			battlelog = battlelog.append(pd.json_normalize(loaded_json), ignore_index=True)
# 		except:
# 			print("no se pudo importar " + playertag + " battlelog numero " + str(k))

# 	progress.value = ((i+1) / len(topplayer_tag)) * 100
# 	processed.value = i


## Data Depuration

In [70]:
# desglose de dataframe
battlelog = pd.merge(left=battlelog, right=pd.json_normalize(battlelog['event']).add_prefix('event.'), left_index=True, right_index=True)
battlelog = pd.merge(left=battlelog, right=pd.json_normalize(battlelog['battle']).add_prefix('battle.'), left_index=True, right_index=True)

In [71]:
# reset battlelog index
battlelog = battlelog.drop('event', axis=1)
battlelog = battlelog.drop('battle', axis=1)
print('dimensiones battlelog: ' + str(battlelog.shape))

dimensiones battlelog: (4880, 26)


In [72]:
battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4880 entries, 0 to 4879
Data columns (total 26 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   battleTime                          4880 non-null   object 
 1   playertag                           4880 non-null   object 
 2   event.id                            4880 non-null   int64  
 3   event.mode                          4732 non-null   object 
 4   event.map                           4732 non-null   object 
 5   battle.mode                         4880 non-null   object 
 6   battle.type                         4799 non-null   object 
 7   battle.result                       4034 non-null   object 
 8   battle.duration                     4051 non-null   float64
 9   battle.trophyChange                 3356 non-null   float64
 10  battle.teams                        4497 non-null   object 
 11  battle.starPlayer.tag               3252 no

In [73]:
# cuenta tipos de juego
battlelog['battle.type'].value_counts()

ranked                   3289
friendly                  519
soloRanked                480
teamRanked                448
championshipChallenge      58
challenge                   5
Name: battle.type, dtype: int64

In [74]:
# eliminar friendly
battlelog = battlelog.loc[battlelog['battle.type'] != "friendly"]

In [75]:
# cuenta modos de juego
battlelog['battle.mode'].value_counts() 

hotZone         1054
brawlBall       1033
gemGrab          553
duoShowdown      463
knockout         386
soloShowdown     289
heist            271
bounty           231
bigGame           81
Name: battle.mode, dtype: int64

In [76]:
# eliminar modos alt y modos showdown
modos_alt = ['bossFight','roboRumble','bigGame','soloShowdown','duoShowdown']
battlelog = battlelog.loc[~battlelog['battle.mode'].isin(modos_alt)]

In [77]:
# eliminar event vacio
battlelog['event.mode'] = battlelog['event.mode'].fillna('unknown')
battlelog = battlelog.loc[battlelog['event.mode'] != "unknown"]

In [78]:
# eliminar map vacio
battlelog['event.map'] = battlelog['event.map'].fillna('unknown')
battlelog = battlelog.loc[battlelog['event.map'] != "unknown"]

In [79]:
# reset battlelog index
battlelog.reset_index(drop=True, inplace=True)

print('dimensiones battlelog: ' + str(battlelog.shape))

dimensiones battlelog: (3476, 26)


In [80]:
# player = {}

# npl = top_player['tag'][0:100].to_list()

# for i, playertag in enumerate(npl):
# 	profile = client.get_profile(playertag)
# 	player[str(i)] = {
# 		'tag': playertag, 
# 		'team_victories': profile.team_victories, 
# 		'highestTrophies': profile.highest_trophies, 
# 		'expPoints': profile.exp_points, 
# 		'trophies': profile.trophies,
# 		'datetime': datetime.datetime.now()
# 		}

# players = pd.DataFrame.from_dict(player, orient='index').reset_index(drop=True)
# players

In [81]:
# descomponer la columna teams

# def battle_delimiter(i, j):
# 	result = pd.json_normalize(pd.json_normalize(pd.json_normalize(pd.json_normalize(rawdata['battle'])['teams'])[i])[j])
# 	return result

def normalize_to_df(i, t, p):
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.tag'] = normalized[t - 1][p - 1]['tag']
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.name'] = normalized[t - 1][p - 1]['name']
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.brawler.id'] = normalized[t - 1][p - 1]['brawler.id']
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.brawler.name'] = normalized[t - 1][p - 1]['brawler.name']
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.brawler.power'] = normalized[t - 1][p - 1]['brawler.power']
	battlelog.loc[i,'battle.team' + str(t) + '.player' + str(p) + '.brawler.trophies'] = normalized[t - 1][p - 1]['brawler.trophies']

normalized = pd.DataFrame()

for i, team in enumerate(battlelog['battle.teams']):
	if team != None:
		try:
			normalized = pd.json_normalize(team, errors='ignore').transpose()
			normalize_to_df(i, 1, 1)
			normalize_to_df(i, 1, 2)
			normalize_to_df(i, 1, 3)
			normalize_to_df(i, 2, 1)
			normalize_to_df(i, 2, 2)
			normalize_to_df(i, 2, 3)
		except:
			print("no se pudo transponer")

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3476 entries, 0 to 3475
Data columns (total 62 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   battleTime                             3476 non-null   object 
 1   playertag                              3476 non-null   object 
 2   event.id                               3476 non-null   int64  
 3   event.mode                             3476 non-null   object 
 4   event.map                              3476 non-null   object 
 5   battle.mode                            3476 non-null   object 
 6   battle.type                            3476 non-null   object 
 7   battle.result                          3476 non-null   object 
 8   battle.duration                        3476 non-null   float64
 9   battle.trophyChange                    2655 non-null   float64
 10  battle.teams                           3476 non-null   object 
 11  batt

In [82]:
# define tags
tags = pd.concat([
	battlelog['battle.team1.player1.tag']
	,battlelog['battle.team1.player2.tag']
	,battlelog['battle.team1.player3.tag']
	,battlelog['battle.team2.player1.tag']
	,battlelog['battle.team2.player2.tag']
	,battlelog['battle.team2.player3.tag']])
tags = tags.drop_duplicates().reset_index(drop=True)
tags.shape

(7401,)

In [83]:
# import players dataset
player = {}

top_player_list = tags.to_list()

def get_profile(playertag):
	profile = client.get_profile(playertag)
	return {
		'tag': playertag, 
		'team_victories': profile.team_victories, 
		'highestTrophies': profile.highest_trophies, 
		'expPoints': profile.exp_points, 
		'trophies': profile.trophies,
		'datetime': datetime.datetime.now()
		}

with cf.ThreadPoolExecutor(max_workers=40) as executor:
	future_to_player = {executor.submit(get_profile, playertag): playertag for playertag in top_player_list}
	for future in tqdm(cf.as_completed(future_to_player), total = len(top_player_list), colour='brown'):
		try:
			i = top_player_list.index(future_to_player[future])
			player[str(i)] = future.result()
		except:
			pass

players = pd.DataFrame.from_dict(player, orient='index').reset_index(drop=True)


  0%|          | 0/7401 [00:00<?, ?it/s]

In [84]:
# importar historico de players
players_hist = pd.read_parquet('datasets/players/players.parquet')

print('dimensiones players hist: ' + str(players_hist.shape))

dimensiones players hist: (819335, 6)


In [85]:
# concatenar las bases
players = pd.concat([players_hist, players], ignore_index=True) \
	.drop_duplicates(subset='tag', keep='last') \
	.reset_index(drop=True)

print('dimensiones players: ' + str(players.shape))

dimensiones players: (819620, 6)


In [86]:
# export players
# players.to_csv('datasets/players/players.csv')
players.to_parquet('datasets/players/players.parquet', index=False, engine='fastparquet', compression='gzip')

In [87]:
# fix column names
battlelog.columns = battlelog.columns.str.replace('.', '_', regex=True)

In [88]:
# select columns
battlelog = battlelog[[
'battleTime'
,'playertag'
,'event_mode'
,'event_map'
,'battle_type'
,'battle_result'
,'battle_duration'
,'battle_trophyChange'
,'battle_team1_player1_tag'
,'battle_team1_player1_name'
,'battle_team1_player1_brawler_id'
,'battle_team1_player1_brawler_name'
,'battle_team1_player1_brawler_power'
,'battle_team1_player1_brawler_trophies'
,'battle_team1_player2_tag'
,'battle_team1_player2_name'
,'battle_team1_player2_brawler_id'
,'battle_team1_player2_brawler_name'
,'battle_team1_player2_brawler_power'
,'battle_team1_player2_brawler_trophies'
,'battle_team1_player3_tag'
,'battle_team1_player3_name'
,'battle_team1_player3_brawler_id'
,'battle_team1_player3_brawler_name'
,'battle_team1_player3_brawler_power'
,'battle_team1_player3_brawler_trophies'
,'battle_team2_player1_tag'
,'battle_team2_player1_name'
,'battle_team2_player1_brawler_id'
,'battle_team2_player1_brawler_name'
,'battle_team2_player1_brawler_power'
,'battle_team2_player1_brawler_trophies'
,'battle_team2_player2_tag'
,'battle_team2_player2_name'
,'battle_team2_player2_brawler_id'
,'battle_team2_player2_brawler_name'
,'battle_team2_player2_brawler_power'
,'battle_team2_player2_brawler_trophies'
,'battle_team2_player3_tag'
,'battle_team2_player3_name'
,'battle_team2_player3_brawler_id'
,'battle_team2_player3_brawler_name'
,'battle_team2_player3_brawler_power'
,'battle_team2_player3_brawler_trophies'
]]

In [89]:
# traer archivo histórico battlelog
# with zipfile.ZipFile('datasets/teams/battlelog_teams.zip', 'r') as zip_ref:
    # zip_ref.extractall('datasets/teams/')
    
battlelog_hist = pd.read_parquet('datasets/teams/battlelog_teams.parquet')

print('dimensiones battlelog hist: ' + str(battlelog_hist.shape))

dimensiones battlelog hist: (753306, 44)


In [90]:
# agregar nuevos reg a histórico
battlelog = pd.concat([battlelog, battlelog_hist])
print('dimensiones battlelog concat: ' + str(battlelog.shape))

dimensiones battlelog concat: (756782, 44)


In [91]:
# eliminar battelogs duplicados
battlelog = battlelog.drop_duplicates(['battleTime', 'event_mode', 'event_map', 'battle_type', 'battle_duration', 'battle_team1_player1_tag'], ignore_index=True)

print('dimensiones battlelog final: ' + str(battlelog.shape))

dimensiones battlelog final: (693410, 44)


In [92]:
# set correct dtypes
battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693410 entries, 0 to 693409
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   battleTime                             693410 non-null  object 
 1   playertag                              693410 non-null  object 
 2   event_mode                             693410 non-null  object 
 3   event_map                              693410 non-null  object 
 4   battle_type                            693410 non-null  object 
 5   battle_result                          693410 non-null  object 
 6   battle_duration                        693410 non-null  Float64
 7   battle_trophyChange                    524221 non-null  Float64
 8   battle_team1_player1_tag               693410 non-null  object 
 9   battle_team1_player1_name              693408 non-null  object 
 10  battle_team1_player1_brawler_id        693410 non-null  

In [93]:
# assign correct dtypes
battlelog['battleTime'] = pd.to_datetime(battlelog['battleTime'], format='%Y-%m-%d').dt.date.astype('datetime64[ns]')
battlelog['battle_duration'] = battlelog['battle_duration'].astype('Int16')
battlelog['battle_trophyChange'] = battlelog['battle_trophyChange'].astype('Int8')
battlelog['battle_team1_player1_brawler_id'] = battlelog['battle_team1_player1_brawler_id'].astype('Int32')
battlelog['battle_team1_player1_brawler_power'] = battlelog['battle_team1_player1_brawler_power'].astype('Int8')
battlelog['battle_team1_player1_brawler_trophies'] = battlelog['battle_team1_player1_brawler_trophies'].astype('Int16')
battlelog['battle_team1_player2_brawler_id'] = battlelog['battle_team1_player2_brawler_id'].astype('Int32')
battlelog['battle_team1_player2_brawler_power'] = battlelog['battle_team1_player2_brawler_power'].astype('Int8')
battlelog['battle_team1_player2_brawler_trophies'] = battlelog['battle_team1_player2_brawler_trophies'].astype('Int16')
battlelog['battle_team1_player3_brawler_id'] = battlelog['battle_team1_player3_brawler_id'].astype('Int32')
battlelog['battle_team1_player3_brawler_power'] = battlelog['battle_team1_player3_brawler_power'].astype('Int8')
battlelog['battle_team1_player3_brawler_trophies'] = battlelog['battle_team1_player3_brawler_trophies'].astype('Int16')
battlelog['battle_team2_player1_brawler_id'] = battlelog['battle_team2_player1_brawler_id'].astype('Int32')
battlelog['battle_team2_player1_brawler_power'] = battlelog['battle_team2_player1_brawler_power'].astype('Int8')
battlelog['battle_team2_player1_brawler_trophies'] = battlelog['battle_team2_player1_brawler_trophies'].astype('Int16')
battlelog['battle_team2_player2_brawler_id'] = battlelog['battle_team2_player2_brawler_id'].astype('Int32')
battlelog['battle_team2_player2_brawler_power'] = battlelog['battle_team2_player2_brawler_power'].astype('Int8')
battlelog['battle_team2_player2_brawler_trophies'] = battlelog['battle_team2_player2_brawler_trophies'].astype('Int16')
battlelog['battle_team2_player3_brawler_id'] = battlelog['battle_team2_player3_brawler_id'].astype('Int32')
battlelog['battle_team2_player3_brawler_power'] = battlelog['battle_team2_player3_brawler_power'].astype('Int8')
battlelog['battle_team2_player3_brawler_trophies'] = battlelog['battle_team2_player3_brawler_trophies'].astype('Int16')

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693410 entries, 0 to 693409
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype         
---  ------                                 --------------   -----         
 0   battleTime                             693410 non-null  datetime64[ns]
 1   playertag                              693410 non-null  object        
 2   event_mode                             693410 non-null  object        
 3   event_map                              693410 non-null  object        
 4   battle_type                            693410 non-null  object        
 5   battle_result                          693410 non-null  object        
 6   battle_duration                        693410 non-null  Int16         
 7   battle_trophyChange                    524221 non-null  Int8          
 8   battle_team1_player1_tag               693410 non-null  object        
 9   battle_team1_player1_name              693408 no

In [94]:
# assign category dtypes
battlelog['battle_team1_player1_brawler_name'] = battlelog['battle_team1_player1_brawler_name'].astype('category')
battlelog['battle_team1_player2_brawler_name'] = battlelog['battle_team1_player2_brawler_name'].astype('category')
battlelog['battle_team1_player3_brawler_name'] = battlelog['battle_team1_player3_brawler_name'].astype('category')
battlelog['battle_team2_player1_brawler_name'] = battlelog['battle_team2_player1_brawler_name'].astype('category')
battlelog['battle_team2_player2_brawler_name'] = battlelog['battle_team2_player2_brawler_name'].astype('category')
battlelog['battle_team2_player3_brawler_name'] = battlelog['battle_team2_player3_brawler_name'].astype('category')
battlelog['event_mode'] = battlelog['event_mode'].astype('category')
battlelog['event_map'] = battlelog['event_map'].astype('category')
battlelog['battle_type'] = battlelog['battle_type'].astype('category')
battlelog['battle_result'] = battlelog['battle_result'].astype('category')

battlelog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693410 entries, 0 to 693409
Data columns (total 44 columns):
 #   Column                                 Non-Null Count   Dtype         
---  ------                                 --------------   -----         
 0   battleTime                             693410 non-null  datetime64[ns]
 1   playertag                              693410 non-null  object        
 2   event_mode                             693410 non-null  category      
 3   event_map                              693410 non-null  category      
 4   battle_type                            693410 non-null  category      
 5   battle_result                          693410 non-null  category      
 6   battle_duration                        693410 non-null  Int16         
 7   battle_trophyChange                    524221 non-null  Int8          
 8   battle_team1_player1_tag               693410 non-null  object        
 9   battle_team1_player1_name              693408 no

In [95]:
# export dataset teams completo mas histórico
battlelog.to_parquet('datasets/teams/battlelog_teams.parquet', index=False, engine='fastparquet', compression='gzip')

# Comprimir el archivo parquet
# with zipfile.ZipFile('datasets/teams/battlelog_teams.zip', 'w') as zip_file:
#     zip_file.write('datasets/teams/battlelog_teams.parquet', arcname='battlelog_teams.parquet',compress_type=zipfile.ZIP_BZIP2)

# os.remove('datasets/teams/battlelog_teams.parquet')

In [96]:
maplist = battlelog[['event_mode','event_map']].drop_duplicates()

maplist.to_parquet('datasets/maps/maplist.parquet', index=False)