# Alargar o tamanho das células

In [17]:
# Widen the notebook's cell container to 90% of the browser window by
# injecting a CSS rule into the page.
# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

# Mostrar todos os registos ou apenas x registos de uma DataFrame

```pandas.set_option('display.max_rows', None)``` - mostrar todos os registos

```pandas.set_option('display.max_rows', 10)``` - mostrar apenas 10 registos

# Mostrar outputs lado a lado - exemplo 1

In [18]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML, display_html

# Three demo frames of increasing height (3, 4 and 10 rows). Each has the
# columns A-D and is filled row by row with consecutive integers from 0.
df1 = pd.DataFrame(data=np.arange(12).reshape(3, 4), columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(data=np.arange(16).reshape(4, 4), columns=['A', 'B', 'C', 'D'])
df3 = pd.DataFrame(data=np.arange(40).reshape(10, 4), columns=['A', 'B', 'C', 'D'])

def display_side_by_side(dfs:list, captions:list):
    """Render several DataFrames next to each other in a single output cell.

    Each frame is turned into styled HTML carrying an inline ``display``
    style and a caption; the fragments are concatenated, separated by three
    non-breaking spaces, and shown with ``display(HTML(...))``.

    Input:
        dfs: list of pandas.DataFrame, rendered left to right
        captions: list of table captions, paired positionally with ``dfs``.
            Captions may repeat. If the two lists differ in length, the
            extra items of the longer one are ignored.

    Returns:
        None. The combined HTML is displayed as a side effect.
    """
    fragments = []
    # Pair the lists positionally instead of building dict(zip(captions, dfs)):
    # a dict keyed by caption silently drops every table whose caption repeats.
    for df, caption in zip(dfs, captions):
        styler = df.style.set_table_attributes("style='display:inline'").set_caption(caption)
        fragments.append(styler._repr_html_())
        # Non-breaking spaces keep a visible gap between adjacent tables.
        fragments.append("\xa0\xa0\xa0")
    display(HTML("".join(fragments)))
 
# Show the three demo frames in one row, each under its own caption.
display_side_by_side(
    dfs=[df1, df2, df3],
    captions=['tabela 1', 'tabela 2', 'tabela 3'],
)

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19
5,20,21,22,23
6,24,25,26,27
7,28,29,30,31
8,32,33,34,35
9,36,37,38,39


# Mostrar outputs lado a lado - exemplo 2

In [19]:
from ipywidgets import widgets, Layout
from IPython import display
import pandas as pd
import numpy as np

# sample data: two 8x3 frames of standard-normal random numbers
df1 = pd.DataFrame(np.random.randn(8, 3))
df2 = pd.DataFrame(np.random.randn(8, 3))

# create one output widget per dataframe; anything displayed or printed
# inside a `with widget:` block is rendered in that widget
widget1 = widgets.Output()
widget2 = widgets.Output()

# render in output widgets
# (`display` is the IPython.display module here, hence `display.display`)
with widget1:
    display.display(df1.style.set_caption('First dataframe'))
    df1.info()
with widget2:
    display.display(df2.style.set_caption('Second dataframe'))
    # Fixed: this used to call df1.info(), so the summary under the second
    # table described the first dataframe.
    df2.info()


# add some CSS styles to distribute free space
box_layout = Layout(display='flex',
                    flex_flow='row',
                    justify_content='space-around',
                    width='auto'
                   )

# create horizontal box container holding both widgets
hbox = widgets.HBox([widget1, widget2], layout=box_layout)

# render hbox (last expression of the cell)
hbox

HBox(children=(Output(), Output()), layout=Layout(display='flex', flex_flow='row', justify_content='space-arou…

# Ler vários ficheiros json numa diretoria e carregá-los numa DataFrame

In [15]:
import pandas as pd
from pathlib import Path
import json

# Accumulates the records read from every JSON file under ./input.
a = []

# sorted() makes the row order reproducible; plain glob order depends on the
# filesystem.
for filename in sorted(Path.cwd().glob(r'input/*.json')):
    # Read as UTF-8 explicitly instead of relying on the platform's default
    # locale encoding.
    with open(filename, encoding='utf-8') as json_file:
        data = json.load(json_file)
    if isinstance(data, list):
        # File holds an array of records: add them all.
        a.extend(data)
    else:
        # File holds a single top-level value (e.g. one object). `a += data`
        # would have added the dict's keys as rows; keep it as one record.
        a.append(data)

# One row per collected record.
df = pd.DataFrame(a)

# Limpar valores a null

In [None]:
# Replace missing values column by column and assign the result back.
# Calling `df[col].fillna(..., inplace=True)` operates on a temporary Series
# (chained assignment) and is not guaranteed to update `df`; under pandas
# Copy-on-Write it never does.
df['BusinessProcessName'] = df['BusinessProcessName'].fillna('sem valor')
df['processName'] = df['processName'].fillna('sem valor')
df['Erro'] = df['Erro'].fillna(0)

# Criar coluna nova chamada 'data' selecionando os primeiros 10 caracteres do campo timeStamp

In [None]:
# New column 'data' holding the first 10 characters of each timeStamp value.
# The vectorised .str accessor propagates missing values as NaN, whereas
# .apply(lambda x: x[:10]) raised TypeError on the first missing timeStamp.
# NOTE(review): presumably timeStamp is an ISO-8601 string, so this yields
# 'YYYY-MM-DD' — confirm against the input files.
df['data'] = df['timeStamp'].str[:10]

# Eliminar duplicados do campo fingerprint

In [None]:
# Keep only the last row for each 'fingerprint' value.
# drop_duplicates returns a new DataFrame; the result was previously
# discarded, so `df` still contained the duplicates in later cells.
df = df.drop_duplicates(subset='fingerprint', keep="last")

# Somar os campos 'campo1', 'campo2' e 'campo3' por 'data' e 'nome'

In [None]:
# Sum campo1, campo2 and campo3 within every ('data', 'nome') group.
df_total = (
    df
    .groupby(by=['data', 'nome'])[['campo1', 'campo2', 'campo3']]
    .sum()
)