# Cria base de dados para exemplo

* Uma observação para cada segundo, de 1-Jan-2021 até 28-Fev-2021 (86.400 observações por dia).
* Um arquivo CSV para cada dia do mês (31 para Janeiro + 28 para Fevereiro = 59 no total)
* Colunas: _Timestamp_ + 5 colunas numéricas aleatórias + 1 coluna categórica

In [1]:
import os

import numpy as np
import pandas as pd

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before the loop (no-op when it is already there).
os.makedirs("dask_dataframes", exist_ok=True)

# One entry per calendar day from 1-Jan-2021 to 28-Feb-2021 (59 days).
dates = pd.date_range(start="1-1-2021", end="2-28-2021")

for dt in dates:
    # One timestamp per second of the current day (86400 rows).
    # "s" is the current spelling of the "second" alias; uppercase "S" is
    # deprecated in recent pandas releases.
    ind_day = pd.date_range(start="%s 0:0:0"%dt.date(), end="%s 23:59:59"%dt.date(), freq="s")

    # Five columns of uniform random numbers in [0, 1).
    data = np.random.rand(len(ind_day), 5)

    # One random categorical label per row.
    cls = np.random.choice(["Class1", "Class2", "Class3", "Class4","Class5"], size=len(ind_day))

    # Named df_day (not df) so this pandas frame is not confused with the
    # Dask DataFrame that later cells bind to `df`.
    df_day = pd.DataFrame(data=data, columns=["A", "B", "C", "D", "E"], index=ind_day)
    df_day["Type"] = cls

    # One CSV per day, named after the date. The index is written without a
    # label, so it is read back as the column 'Unnamed: 0'.
    df_day.to_csv("dask_dataframes/%s.csv"%dt.date())

In [2]:
import os

# os.listdir returns entries in arbitrary order; sort them so the listing is
# deterministic and reads chronologically (file names are ISO dates).
sorted(os.listdir("dask_dataframes/"))

['2021-01-01.csv',
 '2021-01-02.csv',
 '2021-01-03.csv',
 '2021-01-04.csv',
 '2021-01-05.csv',
 '2021-01-06.csv',
 '2021-01-07.csv',
 '2021-01-08.csv',
 '2021-01-09.csv',
 '2021-01-10.csv',
 '2021-01-11.csv',
 '2021-01-12.csv',
 '2021-01-13.csv',
 '2021-01-14.csv',
 '2021-01-15.csv',
 '2021-01-16.csv',
 '2021-01-17.csv',
 '2021-01-18.csv',
 '2021-01-19.csv',
 '2021-01-20.csv',
 '2021-01-21.csv',
 '2021-01-22.csv',
 '2021-01-23.csv',
 '2021-01-24.csv',
 '2021-01-25.csv',
 '2021-01-26.csv',
 '2021-01-27.csv',
 '2021-01-28.csv',
 '2021-01-29.csv',
 '2021-01-30.csv',
 '2021-01-31.csv',
 '2021-02-01.csv',
 '2021-02-02.csv',
 '2021-02-03.csv',
 '2021-02-04.csv',
 '2021-02-05.csv',
 '2021-02-06.csv',
 '2021-02-07.csv',
 '2021-02-08.csv',
 '2021-02-09.csv',
 '2021-02-10.csv',
 '2021-02-11.csv',
 '2021-02-12.csv',
 '2021-02-13.csv',
 '2021-02-14.csv',
 '2021-02-15.csv',
 '2021-02-16.csv',
 '2021-02-17.csv',
 '2021-02-18.csv',
 '2021-02-19.csv',
 '2021-02-20.csv',
 '2021-02-21.csv',
 '2021-02-22

# Cria cluster do dask

In [3]:
from dask.distributed import Client

# Start a local cluster and connect to it. The summary rendered below reports
# 2 workers, 4 threads in total and about 0.93 GiB of memory per worker, so
# memory_limit is applied per worker rather than to the cluster as a whole.
client = Client(n_workers=2, threads_per_worker=2, memory_limit="1GB")
# Bare last expression: displays the client/cluster summary, including the dashboard link.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 2
Total threads: 4,Total memory: 1.86 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:57199,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 4
Started: Just now,Total memory: 1.86 GiB

0,1
Comm: tcp://127.0.0.1:57214,Total threads: 2
Dashboard: http://127.0.0.1:57217/status,Memory: 0.93 GiB
Nanny: tcp://127.0.0.1:57203,
Local directory: C:\Users\felbu\Documents\Python_EPGE_2022\10-Aulas\dask-worker-space\worker-0cntw_74,Local directory: C:\Users\felbu\Documents\Python_EPGE_2022\10-Aulas\dask-worker-space\worker-0cntw_74

0,1
Comm: tcp://127.0.0.1:57215,Total threads: 2
Dashboard: http://127.0.0.1:57216/status,Memory: 0.93 GiB
Nanny: tcp://127.0.0.1:57202,
Local directory: C:\Users\felbu\Documents\Python_EPGE_2022\10-Aulas\dask-worker-space\worker-a_2k6dmz,Local directory: C:\Users\felbu\Documents\Python_EPGE_2022\10-Aulas\dask-worker-space\worker-a_2k6dmz


# Lê base de dados

In [4]:
import dask.dataframe as dd

In [5]:
# The glob pattern matches every daily CSV (2021-MM-DD.csv); all of them are
# exposed as a single Dask DataFrame.
df = dd.read_csv('dask_dataframes/2021-*-*.csv')

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,A,B,C,D,E,Type
0,2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4
1,2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1
2,2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2
3,2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2
4,2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1


In [7]:
df.tail()

Unnamed: 0.1,Unnamed: 0,A,B,C,D,E,Type
86395,2021-02-28 23:59:55,0.467637,0.3357,0.556982,0.613883,0.022757,Class2
86396,2021-02-28 23:59:56,0.288385,0.547262,0.233291,0.904265,0.854336,Class4
86397,2021-02-28 23:59:57,0.387114,0.430303,0.223098,0.290961,0.154756,Class2
86398,2021-02-28 23:59:58,0.090263,0.858134,0.640929,0.271057,0.219644,Class1
86399,2021-02-28 23:59:59,0.745424,0.790635,0.937931,0.243355,0.491579,Class2


In [8]:
df.dtypes

Unnamed: 0     object
A             float64
B             float64
C             float64
D             float64
E             float64
Type           object
dtype: object

In [9]:
# The CSV index was written without a label, so it is read back as the column
# 'Unnamed: 0' (see df.dtypes above). Rename it to 'date' and use it as the index.
# NOTE: its values are still plain strings here (dtype object), not datetimes.
df = df.rename(columns={'Unnamed: 0':'date'}).set_index('date')

In [10]:
df.head()

Unnamed: 0_level_0,A,B,C,D,E,Type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1


In [11]:
df.tail()

Unnamed: 0_level_0,A,B,C,D,E,Type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-28 23:59:55,0.467637,0.3357,0.556982,0.613883,0.022757,Class2
2021-02-28 23:59:56,0.288385,0.547262,0.233291,0.904265,0.854336,Class4
2021-02-28 23:59:57,0.387114,0.430303,0.223098,0.290961,0.154756,Class2
2021-02-28 23:59:58,0.090263,0.858134,0.640929,0.271057,0.219644,Class1
2021-02-28 23:59:59,0.745424,0.790635,0.937931,0.243355,0.491579,Class2


In [12]:
df.dtypes

A       float64
B       float64
C       float64
D       float64
E       float64
Type     object
dtype: object

In [13]:
df.columns

Index(['A', 'B', 'C', 'D', 'E', 'Type'], dtype='object')

In [14]:
# The index was not parsed as datetime: its dtype is still object!
df.index

Dask Index Structure:
npartitions=59
2021-01-01 00:00:00    object
2021-01-02 00:00:00       ...
                        ...  
2021-02-28 00:00:00       ...
2021-02-28 23:59:59       ...
Name: date, dtype: object
Dask Name: sort_index, 295 tasks

In [15]:
# Re-read the files, this time producing a proper datetime column:
#   names       - explicit column names; "date" labels the unnamed timestamp column
#   skiprows=1  - skip each file's own header line, since the names are supplied here
#   parse_dates - parse the "date" column as datetime instead of leaving it as text
df = dd.read_csv("dask_dataframes/2021-*-*.csv",
                 names=["date", "A", "B", "C", "D", "E", "Type"],
                 skiprows=1,
                 parse_dates=["date",]
                )

In [16]:
df.head()

Unnamed: 0,date,A,B,C,D,E,Type
0,2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4
1,2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1
2,2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2
3,2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2
4,2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1


In [17]:
df.dtypes

date    datetime64[ns]
A              float64
B              float64
C              float64
D              float64
E              float64
Type            object
dtype: object

In [18]:
# 'date' is now datetime64[ns] (see df.dtypes above), so using it as the index
# gives the frame a datetime index.
# NOTE: this cell is not idempotent; re-running it fails once 'date' is no longer a column.
df = df.set_index('date')

In [19]:
df.head()

Unnamed: 0_level_0,A,B,C,D,E,Type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1


In [20]:
df.dtypes

A       float64
B       float64
C       float64
D       float64
E       float64
Type     object
dtype: object

# Delayed computation

## Exemplo 1

In [21]:
df['A'].max()

dd.Scalar<series-..., dtype=float64>

In [22]:
df['A'].max().compute()

0.9999999931358396

## Exemplo 2

In [23]:
df.groupby('Type').mean()

Unnamed: 0_level_0,A,B,C,D,E
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,float64,float64,float64,float64,float64
,...,...,...,...,...


In [24]:
df.groupby('Type').mean().compute()

Unnamed: 0_level_0,A,B,C,D,E
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Class1,0.499745,0.49988,0.49996,0.49972,0.499834
Class2,0.499628,0.499913,0.50043,0.500107,0.499778
Class3,0.49971,0.49986,0.499722,0.49966,0.499885
Class4,0.499992,0.499762,0.499931,0.500238,0.499909
Class5,0.500198,0.500491,0.50011,0.500031,0.499649


## Exemplo 3

In [25]:
# Build F as the sum A + B centred on its own mean, then take the mean of the
# centred column. Everything stays lazy: `media` is a Dask scalar until
# .compute() is called on it.
a_plus_b = df['A'] + df['B']
df['F'] = a_plus_b - a_plus_b.mean()
media = df['F'].mean()
media

dd.Scalar<series-..., dtype=float64>

In [26]:
media.compute()

-3.396599455626593e-17

## Exceções (métodos com compute implícito)

In [27]:
df.head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377


In [28]:
df.tail()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-02-28 23:59:55,0.467637,0.3357,0.556982,0.613883,0.022757,Class2,-0.196498
2021-02-28 23:59:56,0.288385,0.547262,0.233291,0.904265,0.854336,Class4,-0.164189
2021-02-28 23:59:57,0.387114,0.430303,0.223098,0.290961,0.154756,Class2,-0.182419
2021-02-28 23:59:58,0.090263,0.858134,0.640929,0.271057,0.219644,Class1,-0.051439
2021-02-28 23:59:59,0.745424,0.790635,0.937931,0.243355,0.491579,Class2,0.536223


In [29]:
# Careful: unlike head() and tail() above, sample() does NOT compute anything
# implicitly. It only returns the lazy Dask structure shown below, and
# .compute() is still needed to get actual rows.
df.sample(frac=1e-5)

Unnamed: 0_level_0,A,B,C,D,E,Type,F
npartitions=59,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,float64,float64,float64,float64,float64,object,float64
2021-01-02 00:00:00,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...
2021-02-28 00:00:00,...,...,...,...,...,...,...
2021-02-28 23:59:59,...,...,...,...,...,...,...


In [30]:
df.sample(frac=1e-5).compute()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 18:33:30,0.483062,0.709097,0.796107,0.218921,0.489258,Class4,0.192323
2021-01-02 14:22:29,0.116217,0.261472,0.678423,0.201915,0.115074,Class5,-0.622147
2021-01-03 16:04:33,0.730987,0.043068,0.496807,0.4901,0.933376,Class2,-0.225781
2021-01-04 03:25:59,0.957427,0.334307,0.860084,0.952364,0.770504,Class5,0.291899
2021-01-05 22:02:39,0.886837,0.807524,0.556435,0.280038,0.851074,Class5,0.694526
2021-01-06 07:55:17,0.389044,0.156237,0.308806,0.954369,0.766518,Class1,-0.454555
2021-01-07 18:34:25,0.640765,0.467857,0.685782,0.43147,0.261235,Class5,0.108786
2021-01-08 02:25:45,0.258969,0.501502,0.310805,0.680422,0.131333,Class2,-0.239365
2021-01-09 23:37:50,0.679106,0.167394,0.38236,0.866324,0.533061,Class4,-0.153336
2021-01-10 06:18:48,0.118818,0.279116,0.081194,0.228224,0.580099,Class4,-0.601901


# Semelhanças com pandas

A grande vantagem do Dask sobre, por exemplo, o Spark é que a sua API é muito semelhante à do pandas, como mostram os exemplos abaixo.

## Filtrar linhas

In [31]:
df_1 = df[df.Type == 'Class1']

In [32]:
df_1.head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377
2021-01-01 00:00:05,0.449564,0.076249,0.07533,0.508997,0.757593,Class1,-0.474022
2021-01-01 00:00:12,0.253887,0.37672,0.825048,0.950216,0.279805,Class1,-0.369229
2021-01-01 00:00:19,0.257207,0.67128,0.433995,0.955925,0.189143,Class1,-0.071348


## Selecionar colunas

In [33]:
df_num = df[['A','B','C','D','E','F']]

In [34]:
df_num.head()

Unnamed: 0_level_0,A,B,C,D,E,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,-0.620377


## Estatísticas simples

In [35]:
df.A.mean().compute()

0.49985444309226584

## Group by

In [36]:
df[['Type','A','B','C']].groupby('Type').agg(['mean','std']).compute()

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Class1,0.499745,0.288778,0.49988,0.288746,0.49996,0.288579
Class2,0.499628,0.288792,0.499913,0.288743,0.50043,0.288445
Class3,0.49971,0.288795,0.49986,0.288648,0.499722,0.288825
Class4,0.499992,0.288698,0.499762,0.288727,0.499931,0.28857
Class5,0.500198,0.288784,0.500491,0.288733,0.50011,0.288791


# Resampling

In [37]:
# Resample only the numeric columns: 'Type' holds strings and cannot be
# averaged. Older pandas dropped such columns with a warning; newer versions
# raise an error instead.
# "1h" is the current spelling of the hourly alias; uppercase "1H" is
# deprecated in recent pandas releases.
df_hora = df[['A', 'B', 'C', 'D', 'E', 'F']].resample("1h")

In [38]:
df_media_hora = df_hora.mean()

  meta = getattr(meta_r, how)(*how_args, **how_kwargs)


In [39]:
df_media_hora

Unnamed: 0_level_0,A,B,C,D,E,F
npartitions=59,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,float64,float64,float64,float64,float64,float64
2021-01-02 00:00:00,...,...,...,...,...,...
...,...,...,...,...,...,...
2021-02-28 00:00:00,...,...,...,...,...,...
2021-02-28 23:00:00,...,...,...,...,...,...


In [40]:
df_media_hora.head()

Unnamed: 0_level_0,A,B,C,D,E,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,0.499349,0.494756,0.495592,0.495396,0.498922,-0.005731
2021-01-01 01:00:00,0.503274,0.509591,0.49193,0.496885,0.48944,0.013029
2021-01-01 02:00:00,0.500421,0.503928,0.500714,0.505299,0.496326,0.004513
2021-01-01 03:00:00,0.504126,0.502633,0.499923,0.496024,0.505823,0.006924
2021-01-01 04:00:00,0.49503,0.501844,0.502325,0.496229,0.500867,-0.002962


# Partições

In [41]:
df.npartitions

59

In [42]:
df.partitions[0]

Unnamed: 0_level_0,A,B,C,D,E,Type,F
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01,float64,float64,float64,float64,float64,object,float64
2021-01-02,...,...,...,...,...,...,...


In [43]:
df.partitions[0].head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377


In [44]:
# No need to call df.set_index('date') again here: df was already indexed by
# 'date' in an earlier cell.

In [45]:
# Split the frame into one partition per hour of data.
# "1h" is the current spelling of the hourly alias; uppercase "1H" is
# deprecated in recent pandas releases.
# NOTE: this rebinds df_hora, which previously held the hourly resampler.
df_hora = df.repartition(freq="1h")

In [46]:
df_hora.npartitions

1416

In [47]:
# Visually nothing changes: repartitioning affects how the rows are split
# into partitions, not the rows themselves.
df_hora.head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377


In [48]:
# Repartition into 7-day blocks; the last expression displays the resulting
# number of partitions.
df_semana = df.repartition(freq='7D')
df_semana.npartitions

9

In [49]:
# Apply a function to each partition (here: the mean of column A within the
# partition). The result is lazy: only the Dask Series structure is shown
# until .compute() is called.
df_semana.map_partitions(lambda x: x.A.mean())

Dask Series Structure:
npartitions=9
2021-01-01 00:00:00    float64
2021-01-07 00:00:00        ...
                        ...   
2021-02-25 00:00:00        ...
2021-02-28 23:59:59        ...
dtype: float64
Dask Name: lambda, 788 tasks

In [50]:
df_semana.map_partitions(lambda x: x.A.mean()).compute()

0    0.499519
1    0.500534
2    0.500180
3    0.499919
4    0.500370
5    0.500467
6    0.499045
7    0.499012
8    0.499404
dtype: float64

# O dashboard do Dask

(mostrar o dashboard)

# Parquet

* Por que Parquet?
* Pyarrow e Fastparquet

In [51]:
!pip install pyarrow



## Salvar

In [52]:
df.head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377


In [53]:
df.to_parquet('big_data.parq',engine='pyarrow')

[None]

## Ler arquivo

In [54]:
df_2 = dd.read_parquet('big_data.parq', engine='pyarrow')

In [55]:
df_2.head()

Unnamed: 0_level_0,A,B,C,D,E,Type,F
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-01 00:00:00,0.783249,0.203613,0.622932,0.365188,0.591866,Class4,-0.012974
2021-01-01 00:00:01,0.790864,0.794576,0.32275,0.706576,0.355779,Class1,0.585605
2021-01-01 00:00:02,0.570402,0.397358,0.49586,0.776931,0.017251,Class2,-0.032076
2021-01-01 00:00:03,0.580558,0.256224,0.97814,0.211508,0.041214,Class2,-0.163053
2021-01-01 00:00:04,0.203923,0.175535,0.859204,0.468466,0.179885,Class1,-0.620377
