In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import re
from datetime import datetime
import base64
import hashlib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from typing import List, Dict

# Path to the local .xlsx workbook. The processing cell below reads the
# 'BDados' sheet from it and writes the 'TDados' sheet back into the same file.
file_path = './banco_dados.xlsx'

In [2]:
def appendData(dataResp: Dict[str, list[str]], key: str, colData: str):
    """Append ``colData`` to the list stored under ``key`` in ``dataResp``.

    The list is created on first use, so callers never have to pre-register
    a key. ``dataResp`` is mutated in place; nothing is returned.
    """
    # setdefault returns the existing list, or installs (and returns) a new one.
    bucket = dataResp.setdefault(key, [])
    bucket.append(colData)


def trataCol(dataResp: Dict[str, list[str]], colNum: int, colData: str, timeAtual: pd.Timestamp, timeAnterior: pd.Timestamp) -> list[str]:
    """Parse the payload of one "Tela" cell and append its values to ``dataResp``.

    Args:
        dataResp: Accumulator mapping column name -> list of values; mutated
            in place through ``appendData``.
        colNum: Screen number taken from the column header. Screen 2 carries
            several ``;``-separated fields; every other number is stored as a
            single answer.
        colData: Text that followed the timestamp in the cell.
        timeAtual: Timestamp recorded for this screen.
        timeAnterior: Timestamp recorded for the previous screen of the same row.

    Returns:
        ``[]`` for screen 2 (kept as-is for backward compatibility) and
        ``[colData]`` (after marker stripping) for any other screen.

    Raises:
        Whatever ``pd.to_datetime`` raises when the screen-2 date/time fields
        cannot be parsed.
    """
    timeElipsed = (timeAtual - timeAnterior).total_seconds()
    # Drop the "other;" / "Sucess" markers before interpreting the payload.
    colData = colData.replace("other;", "").replace("Sucess;", "").replace("Sucess", "")

    if colNum == 2:
        campos = colData.split(";")
        # Field order is: time, date, answer 2, sex. Pad with None so that a
        # short payload still contributes exactly one value to each of the six
        # columns; otherwise the lists in dataResp end up with unequal lengths.
        hora, data, resposta2, sexo = (campos + [None] * 4)[:4]

        if data is None:
            erroTempo = None
        else:
            # How far the date/time stated by the respondent is from the
            # recorded timestamp, in seconds.
            erroTempo = (timeAtual - pd.to_datetime(f"{data} {hora}", dayfirst=True)).total_seconds()

        if sexo is not None:
            sexo = sexo.replace("Feminino", "0").replace("Masculino", "1")

        for sufixo, valor in (("1", erroTempo), ("2", resposta2), ("3", sexo)):
            appendData(dataResp, f'Resp_{sufixo}', valor)
            appendData(dataResp, f'Elipsed_{sufixo}', timeElipsed)
        return []

    # NOTE(review): the pattern starts with the literal text "<optimized out>",
    # so it only rewrites payloads containing that exact text - confirm this is
    # the intended expression.
    appendData(dataResp, f'Resp_N{colNum}', re.sub(r'<optimized out>#\w+\((Sim|Não)\)', r'\1', colData))
    appendData(dataResp, f'Elipsed_N{colNum}', timeElipsed)
    return [colData]

In [None]:
# Read the raw sheet with pandas
df = pd.read_excel(file_path, sheet_name='BDados')
n_rows = df.shape[0]

# Output columns: name -> one value per input row (None where a cell could
# not be processed), so every list has the same length and stays row-aligned.
dataResp = {}
count = 0  # not used in this cell; kept in case another cell reads it - TODO confirm
# Timestamp of the previous "Tela" column, indexed by row. NaT means "unknown"
# and makes the elapsed time computed by trataCol come out as NaN.
timeAnterior = [pd.NaT] * n_rows

# Cells are expected to look like "YYYY-MM-DD HH:MM:SS.mmm - <payload>"
cell_pattern = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - (.+)")

# Process every column whose header starts with "Tela"
for indexCol, colStr in enumerate(df.columns):
    if not (n_rows and isinstance(colStr, str) and colStr.startswith("Tela")):
        continue

    # Columns whose first row is empty are skipped entirely
    first_cell = df.iloc[0, indexCol]
    if first_cell == "" or pd.isna(first_cell):
        continue

    header = re.search(r"Tela (\d+)", colStr)
    if header is None:
        print("Coluna ignorada (sem número de tela):", colStr)
        continue
    colNum = int(header.group(1))

    # Pre-allocated so that a cell that fails to parse leaves a NaT at its own
    # row instead of shifting the timestamps of all the rows after it.
    timeAtual = [pd.NaT] * n_rows

    for indexRow in range(n_rows):
        cell_value = str(df.iloc[indexRow, indexCol])
        parsed = cell_pattern.match(cell_value)

        if parsed is None:
            print("Ocorreu um erro ao processar a célula:", cell_value)
            continue

        timeAtual[indexRow] = pd.to_datetime(parsed.group(1))
        if colNum == 1:
            # Screen 1 only provides the reference timestamp for screen 2
            continue

        # Collect this cell's values in a scratch dict, then store each one at
        # the row's own position in the output column.
        rowResp = {}
        trataCol(rowResp, colNum, parsed.group(2), timeAtual[indexRow], timeAnterior[indexRow])
        for key, values in rowResp.items():
            if key not in dataResp:
                dataResp[key] = [None] * n_rows
            dataResp[key][indexRow] = values[-1]

    timeAnterior = timeAtual

# Build the new DataFrame from the processed data
df_novo = pd.DataFrame(dataResp)

# Append the result as sheet 'TDados' to the existing workbook, replacing the
# sheet if it is already there (so re-running this cell is safe).
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_novo.to_excel(writer, sheet_name='TDados', index=False)