In [165]:
import pandas as pd
from datetime import datetime


def day_of_week_from_str(date_str):
    """Return the ISO weekday number of a ``DD/MM/YYYY`` date string.

    Args:
        date_str: Date text in ``%d/%m/%Y`` format, e.g. ``'06/03/2023'``.

    Returns:
        int: 1 for Monday through 7 for Sunday.

    Raises:
        ValueError: If ``date_str`` does not match ``%d/%m/%Y``.
    """
    # isoweekday() is Monday=1..Sunday=7, i.e. weekday() + 1.
    return datetime.strptime(date_str, '%d/%m/%Y').isoweekday()


In [148]:
from production.model import production

def get_category(sentence):
    """Return the result ``production`` yields for a single sentence.

    ``production`` is called with a one-element ``sentences`` list and the
    first item of whatever it returns is handed back to the caller.
    """
    predictions = production(sentences=[sentence])
    return predictions[0]


'R'

In [225]:
import re
import pandas as pd
from itertools import product
import numpy as np


def convert_price(price):
    """Normalize a raw price cell to a number.

    Args:
        price: Either a string in Brazilian number format (``.`` as the
            thousands separator, ``,`` as the decimal separator, optionally
            mixed with currency symbols or whitespace), or a value that is
            already numeric.

    Returns:
        For strings, the parsed ``float`` with its sign preserved.
        For any other input, the value multiplied by -1.

    Raises:
        ValueError: If a string contains nothing that parses as a number.
    """
    if isinstance(price, str):
        # Keep only digits, the decimal comma and the sign. This drops
        # currency symbols, whitespace and thousands-separator dots, so
        # "1.234,56" becomes "1234,56" instead of the previous "1.23456".
        cleaned = re.sub(r'[^\d,-]', '', price)
        return float(cleaned.replace(',', '.'))
    # NOTE(review): only numeric input has its sign flipped; string input keeps
    # its sign, as in the original code. Confirm whether that asymmetry is intended.
    return price * -1


def week_of_month(dt):
    """Return the 1-based calendar row (Monday-first week) of ``dt`` in its month.

    The day of the month is shifted by the weekday of the month's first day
    (Monday = 0) and divided by 7, rounding up. Months that start late in the
    week and have enough days can therefore reach week 6.

    Args:
        dt: An object exposing ``day``, ``replace(day=...)`` and ``weekday()``,
            such as ``datetime.datetime`` or ``pandas.Timestamp``.

    Returns:
        int: Week index within the month, starting at 1.
    """
    month_start_offset = dt.replace(day=1).weekday()
    shifted_day = dt.day + month_start_offset
    # Integer ceiling division: same value as int(ceil(shifted_day / 7)).
    return (shifted_day + 6) // 7


def day_of_week_from_str(date_str):
    """Map a ``DD/MM/YYYY`` date string to its weekday number (Monday=1 .. Sunday=7).

    Raises:
        ValueError: If ``date_str`` does not match ``%d/%m/%Y``.
    """
    # NOTE: this repeats the definition from an earlier cell of this notebook;
    # the later definition is the one in effect once this cell has run.
    parsed = datetime.strptime(date_str, '%d/%m/%Y')
    return parsed.isoweekday()

def read_and_process_file(file_path):
    """Load one statement file and total the "RESTAURANTES" spending per calendar slot.

    The file is read as a header-less, semicolon-separated CSV with columns
    ``Date``, ``Description`` and ``Price`` (decimal comma). Each row gets a
    converted price, a category (via ``get_category``) and a weekday number
    (via ``day_of_week_from_str``). Only rows whose category equals
    ``"RESTAURANTES"`` are kept, and their prices are summed per
    ``(Day_of_week, Week_of_Month, Month, Year)``.

    Args:
        file_path: Path of the statement file to read.

    Returns:
        pandas.DataFrame: Columns ``Day_of_week``, ``Week_of_Month``, ``Month``,
        ``Year`` and ``Price`` (the summed price), with a fresh integer index.
    """
    statement = pd.read_csv(file_path, sep=';', decimal=',',
                            names=['Date', 'Description', 'Price'])

    # Derived columns are computed in the same order as before:
    # price first, then category, then weekday.
    statement = statement.assign(
        Price=lambda frame: frame["Price"].apply(convert_price),
        Category=lambda frame: frame["Description"].apply(get_category),
        Day_of_week=lambda frame: frame["Date"].apply(day_of_week_from_str),
    )

    # Drop helper columns only where they exist; missing labels are ignored.
    statement = statement.drop(
        columns=['Description', 'Type', 'Card type'], errors='ignore')

    restaurants = statement[statement['Category'] == "RESTAURANTES"].copy()
    restaurants['Date'] = pd.to_datetime(restaurants['Date'], format='%d/%m/%Y')
    restaurants = restaurants.set_index('Date').sort_index()

    restaurants['Month'] = restaurants.index.month
    restaurants['Year'] = restaurants.index.year
    restaurants = restaurants.sort_values(by=['Month'])
    restaurants['Week_of_Month'] = restaurants.index.map(week_of_month)

    group_keys = ['Day_of_week', 'Week_of_Month', 'Month', 'Year']
    totals = restaurants.groupby(group_keys)['Price'].sum()
    return totals.reset_index()


# Process the twelve monthly files raw_data/01.txt ... raw_data/12.txt.
monthly_frames = [
    read_and_process_file(f'raw_data/{month:02d}.txt') for month in range(1, 13)
]

# Keep the per-month names bound as well, in case later cells refer to them.
(df1, df2, df3, df4, df5, df6,
 df7, df8, df9, df10, df11, df12) = monthly_frames

# Stack all months and order the rows by month, week of month and weekday.
df = pd.concat(monthly_frames).sort_values(
    by=['Month', 'Week_of_Month', 'Day_of_week'])
df.head(10)
# df.to_csv('testex.csv', index=False)

Unnamed: 0,Day_of_week,Week_of_Month,Month,Year,Price
0,1,2,1,2023,420.82
3,2,3,1,2023,60.8
5,3,3,1,2023,175.5
1,1,4,1,2023,30.0
4,2,4,1,2023,175.5
7,5,4,1,2023,120.0
2,1,5,1,2023,47.5
6,3,5,1,2023,196.0
8,5,5,1,2023,87.15
6,5,1,2,2023,146.0


In [227]:
import pandas as pd
from itertools import product

# Expects 'df' to be the aggregated DataFrame with the columns
# Day_of_week, Week_of_Month, Month, Year and Price.
# Collect the distinct years present so the grid covers exactly those years.
years = df['Year'].unique()

# A month can span up to six Monday-first calendar weeks, so do not hard-code
# five: extend the grid to the largest week number actually present in the
# data. Otherwise the left merge below would silently drop week-6 rows.
max_week_seen = df['Week_of_Month'].max()
n_weeks = 5 if pd.isna(max_week_seen) else max(5, int(max_week_seen))

# Every combination of weekday, week of month, month and year.
all_combinations = list(
    product(range(1, 8), range(1, n_weeks + 1), range(1, 13), years))

# One row per combination.
all_dates_df = pd.DataFrame(all_combinations, columns=[
                            'Day_of_week', 'Week_of_Month', 'Month', 'Year'])

# Left merge keeps every combination; slots without spending get NaN in 'Price'.
full_df = all_dates_df.merge(
    df, on=['Day_of_week', 'Week_of_Month', 'Month', 'Year'], how='left')

# Replace NaN prices with 0.0. Assign the result back instead of calling
# fillna(inplace=True) on the selected column: the chained in-place form
# operates on a temporary and does not update full_df under Copy-on-Write.
full_df['Price'] = full_df['Price'].fillna(0.0)

# Order chronologically: year, month, week of month, weekday.
full_df = full_df.sort_values(
    by=['Year', 'Month', 'Week_of_Month', 'Day_of_week'])

# Preview, then persist the completed grid as CSV.
full_df.head(10)
full_df.to_csv('full_data.csv', index=False)

In [77]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.embedding import ExpandValues

# Create a random (100, 200) input tensor; torch.rand does not track
# gradients unless asked to.
original_tensor = torch.rand(100, 200)

xp = ExpandValues(input_size=1, output_size=2)

# Run the module on the input tensor and inspect the shape of the result.
result_tensor = xp(original_tensor)
result_tensor.shape

torch.Size([100, 200, 2])

In [60]:
import torch
import torch.nn as nn
from model.attention import ResidualAttention
from model.feed_forward import ResidualFeedForward
from model.embedding import PositionalEmbedding


class EncoderLayer(nn.Module):
    """A ``ResidualAttention`` module followed by a ``ResidualFeedForward`` module."""

    def __init__(self, embed_dim: int, num_heads: int, dropout: float) -> None:
        """
        Builds the two sub-modules of the layer.

        Args:
            embed_dim (int): Forwarded as ``embed_dim`` to both sub-modules.
            num_heads (int): Forwarded to the attention sub-module.
            dropout (float): Forwarded to the attention sub-module only.
        """
        super().__init__()

        self.att1 = ResidualAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            dropout=dropout,
        )
        self.ff = ResidualFeedForward(embed_dim=embed_dim)

    def forward(self, x: torch.Tensor, padding_mask: torch.Tensor) -> torch.Tensor:
        """
        Applies attention, then the feed-forward stage.

        Args:
            x (torch.Tensor): The input tensor.
            padding_mask (torch.Tensor): Passed unchanged to the attention sub-module.

        Returns:
            torch.Tensor: The feed-forward output computed from the attention result.
        """
        return self.ff(self.att1(x, padding_mask))


class Encoder(nn.Module):
    """``ExpandValues`` front end, a stack of ``EncoderLayer`` modules and a final projection."""

    def __init__(self, output_size: int, num_heads: int, dropout: float, num_layers: int = 3) -> None:
        """
        Builds the encoder's sub-modules.

        Args:
            output_size (int): Output size given to ``ExpandValues``; also used as
                ``embed_dim`` of every layer and as both sizes of the final linear layer.
            num_heads (int): The number of attention heads in each layer.
            dropout (float): The dropout rate passed to each layer.
            num_layers (int): The number of encoder layers. Default is 3.
        """
        super().__init__()

        self.pe = ExpandValues(input_size=1, output_size=output_size)

        stacked_layers = []
        for _ in range(num_layers):
            stacked_layers.append(
                EncoderLayer(embed_dim=output_size,
                             num_heads=num_heads, dropout=dropout))
        self.layers = nn.ModuleList(stacked_layers)

        # Same-size linear projection followed by a LeakyReLU activation.
        self.ff = nn.Sequential(
            nn.Linear(output_size, output_size),
            nn.LeakyReLU(),
        )

    def forward(self, x: torch.Tensor, padding_mask: torch.Tensor) -> torch.Tensor:
        """
        Runs the input through ``pe``, every layer in order, then ``ff``.

        Args:
            x (torch.Tensor): The input tensor.
            padding_mask (torch.Tensor): Passed unchanged to every encoder layer.

        Returns:
            torch.Tensor: The output of the final projection.
        """
        hidden = self.pe(x)
        for encoder_layer in self.layers:
            hidden = encoder_layer(hidden, padding_mask)
        return self.ff(hidden)

In [69]:
import torch
import torch.nn as nn


class ModelRS(nn.Module):
    """Wraps an ``Encoder`` and averages its output over dimension 1."""

    def __init__(self, output_size: int, num_heads: int, dropout: float, num_layers: int) -> None:
        """
        Initializes the ModelRS module.

        Args:
            output_size (int): Forwarded to the encoder as ``output_size``.
            num_heads (int): The number of attention heads.
            dropout (float): The dropout rate.
            num_layers (int): The number of layers in the encoder.
        """
        super().__init__()

        self.encoder = Encoder(
            output_size=output_size, num_heads=num_heads, dropout=dropout, num_layers=num_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the ModelRS module.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The encoder output averaged over dimension 1.
            No classifier head is applied.
        """
        # The model takes no mask argument, so a one-element tensor of ones is
        # passed as the padding mask. It is created on the input's device so the
        # encoder never mixes CPU and accelerator tensors.
        padding_mask = torch.ones(1, device=x.device)
        encoded = self.encoder(x, padding_mask)
        # Mean pooling over dimension 1.
        return encoded.mean(dim=1)

In [84]:
import torch
# Smoke test: build the model and check the output shape for a random
# (100, 1) input.
rs = ModelRS(
    output_size=100,
    num_heads=10,
    dropout=0.3,
    num_layers=3,
)

original_tensor = torch.rand(100, 1)
rs(original_tensor).shape

torch.Size([100, 100])

In [109]:
import pandas as pd

# The source data was supplied by the user as an image. Since there is no
# direct way to extract text from images here, an example dataset is
# hand-built following the structure shown in that image.
data = {
    "Date": [
        "28/11/2023", "17/04/2023", "25/11/2023", "01/12/2023", "15/03/2023",
        "12/12/2023", "14/12/2023", "14/08/2023", "06/03/2023", "24/04/2023",
        "13/12/2023", "03/04/2023", "13/03/2023", "05/10/2023", "27/10/2023",
        "09/09/2023", "12/12/2023", "23/03/2023"
    ],
    "Price": [
        -47.00, -33.50, -37.30, -87.70, -39.40, -5.50, -39.58, -66.40, -23.00,
        -66.60, -29.18, -16.90, -25.89, -22.00, -39.00, -43.90, -2.99, -35.80
    ],
    # Assuming all entries are from the same category.
    "Category": ["RESTAURANTES"] * 18,
    "Day_of_week": [2, 1, 6, 5, 3, 2, 4, 1, 1, 1, 3, 1, 1, 4, 5, 6, 2, 4]
}

# Build the DataFrame, parse 'Date' as datetime, use it as the index and
# order the rows chronologically.
df = (
    pd.DataFrame(data)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%d/%m/%Y'))
    .set_index('Date')
    .sort_index()
)

# For this example the dataset is assumed to be clean, with no missing values.

# Month and year columns derived from the date index.
df['Month'] = df.index.month
df['Year'] = df.index.year

# Treat the weekday number as a categorical variable.
df['Day_of_week'] = df['Day_of_week'].astype('category')

# Train/test split: the last 4 records (by date) are held out as the test set.
train, test = df.iloc[:-4], df.iloc[-4:]

# Show the first rows of the training set.
train.head()

Unnamed: 0_level_0,Price,Category,Day_of_week,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-03-06,-23.0,RESTAURANTES,1,3,2023
2023-03-13,-25.89,RESTAURANTES,1,3,2023
2023-03-15,-39.4,RESTAURANTES,3,3,2023
2023-03-23,-35.8,RESTAURANTES,4,3,2023
2023-04-03,-16.9,RESTAURANTES,1,4,2023
