In [None]:
import sqlite3
import pandas as pd

In [None]:
# Load the invoices CSV and convert the `month` column to pandas datetimes.
# Built as a single expression so re-running the cell always starts from the file.
df = (
    pd.read_csv('../data/invoices.csv')
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
)

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [None]:
# Data ingestion: write the DataFrame into the `invoices` table of the local
# SQLite database, replacing the table if it already exists.
conn = sqlite3.connect('meta.db')
try:
    df.to_sql('invoices', conn, if_exists='replace', index=False)
finally:
    # Always release the database handle, even if the write fails.
    conn.close()

In [None]:
# Database smoke test: read every row of `invoices` back into a DataFrame.
conn = sqlite3.connect('meta.db')
try:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM invoices")
    data = cursor.fetchall()
finally:
    # Always release the database handle, even if the query fails
    # (e.g. the table has not been created yet).
    conn.close()

# NOTE(review): the column labels are hard-coded; this assumes the table has
# exactly these four columns in this order -- confirm against the CSV header.
select = pd.DataFrame(data, columns=['customer', 'account', 'month', 'invoice'])

In [None]:
# Preview the first five rows read back from the database.
select.head(n=5)

In [None]:
# Connect to the database
conn = sqlite3.connect('meta.db')

# Query computing, per account, the average invoice over the 3 and 6 months
# preceding Jan/2020:
#   * tabela_filtrada keeps only rows with month < '2020-01-01';
#   * the 3-month window starts at DATE('2020-01-01', '-3 months') and the
#     6-month window at DATE('2020-01-01', '-6 months');
#   * an account gets NULL for a window when it has fewer than 3 (resp. 6)
#     non-NULL invoices in it, or no rows in it at all (LEFT JOIN);
#   * averages are rounded to 2 decimal places.
# NOTE(review): COUNT(invoice) counts rows, not distinct months; this equals the
# number of months only if there is at most one invoice per account per month --
# confirm against the data.
# NOTE(review): `month` is compared directly against date strings; this presumes
# it is stored as ISO-8601 text in the table -- confirm.
query = """

-- Filtragem dos dados para considerar apenas retroativos a Jan/2020
WITH tabela_filtrada AS (
    SELECT 
        account, 
        month, 
        invoice
    FROM 
        invoices
    WHERE 
        month < '2020-01-01'
),

-- Seleção das faturas dos últimos 3 meses
invoices_last_3_months AS (
    SELECT 
        account, 
        invoice
    FROM 
        tabela_filtrada
    WHERE 
        month >= DATE('2020-01-01', '-3 months')
),

-- Seleção das faturas dos últimos 6 meses
invoices_last_6_months AS (
    SELECT 
        account, 
        invoice
    FROM 
        tabela_filtrada
    WHERE 
        month >= DATE('2020-01-01', '-6 months')
),

-- Cálculo da média de faturas dos últimos 3 meses
avg_invoices_last_3_months AS (
SELECT
    account,
    CASE
        WHEN COUNT(invoice) < 3 THEN NULL
        ELSE AVG(invoice)
    END AS avg_invoices_last_3_months
FROM invoices_last_3_months
GROUP BY account
),

-- Cálculo da média de faturas dos últimos 6 meses
avg_invoices_last_6_months AS (
SELECT
    account,
    CASE
        WHEN COUNT(invoice) < 6 THEN NULL
        ELSE AVG(invoice)
    END AS avg_invoices_last_6_months
FROM invoices_last_6_months
GROUP BY account
)

-- Querie final e formatação dos resultados
SELECT 
    a.account, 
    ROUND(ail3m.avg_invoices_last_3_months, 2) AS avg_invoices_last_3_months, 
    ROUND(ail6m.avg_invoices_last_6_months, 2) AS avg_invoices_last_6_months
FROM 
    (SELECT DISTINCT account FROM tabela_filtrada) a
LEFT JOIN 
    avg_invoices_last_3_months ail3m ON a.account = ail3m.account
LEFT JOIN 
    avg_invoices_last_6_months ail6m ON a.account = ail6m.account;
"""

try:
    cursor = conn.cursor()

    # Run the query
    cursor.execute(query)

    # Fetch the results
    result = cursor.fetchall()
finally:
    # Close the connection even if the query fails, so the handle never leaks.
    conn.close()

# Convert the results into a DataFrame
df_avg_invoices = pd.DataFrame(result, columns=['account', 'avg_invoices_last_3_months', 'avg_invoices_last_6_months'])

# Show the DataFrame (bare last expression -> rich notebook display)
df_avg_invoices