In [47]:
import sqlite3
import pandas as pd

In [48]:
# Load the invoice data and convert the month column to datetimes in one chained expression
df = (
    pd.read_csv('../data/invoices.csv')
    .assign(month=lambda invoices: pd.to_datetime(invoices['month']))
)

In [49]:
# Preview the first five rows of the loaded invoices
df.head(n=5)

Unnamed: 0,customer,account,month,invoice
0,C1000,A1100,2019-07-01,500
1,C1000,A1100,2019-08-01,1000
2,C1000,A1100,2019-09-01,78000
3,C1000,A1100,2019-10-01,34000
4,C1000,A1100,2019-11-01,55000


In [50]:
# Ingest the invoices into a local SQLite database.
# if_exists='replace' recreates the table, so re-running this cell does not append duplicate rows.
conn = sqlite3.connect('meta_teste.db')
try:
    # index=False keeps the DataFrame's row index out of the table
    df.to_sql('invoices', conn, if_exists='replace', index=False)
finally:
    # Close the handle even when the write fails, so the database file is not left open
    conn.close()

In [51]:
# Sanity check: read the invoices table back from the database
conn = sqlite3.connect('meta_teste.db')
try:
    cursor = conn.cursor()

    # List the columns explicitly (instead of SELECT *) so the labels assigned
    # below always line up with the values, whatever the table's physical column order
    cursor.execute("SELECT customer, account, month, invoice FROM invoices")

    data = cursor.fetchall()

    select = pd.DataFrame(data, columns=['customer', 'account', 'month', 'invoice'])
finally:
    # Release the connection even if the query or the DataFrame construction fails
    conn.close()

In [52]:
# Preview the first five rows read back from the database
select.head(n=5)

Unnamed: 0,customer,account,month,invoice
0,C1000,A1100,2019-07-01 00:00:00,500
1,C1000,A1100,2019-08-01 00:00:00,1000
2,C1000,A1100,2019-09-01 00:00:00,78000
3,C1000,A1100,2019-10-01 00:00:00,34000
4,C1000,A1100,2019-11-01 00:00:00,55000


In [55]:
# Reference date (exclusive upper bound): both look-back windows end right before it.
# It is bound into the SQL as :reference_date, so this is the single place to change it.
REFERENCE_DATE = '2020-01-01'

# Query computing, per account, the average invoice over the 3 and 6 months
# preceding REFERENCE_DATE.
#
# `month` is stored as text such as '2019-07-01 00:00:00' (see the read-back output
# above); comparing it with 'YYYY-MM-DD' strings works because ISO-8601 text sorts
# chronologically.
#
# NOTE(review): COUNT(invoice) counts invoice rows, not distinct months, so the
# "< 3" / "< 6" guards presumably assume one invoice row per account per month —
# confirm against the data.
query = """

-- Filtragem dos dados para considerar apenas retroativos a Jan/2020
WITH tabela_filtrada AS (
    SELECT 
        account, 
        month, 
        invoice
    FROM 
        invoices
    WHERE 
        month < :reference_date
),

-- Seleção das faturas dos últimos 3 meses
invoices_last_3_months AS (
    SELECT 
        account, 
        invoice
    FROM 
        tabela_filtrada
    WHERE 
        month >= DATE(:reference_date, '-3 months')
),

-- Seleção das faturas dos últimos 6 meses
invoices_last_6_months AS (
    SELECT 
        account, 
        invoice
    FROM 
        tabela_filtrada
    WHERE 
        month >= DATE(:reference_date, '-6 months')
),

-- Cálculo da média de faturas dos últimos 3 meses
avg_invoices_last_3_months AS (
SELECT
    account,
    CASE
        WHEN COUNT(invoice) < 3 THEN NULL
        ELSE AVG(invoice)
    END AS avg_invoices_last_3_months
FROM invoices_last_3_months
GROUP BY account
),

-- Cálculo da média de faturas dos últimos 6 meses
avg_invoices_last_6_months AS (
SELECT
    account,
    CASE
        WHEN COUNT(invoice) < 6 THEN NULL
        ELSE AVG(invoice)
    END AS avg_invoices_last_6_months
FROM invoices_last_6_months
GROUP BY account
)

-- Querie final e formatação dos resultados
SELECT 
    a.account, 
    ROUND(ail3m.avg_invoices_last_3_months, 2) AS avg_invoices_last_3_months, 
    ROUND(ail6m.avg_invoices_last_6_months, 2) AS avg_invoices_last_6_months
FROM 
    (SELECT DISTINCT account FROM tabela_filtrada) a
LEFT JOIN 
    avg_invoices_last_3_months ail3m ON a.account = ail3m.account
LEFT JOIN 
    avg_invoices_last_6_months ail6m ON a.account = ail6m.account;
"""

# Connect to the database
conn = sqlite3.connect('meta_teste.db')
try:
    cursor = conn.cursor()

    # Run the query, binding the reference date as a named parameter
    cursor.execute(query, {'reference_date': REFERENCE_DATE})

    # Fetch the results
    result = cursor.fetchall()
finally:
    # Close the connection even if the query fails
    conn.close()

# Convert the results into a DataFrame; SQL NULLs (accounts without enough invoices
# in a window) show up as NaN
df_avg_invoices = pd.DataFrame(result, columns=['account', 'avg_invoices_last_3_months', 'avg_invoices_last_6_months'])

# Show the DataFrame
print(df_avg_invoices)

  account  avg_invoices_last_3_months  avg_invoices_last_6_months
0   A1100                    56333.33                    41416.67
1   A1200                    50666.67                         NaN
2   A1300                         NaN                         NaN
3   A2100                    55400.00                         NaN
