# Set up the environment

In [2]:
# Mount Google Drive at /content/drive; the rest of the notebook reads and
# writes files under /content/drive/MyDrive/framework_monitoramento.
from google.colab import drive
drive.mount('/content/drive')

# Clone the repository into Google Drive.
# NOTE(review): left commented out — presumably a one-time setup step that is
# only needed when the repository is not yet present in Drive; confirm.
# !git clone https://github.com/gbessern/framework_monitoramento /content/drive/MyDrive/framework_monitoramento

Mounted at /content/drive


In [3]:
!pip install --upgrade pandas

Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is incompatible.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 w

In [4]:
# Install PySpark and findspark into the Colab runtime (versions are not pinned).
!pip install pyspark findspark

# findspark.init() runs before the pyspark import below: it makes a Spark
# installation importable by adding it to sys.path, so keep this ordering.
import findspark
findspark.init()

from pyspark.sql import SparkSession

# Start a Spark session with the application name "Framework monitoramento",
# or reuse the session that is already active in this runtime.
spark = SparkSession.builder.appName("Framework monitoramento").getOrCreate()

Collecting findspark
  Downloading findspark-2.0.1-py2.py3-none-any.whl.metadata (352 bytes)
Downloading findspark-2.0.1-py2.py3-none-any.whl (4.4 kB)
Installing collected packages: findspark
Successfully installed findspark-2.0.1


In [5]:
# Make pyspark importable; this must run before the pyspark import below.
import findspark
findspark.init()

from pyspark.sql import SparkSession

# Create the PySpark session, or reuse the one that is already active.
# getOrCreate() returns an existing session when there is one, so the builder
# here carries the same application name used elsewhere in this notebook:
# the session then looks the same whichever cell happens to create it first.
# master("local[*]") runs Spark locally using all available cores.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("Framework monitoramento")
    .getOrCreate()
)

# Bare last expression so the notebook renders the session summary.
spark

# Imports

In [6]:
%load_ext autoreload
%autoreload 2

import sys
import os

# Make the framework package and the usage-example package importable from
# the Drive checkout (appended in this order, after the existing entries).
sys.path.extend([
    '/content/drive/MyDrive/framework_monitoramento/Framework',
    '/content/drive/MyDrive/framework_monitoramento/exemplo_uso',
])

from monit_performance import monit_performance
import pyspark.sql.functions as F


# Simula dados para monitorar

In [7]:
from dados.simula_dados import *
from dados.cria_baseline_mensais import *

# Year-month labels "202301" … "202312", one per month of 2023.
months = [f"2023{month:02d}" for month in range(1, 13)]

# Generate one frame per month, stack them into a single pandas frame with a
# fresh index, then apply the score-status classification row by row.
data = (
    pd.concat(map(generate_monthly_data, months), ignore_index=True)
    .apply(classify_score_status, axis=1)
)

# Hand the simulated data to Spark and print a preview of the rows.
df_spark = spark.createDataFrame(data)
df_spark.show()

+-------+--------+--------+-----+-------+--------------+-----------+-----------+-----------+------+------------------+------------------+------------------+
|ano_mes|group_ab|group_xy|idade|  renda|relacionamento|score_ext_1|score_ext_2|score_final|flag_1|score_ext_1_status|score_ext_2_status|score_final_status|
+-------+--------+--------+-----+-------+--------------+-----------+-----------+-----------+------+------------------+------------------+------------------+
| 202301|       B|       X|   66| 5k~10k|             1|        542|        109|        424|     1|      score válido|      score válido|      score válido|
| 202301|       A|       Y|   29|  1k~3k|             0|        242|        933|          0|     1|      score válido|      score válido|      score válido|
| 202301|       B|       Y|   86|   30k+|             0|        336|        865|          0|     1|      score válido|      score válido|      score válido|
| 202301|       A|       Y|   75|  3k~5k|             1|  

In [8]:
# Output directory for the CSV exports
output_dir = "/content/drive/MyDrive/framework_monitoramento/exemplo_uso/dados/"

# Baseline: rows whose ano_mes is one of the first three months of 2023
baseline_df = df_spark.filter(F.col("ano_mes").isin([202301, 202302, 202303]))

# Write the baseline as CSV with a header row, replacing any previous export.
# NOTE: Spark's CSV writer produces a directory of part files at this path
# rather than a single .csv file.
baseline_csv_path = os.path.join(output_dir, "tbl_baseline.csv")
baseline_df.write.csv(baseline_csv_path, mode="overwrite", header=True)
print(f"Baseline salva em: {baseline_csv_path}")

# Export each monitored month (April to June 2023) to its own CSV path
for month in (202304, 202305, 202306):
    month_csv_path = os.path.join(output_dir, f"tbl_{month}.csv")
    month_df = df_spark.where(F.col("ano_mes") == month)
    month_df.write.csv(month_csv_path, mode="overwrite", header=True)
    print(f"Tabela para o mês {month} salva em: {month_csv_path}")

Baseline salva em: /content/drive/MyDrive/framework_monitoramento/exemplo_uso/dados/tbl_baseline.csv
Tabela para o mês 202304 salva em: /content/drive/MyDrive/framework_monitoramento/exemplo_uso/dados/tbl_202304.csv
Tabela para o mês 202305 salva em: /content/drive/MyDrive/framework_monitoramento/exemplo_uso/dados/tbl_202305.csv
Tabela para o mês 202306 salva em: /content/drive/MyDrive/framework_monitoramento/exemplo_uso/dados/tbl_202306.csv


# Executa Monitoramento

In [19]:
# Paths to the two JSON parameter files passed to the monitoring entry point.
# NOTE(review): the split between "exec" and "monit" settings is defined by
# the framework, not visible here — see the JSON files for their contents.
params_exec = '/content/drive/MyDrive/framework_monitoramento/exemplo_uso/params_exec.json'
params_monit = '/content/drive/MyDrive/framework_monitoramento/exemplo_uso/params_monit.json'
# Run the monitoring framework. Per the log printed by this cell, the call
# loads the parameters, computes percentiles for the baseline and for each
# monthly safra, then computes PSI per safra, saving each result as a CSV
# under exemplo_uso/results/.
monit_performance(params_exec, params_monit)

*************** Carregando Parâmetros ***************
*************** Calculando Percentis ***************
Percentis calculados e salvos para a safra baseline em /content/drive/MyDrive/framework_monitoramento/exemplo_uso/results/percentis_baseline.csv.
Percentis calculados e salvos para a safra 202304 em /content/drive/MyDrive/framework_monitoramento/exemplo_uso/results/percentis_prod_202304.csv.
Percentis calculados e salvos para a safra 202305 em /content/drive/MyDrive/framework_monitoramento/exemplo_uso/results/percentis_prod_202305.csv.
Percentis calculados e salvos para a safra 202306 em /content/drive/MyDrive/framework_monitoramento/exemplo_uso/results/percentis_prod_202306.csv.
*************** Calculando PSI ***************
Processando safra: 202304
PSI calculado para safra 202304 e salvo em: /content/drive/MyDrive/framework_monitoramento/exemplo_uso/results/psi_202304.csv
Processando safra: 202305
PSI calculado para safra 202305 e salvo em: /content/drive/MyDrive/framework_moni