# 📌 AUTOML COM PYCARET - CLASSIFICAÇÃO EM FUNDOS IMOBILIÁRIOS

# 🚀 Passo 1: Instalação Condicional de Bibliotecas e importação das bibliotecas

In [None]:
## Criando e ativando um ambiente virtual
## O pycaret não funciona nas novas versões do python
# conda create --prefix ./env python=3.10 -y
# conda activate ./env
# conda install -p "c:..." ipykernel --update-deps --force-reinstall


In [2]:
import sys
import subprocess
# Required libraries, named as they are passed to `pip install`
libraries = [
    "pycaret",
    "pandas",
    "numpy",
    "matplotlib",
    "seaborn",
    "openpyxl",
    "scikit-learn",
]

# Install a library with pip only when it cannot be imported
def install_if_missing(lib):
    """Install ``lib`` with pip unless it is already importable.

    Availability is checked with ``importlib.util.find_spec``. Looking the
    name up in ``sys.modules`` is not enough: that mapping only holds modules
    already imported by this interpreter, so an installed-but-not-yet-imported
    package would be reinstalled on every run.

    Args:
        lib: Distribution name as understood by ``pip install``.

    Raises:
        subprocess.CalledProcessError: If the pip command fails.
    """
    import importlib.util

    # Distribution names whose importable module has a different name.
    import_names = {"scikit-learn": "sklearn"}
    module_name = import_names.get(lib, lib.replace("-", "_"))

    if importlib.util.find_spec(module_name) is None:
        subprocess.check_call([sys.executable, "-m", "pip", "install", lib])
        # Let the import system notice the freshly installed package so the
        # imports that follow in the same session can find it.
        importlib.invalidate_caches()

# Check each required library and install the ones that are missing
for lib in libraries:
    install_if_missing(lib)

# 📚 Passo 2: Importação das Bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pycaret.classification import *

# Global plot configuration: use seaborn's "whitegrid" style
sns.set_style("whitegrid")

# 🗂️ Passo 3: Carregar os Dados

In [3]:
# Load the data from the Excel workbook.
# NOTE(review): the file name looks like a placeholder ("your_file"); the
# recorded output of this cell is a FileNotFoundError for it.
excel_path = "seu_arquivo.xlsx"
df = pd.read_excel(excel_path)

# Show the first rows
df.head()

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\Joao\Desktop\project FI\env\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
    await eval(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Joao\AppData\Local\Temp\ipykernel_22872\913730433.py", line 2, in <module>
    df = pd.read_excel("seu_arquivo.xlsx")
  File "c:\Users\Joao\Desktop\project FI\env\lib\site-packages\pandas\io\excel\_base.py", line 504, in read_excel
    io = ExcelFile(
  File "c:\Users\Joao\Desktop\project FI\env\lib\site-packages\pandas\io\excel\_base.py", line 1563, in __init__
    ext = inspect_excel_format(
  File "c:\Users\Joao\Desktop\project FI\env\lib\site-packages\pandas\io\excel\_base.py", line 1419, in inspect_excel_format
    with get_handle(
  File "c:\Users\Joao\Desktop\project FI\env\lib\site-packages\pandas\io\common.py", line 872, in get_handle
    handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'seu_arquivo.xlsx'



# 🔍 Passo 4: Verificação de Consistência dos Dados

In [None]:
# 4.1 General information about the DataFrame
df.info()

# 4.2 Descriptive statistics.
# Printed explicitly: a notebook only displays the value of the LAST
# expression in a cell, so a bare `df.describe()` here shows nothing.
print(df.describe())

# 4.3 Number of null values per column
print("\nValores nulos por coluna:\n", df.isnull().sum())

# 4.4 Detect constant and near-constant variables
def check_constant_features(df, threshold=0.99):
    """Return the numeric columns dominated by a single value.

    A column is reported when its most frequent value accounts for at least
    ``threshold`` of the rows. Missing values are counted as a value of their
    own (``dropna=False``).

    Args:
        df: DataFrame to inspect; only numeric columns are examined.
        threshold: Minimum share (between 0 and 1) of the most frequent value.

    Returns:
        List of column names, in the DataFrame's column order.
    """
    constant_features = []
    for col in df.select_dtypes(include=['number']).columns:
        shares = df[col].value_counts(normalize=True, dropna=False)
        # A frame with no rows yields no frequencies at all; skip the column
        # instead of failing on the lookup of the top frequency.
        if shares.empty:
            continue
        # value_counts sorts in descending order, so the first entry is the
        # share of the most frequent value.
        if shares.iloc[0] >= threshold:
            constant_features.append(col)
    return constant_features

# Run the near-constant check with its default threshold and report the result
# (the message text had mis-encoded characters; restored to proper UTF-8).
const_features = check_constant_features(df)
print(f"\n🔴 Variáveis Quase Constantes (threshold=99%): {const_features}")

# 4.5 Check for rare classes in the target variable
# (the message text had mis-encoded characters; restored to proper UTF-8).
if "target" in df.columns:
    # Relative frequency of each class
    class_counts = df["target"].value_counts(normalize=True)
    # Classes that make up less than 5% of the rows
    rare_classes = class_counts[class_counts < 0.05].index.tolist()
    print(f"\n🔴 Classes Raras (<5% de representatividade): {rare_classes}")

# 4.6 Correlation matrix
# Correlate the numeric columns only. At this point the frame has not been
# encoded yet (the categorical column is turned into dummies later), and
# newer pandas versions raise on non-numeric columns instead of skipping
# them. The plot title had mis-encoded characters; restored to proper UTF-8.
numeric_df = df.select_dtypes(include=['number'])
plt.figure(figsize=(12,6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Matriz de Correlação")
plt.show()


# 🛠️ Passo 5: Pré-Processamento e Feature Engineering

In [None]:
# Convert categorical variables into dummies (example: state regulations)
df = pd.get_dummies(df, columns=['normativa_estado'], drop_first=True)

# Fill missing values with each column's median.
# numeric_only=True restricts the median to numeric columns; without it,
# newer pandas versions raise when the frame holds any non-numeric column.
df.fillna(df.median(numeric_only=True), inplace=True)

# Drop the near-constant variables found earlier. The target is never
# dropped, even if it was flagged, because it is needed right below.
df.drop(columns=[col for col in const_features if col != 'target'], inplace=True)

# Separate the features from the target variable
X = df.drop(columns=['target'])
y = df['target']

# Split into train and test sets (80% / 20%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features; the scaler is fitted on the training set only
# and then applied unchanged to the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 🚀 Passo 6: Configuração do AutoML com PyCaret

In [None]:
# Build the training DataFrame for PyCaret from the scaled features
df_train = pd.DataFrame(X_train_scaled, columns=X.columns)
df_train['target'] = y_train.values

# Initialise the PyCaret experiment.
# 'normativa_estado' is intentionally NOT passed as a categorical feature:
# pd.get_dummies already replaced it with dummy columns during
# pre-processing, so df_train has no column with that name and naming it
# here would reference a non-existent column.
# NOTE(review): the data is already standardised by StandardScaler, so
# normalize=True looks redundant — confirm whether both are wanted.
clf = setup(data=df_train, target='target', session_id=42,
            normalize=True,
            feature_selection=True,
            remove_multicollinearity=True,
            transformation=True,
            fix_imbalance=True)


# 🏆 Passo 7: Treinamento e Comparação de Modelos

In [None]:
# Train and compare the available models automatically; the result of
# compare_models() is kept as the best candidate for the next steps.
best_model = compare_models()

# 🔍 Passo 8: Ajuste do Melhor Modelo

In [None]:
# Re-create the best model found by the comparison step
final_model = create_model(estimator=best_model)

# Tune its hyperparameters
tuned_model = tune_model(estimator=final_model)

# 📈 Passo 9: Avaliação do Modelo

In [None]:
# Evaluate the tuned model
evaluate_model(estimator=tuned_model)

# Plot the feature importance of the tuned model
plot_model(estimator=tuned_model, plot='feature')

# 🎯 Passo 10: Predições em Novos Dados

In [None]:
# Rebuild a labelled test DataFrame from the scaled test features
df_test = pd.DataFrame(X_test_scaled, columns=X.columns).assign(target=y_test.values)

# Predict on the test data with the tuned model
predictions = predict_model(estimator=tuned_model, data=df_test)

# Show the first predictions
predictions.head()