In [None]:
import pandas as pd
from sklearn.preprocessing import LabelBinarizer,OneHotEncoder

# Step 1: Load the dataset
def load_powerlifting_dataset(path="data/openpowerlifting.csv") -> pd.DataFrame:
    """Read the powerlifting CSV file into a DataFrame.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
            Defaults to the relative path ``data/openpowerlifting.csv`` so
            that calls without arguments keep loading the same file.

    Returns:
        The parsed CSV contents as a ``pandas.DataFrame``.
    """
    df = pd.read_csv(path)
    # Confirmation message is printed only after the read succeeded.
    print("Dataset carregado com sucesso.")
    return df

# Step 2: Keep only the relevant columns (example selection)
def preprocess_data(df):
    """Restrict *df* to the selected columns and drop incomplete rows.

    Picks the ten columns listed below, removes every row that has a missing
    value in any of them, prints the resulting shape and returns the filtered
    frame. The index is not reset, so surviving rows keep their original
    index labels.
    """
    selected_columns = [
        'Sex', 'Equipment', 'Event', 'Division',
        'Age', 'BodyweightKg',
        'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg', 'TotalKg',
    ]
    complete_rows = df[selected_columns].dropna()
    print(f"Tamanho após remoção de NAs: {complete_rows.shape}")
    return complete_rows

# Step 3: Apply one-hot encoding
def one_hot_encode(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the categorical columns of *df* with one-hot indicator columns.

    The columns ``Sex``, ``Equipment``, ``Event`` and ``Division`` are encoded
    with ``OneHotEncoder(drop='first')``, i.e. the first category of each
    column is dropped. The remaining columns of *df* are kept, followed by
    the encoded columns.

    Args:
        df: Frame containing at least the four categorical columns above.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the non-categorical
        columns of *df* followed by the encoded indicator columns.
    """
    categorical_cols = ['Sex', 'Equipment', 'Event', 'Division']

    # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4 removed
    # the old keyword. Try the current name first and fall back to the old
    # one so the function works on both sides of the rename.
    try:
        encoder = OneHotEncoder(sparse_output=False, drop='first')
    except TypeError:
        encoder = OneHotEncoder(sparse=False, drop='first')

    encoded_array = encoder.fit_transform(df[categorical_cols])
    encoded_df = pd.DataFrame(
        encoded_array,
        columns=encoder.get_feature_names_out(categorical_cols),
    )

    # encoded_df has a default RangeIndex, so the remaining columns get their
    # index reset before concatenation to line the rows up positionally.
    remaining_df = df.drop(columns=categorical_cols).reset_index(drop=True)
    df_encoded = pd.concat([remaining_df, encoded_df], axis=1)
    return df_encoded

# Run the pipeline when executed as a script
if __name__ == "__main__":
    # Load the CSV, keep the selected columns without missing values, then
    # one-hot encode the categorical columns.
    df_raw = load_powerlifting_dataset()
    df_clean = preprocess_data(df_raw)
    df_encoded = one_hot_encode(df_clean)

    # Show the first rows of the encoded frame as a quick visual check.
    print("\nExemplo de dados codificados com One-Hot Encoding:")
    print(df_encoded.head())
