In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Carregar os dados
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
sample_submission_df = pd.read_csv('sample_submission.csv')

# Tratar valores faltantes
imputer = SimpleImputer(strategy='mean')
train_df_imputed = pd.DataFrame(imputer.fit_transform(train_df.drop(columns=['Target'])))
test_df_imputed = pd.DataFrame(imputer.transform(test_df))

# Restaurar nomes das colunas
train_df_imputed.columns = train_df.drop(columns=['Target']).columns
test_df_imputed.columns = test_df.columns

# Codificar a coluna alvo
label_encoder = LabelEncoder()
train_df['Target'] = label_encoder.fit_transform(train_df['Target'])

# Separar características e alvo
X = train_df_imputed
y = train_df['Target']

# Dividir os dados em conjuntos de treino e validação
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizar os dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
test_df_scaled = scaler.transform(test_df_imputed)

# Treinar o modelo Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Fazer previsões no conjunto de validação
y_val_pred = rf_model.predict(X_val_scaled)

# Avaliar o desempenho do modelo
accuracy = accuracy_score(y_val, y_val_pred)
classification_report_str = classification_report(y_val, y_val_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report_str)

# Fazer previsões no conjunto de teste
test_predictions = rf_model.predict(test_df_scaled)

# Converter previsões de volta para as classes originais
test_predictions_labels = label_encoder.inverse_transform(test_predictions)

# Preparar o arquivo de submissão
submission_df = sample_submission_df.copy()
submission_df['Target'] = test_predictions_labels

# Salvar o arquivo de submissão
submission_df.to_csv('submission.csv', index=False)

print('Arquivo de submissão salvo como submission.csv')


Accuracy: 0.8277574490329326
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.83      0.86      5028
           1       0.65      0.60      0.62      3017
           2       0.85      0.92      0.88      7259

    accuracy                           0.83     15304
   macro avg       0.80      0.78      0.79     15304
weighted avg       0.83      0.83      0.83     15304

Arquivo de submissão salvo como submission.csv
