In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

In [2]:
def preprocess_data(train_data, test_data, target='type'):
    """One-hot encode the feature columns of the train and test frames.

    The encoder is fitted on ``train_data`` only and then applied to
    ``test_data``, so both outputs share the same encoded columns.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training frame; its columns determine which features are encoded.
    test_data : pandas.DataFrame
        Evaluation frame; must contain the same feature columns.
    target : str, default 'type'
        Name of the label column that must be kept out of the features.

    Returns
    -------
    tuple
        ``(X_train, X_test)``: the encoded training and test matrices, in the
        container type produced by ``ColumnTransformer`` (dense array or
        sparse matrix).
    """
    if target in train_data.columns:
        # Exclude the label by name. Slicing off the last column only works
        # when the label happens to be last; otherwise the label itself would
        # be encoded as a feature (leakage) and a real feature would be lost.
        feature_columns = [column for column in train_data.columns if column != target]
    else:
        # No column with that name: keep the historical positional behaviour
        # of treating the last column as the label.
        feature_columns = train_data.columns[:-1]

    transformer = ColumnTransformer(
        transformers=[
            # handle_unknown='ignore' encodes categories that only appear in
            # the test frame as all zeros instead of raising during transform.
            ('onehot', OneHotEncoder(dtype='int', handle_unknown='ignore'), feature_columns)
        ]
    )

    return transformer.fit_transform(train_data), transformer.transform(test_data)


In [3]:
def modelin(train_file, test_file, model_path='model.pkl', random_state=42):
    """Train a random forest on CSV data, print evaluation metrics, and pickle it.

    Parameters
    ----------
    train_file : str or path-like
        CSV file with the training rows; must contain a 'type' label column.
    test_file : str or path-like
        CSV file with the evaluation rows; must contain a 'type' label column.
    model_path : str or path-like, default 'model.pkl'
        Destination of the pickled classifier.
    random_state : int or None, default 42
        Seed forwarded to ``RandomForestClassifier`` so repeated runs produce
        the same model and metrics. Pass ``None` ` to restore unseeded training.

    Returns
    -------
    None
        Results are printed and the model is written to ``model_path``.
    """
    # Load the data
    train_data = pd.read_csv(train_file)
    test_data = pd.read_csv(test_file)

    # Preprocess the data
    X_train, X_test = preprocess_data(train_data, test_data)
    y_train = train_data['type']
    y_test = test_data['type']

    # Train the model (seeded so that a rerun reproduces the same forest)
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.2f}')

    # Save the model.
    # NOTE(review): only the classifier is pickled; the fitted encoder from
    # preprocess_data is not returned or saved, so whoever loads this file
    # must supply input that is already encoded the same way.
    with open(model_path, 'wb') as file:
        pickle.dump(model, file)

In [4]:
# Train on the training CSV, evaluate on the held-out CSV, and write the pickled model.
modelin(
    train_file='train_dataset.csv',
    test_file='test_dataset.csv',
)

Classification Report:
              precision    recall  f1-score   support

           e       1.00      1.00      1.00      1041
           p       1.00      1.00      1.00       990

    accuracy                           1.00      2031
   macro avg       1.00      1.00      1.00      2031
weighted avg       1.00      1.00      1.00      2031

Accuracy: 1.00
