In [1]:
import sys
# Make project-local helper modules importable. These paths are relative, so
# they resolve against the notebook's current working directory.
sys.path.append('../src')
sys.path.append('../submissions')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
# Project helpers (not installed packages) — presumably found through the
# sys.path entries appended above; confirm their location in the repo.
from load import load_train_data, load_test_data
from evaluate import evaluate_model
from submit import save_submission

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

In [2]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

In [4]:
# Load the train/test sets and separate the features from the label.
# NOTE(review): `local=True` is forwarded to the project loaders; presumably it
# selects a local copy of the data — confirm in the `load` module.
train_data = load_train_data(local=True)
test_data = load_test_data(local=True)

TARGET = 'damage_grade'

# Pop the label BEFORE copying. Copying first (as this cell did previously)
# left the target column inside X, so every split derived from X carried the
# label along with the features — a latent leak for any model that consumes
# more than the categorical columns.
# After the pop, `train_data` holds features only; the pipeline cell relies on
# that when it selects the categorical columns from `train_data`.
y = train_data.pop(TARGET)
X = train_data.copy()

In [5]:
# Hold out 20% of the training data as a validation set.
# `stratify=y` keeps the class proportions of the label the same in both
# parts, and the fixed seed makes the shuffle reproducible.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, **split_options)

In [6]:
# Baseline pipeline: one-hot encode the categorical columns, then fit a
# multinomial logistic regression on the encoded features.

# Only object-dtype columns are fed to the model. ColumnTransformer's default
# `remainder='drop'` discards every column that is not listed here.
cols_categorical = train_data.select_dtypes(include='object').columns

preprocess = ColumnTransformer([
    # handle_unknown='ignore': a category that appears in the validation or
    # test data but not in the training split is encoded as all zeros instead
    # of raising at transform time (the default is 'error'). Output is
    # unchanged whenever every category was seen during fit.
    ('onehot', OneHotEncoder(handle_unknown='ignore'), cols_categorical)
])

# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (1.5+). Multinomial is already what the default solver uses for multiclass
# targets, so the argument can likely be dropped when upgrading — confirm
# against the installed version before changing it.
log_reg = LogisticRegression(multi_class='multinomial', random_state=42)

simple_pipe = Pipeline(steps=[
    ('preprocess', preprocess),
    ('model', log_reg)
])

In [10]:
# Score the baseline pipeline on the train/validation split.
# NOTE(review): `evaluate_model` is a project helper; presumably it fits the
# pipeline on the training part and returns the validation F1 — confirm in the
# `evaluate` module.
score = evaluate_model(
    simple_pipe,
    X_train, X_valid,
    y_train, y_valid,
)
print(f"F1-score: {score :.3f}")

F1-score: 0.577


In [9]:
# Write the pipeline's test-set predictions to a submission file.
# NOTE(review): where the file lands and whether the pipeline must already be
# fitted depend on the project's `save_submission` helper — confirm there.
SUBMISSION_FILE = 'submission_0.csv'
save_submission(simple_pipe, test_data, SUBMISSION_FILE)