In [1]:
import pandas as pd

import import_ipynb
from preprocessor import preprocessor, drop_index, read_file
from tree import Tree, baseline

# Input file that is loaded with read_file() further down in this script.
result_path = '../result_kor_OSX.csv'
# Output CSV that the filled-in rows are written to with DataFrame.to_csv().
result_out_path = 'result_out.csv'

def train(condition, model):
    """Fit ``model`` once for every target column of ``condition``.

    Each target column is trained in turn; the remaining target columns are
    handed to ``model.train`` as the columns to drop.

    Args:
        condition: Object exposing ``target_columns``, an iterable that also
            supports ``.drop(label)``.
        model: Object exposing ``train(target_column, dropping_columns)``.
    """
    all_targets = condition.target_columns
    for current_target in all_targets:
        model.train(current_target, all_targets.drop(current_target))

def predict(condition, model):
    """Predict every target column of one condition and tally the scores.

    For each column in ``condition.target_columns`` the model predicts that
    column while the remaining target columns are passed as the columns to
    drop. The model score and the ``baseline`` score are summed across all
    target columns.

    Args:
        condition: Object exposing ``target_columns`` (iterable supporting
            ``.drop(label)``) and ``preprocessed`` (exposing ``.values``).
        model: Object whose ``predict(features, target_column,
            dropping_columns)`` returns a ``(prediction, score)`` pair.

    Returns:
        A 3-tuple ``(predictions, total_score, total_base_score)``:
        ``predictions`` is a ``pd.Series`` keyed by target column, and the
        two totals are the sums of the model scores and of
        ``baseline(target_column, dropping_columns)`` respectively.
    """
    target_columns = condition.target_columns
    predicted = {}
    total_score = 0
    # Must be initialised here: the ``+=`` below makes the name local to
    # this function, so without this line the first iteration raised
    # UnboundLocalError regardless of any module-level variable.
    total_base_score = 0
    for target_column in target_columns:
        dropping_columns = target_columns.drop(target_column)
        # Flatten the preprocessed values into a single-row 2-D array.
        preprocessed = condition.preprocessed.values.reshape(1, -1)
        predicted[target_column], score = model.predict(
            preprocessed, target_column, dropping_columns)
        base_score = baseline(target_column, dropping_columns)
        total_score += score
        total_base_score += base_score
    return pd.Series(predicted), total_score, total_base_score

def char_to_int(char):
    """Return the 1-based alphabet position of a single letter.

    The letter is lower-cased first, so ``'a'`` and ``'A'`` both map to 1
    and ``'z'``/``'Z'`` to 26. Other single characters are simply offset by
    the same amount (no validation is performed).
    """
    offset_from_a = ord(char.lower()) - ord('a')
    return offset_from_a + 1

# Build one shared model object and train it on every condition's target
# columns. The conditions are iterated directly: the position that
# enumerate() used to yield here was never used.
model = Tree()
for condition in preprocessor.conditions:
    train(condition, model)
    
# Predict the target columns of every condition, accumulate the model and
# baseline scores, and build one completed row per condition by appending
# the predictions to the condition's non-missing original values.
conditions = preprocessor.conditions
completed_rows = []
total_score = 0
total_base_score = 0
for condition in conditions:
    # NOTE(review): drop_index presumably removes the target-column labels
    # from the original values before the predictions are appended - confirm.
    known_values = drop_index(
        condition.original.dropna(), condition.target_columns)
    predicted, score, base_score = predict(condition, model)
    total_score += score
    total_base_score += base_score
    completed_rows.append(pd.concat([known_values, predicted]))
    
# One model exists per (condition, target column) pair, so the accumulated
# totals are averaged over the total number of target columns.
number_of_models = sum(len(item.target_columns) for item in conditions)
avg_score = total_score / number_of_models
avg_base_score = total_base_score / number_of_models
print()
print(
    "Total score: ",
    avg_score,
    " (total base score: {})".format(avg_base_score),
)

# Lay the completed rows side by side, transpose so that each one becomes a
# row of the frame, and put the columns in the preprocessor's output order.
stacked_rows = pd.concat(completed_rows, axis=1, sort=False).T
completed = stacked_rows.reindex(columns=preprocessor.predict_columns_order)

# '사상자수' (casualties) is the row-wise sum of '사망자수' (deaths),
# '중상자수' (seriously injured) and '경상자수' (slightly injured).
casualty_parts = completed.reindex(columns=['사망자수', '중상자수', '경상자수'])
completed['사상자수'] = casualty_parts.apply(sum, axis=1)

# Fill the '값' cell of every row of the result template from `completed`
# and write the filled template to result_out_path.
#
# For each template row, `row['열'] - 2` is used as the row label into
# `completed`, and the letter in `row['행']` (a -> 1, b -> 2, ...) minus one
# is used as the 0-based column position.
result = read_file(result_path)
rows = []
for _, row in result.iterrows():
    # iterrows() may yield a view of the frame's data; copy before writing
    # so the template itself is never modified through the row.
    row = row.copy()
    row_label = row['열'] - 2
    column_position = char_to_int(row['행']) - 1
    # Explicit positional lookup: a plain `series[int]` on a non-integer
    # index only works through pandas' deprecated positional fallback.
    row['값'] = completed.loc[row_label].iloc[column_position]
    rows.append(row)
result_out = pd.DataFrame(rows)
result_out.to_csv(result_out_path)

Test score: 0.9260001635100559, (base_score: 0.9668091268418382)


TypeError: train() missing 2 required positional arguments: 'target_column' and 'dropping_columns'