# In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Load the raw training and test tables.
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Columns used as coordinates for the nearest-neighbour search.
features = [
    'latitude',
    'longitude',
    'area_m2',
    'floor',
    'built_year',
    'contract_year_month',
]

# Hold out 20% of the training rows for validation; the fixed seed keeps the
# split reproducible.
train_df, val_df = train_test_split(train_df, random_state=42, test_size=0.2)

# Standardise the features. The scaler is fitted on the training split only
# and then reused unchanged for the validation and test rows.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train_df[features])
val_scaled, test_scaled = (
    scaler.transform(frame[features]) for frame in (val_df, test_df)
)

# Build the BallTree over the scaled training points.
tree = BallTree(train_scaled, leaf_size=2)

# Single-point prediction
def predict_deposit(neighbors, distances):
    """Return the inverse-distance-weighted mean of the neighbours' deposits.

    Args:
        neighbors: Mapping-like object (e.g. a DataFrame) whose ``'deposit'``
            entry holds one deposit value per neighbour.
        distances: Array of distances to those neighbours, in the same order.

    Returns:
        The weighted average deposit, where each neighbour's weight is
        ``1 / (distance + 1e-5)``.
    """
    # The small epsilon keeps the weight finite when a distance is exactly 0.
    inverse_distance = 1 / (distances + 1e-5)
    numerator = np.sum(neighbors['deposit'] * inverse_distance)
    denominator = np.sum(inverse_distance)
    return numerator / denominator

# Batch prediction
def make_predictions(data_scaled, k):
    """Predict a deposit for every row of ``data_scaled`` from its k nearest neighbours.

    Queries the module-level ``tree`` for the ``k`` nearest training points of
    each row and returns the inverse-distance-weighted mean of their
    ``deposit`` values. This is the same formula as ``predict_deposit``, but
    evaluated for all rows at once instead of doing one ``DataFrame.iloc``
    lookup per query row in a Python loop.

    Args:
        data_scaled: 2-D array of query points, scaled with the same scaler as
            the data ``tree`` was built from.
        k: Number of neighbours to average over.

    Returns:
        A list with one predicted deposit per row of ``data_scaled``.
    """
    distances, indices = tree.query(data_scaled, k=k)

    # ``indices`` holds positional row numbers into the array the tree was
    # built from, so the deposit column is indexed positionally (which is what
    # ``train_df.iloc[...]`` did). Result shape: (n_queries, k).
    neighbor_deposits = train_df['deposit'].to_numpy()[indices]

    # The small epsilon keeps the weight finite when a distance is exactly 0.
    weights = 1 / (distances + 1e-5)

    # np.nansum keeps the per-row behaviour of predict_deposit, where summing
    # a pandas Series skips NaN deposits instead of propagating them.
    weighted_sums = np.nansum(neighbor_deposits * weights, axis=1)
    return list(weighted_sums / np.sum(weights, axis=1))

# Number of neighbours used for both validation and test predictions.
k = 5

# Score the model on the held-out validation split.
val_predictions = make_predictions(val_scaled, k)
val_mae = mean_absolute_error(val_df['deposit'], val_predictions)
print(f"Validation MAE: {val_mae}")

# Predict the test rows and write them out next to their 'index' column.
test_predictions = make_predictions(test_scaled, k)
submission_df = pd.DataFrame(
    {'index': test_df['index'], 'deposit': test_predictions}
)
submission_df.to_csv('submission.csv', index=False)

print("예측이 완료되었습니다. 'submission.csv' 파일을 확인하세요.")