In [None]:
import pathlib
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier, LGBMRegressor, early_stopping, log_evaluation
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

# Directory layout, resolved relative to the parent of the current working directory.
ROOT = Path("..").resolve()
INPUT = ROOT.joinpath("input")
WORK = ROOT.joinpath("working")
# Create the output directory (and missing parents); an existing directory is fine.
WORK.mkdir(parents=True, exist_ok=True)

# Name of the label column -- change this to match the competition.
TARGET = "target"

train = pd.read_csv(INPUT.joinpath("train.csv"))
# Fail fast when the configured target column is not present in the training data.
assert TARGET in train.columns

In [None]:
# Split the training frame into the feature matrix and the target column.
X = train.drop(columns=[TARGET])
y = train[TARGET]

# Minimal categorical handling: integer-encode every text column.
# Both dtype checks are needed: `object` is the classic container for strings,
# while newer pandas versions can load text as a dedicated string dtype that a
# plain `dtype == 'object'` comparison does not match.
for c in X.columns:
    if pd.api.types.is_object_dtype(X[c]) or pd.api.types.is_string_dtype(X[c]):
        le = LabelEncoder()
        # map(str) stringifies every element, missing values included (NaN -> "nan"),
        # so the encoder never sees a mix of strings and NaN. `astype(str)` is not
        # used because newer pandas versions keep missing values missing there.
        X[c] = le.fit_transform(X[c].map(str))

# Rough heuristic: a numeric target with more distinct values than this threshold
# is treated as regression; everything else is treated as classification.
MAX_CLASS_CARDINALITY = 20
if y.dtype.kind in "ifu" and y.nunique() > MAX_CLASS_CARDINALITY:
    task = 'regression'
else:
    task = 'classification'
print('task =', task)

In [None]:
# Cross-validation settings shared by both task types.
N_SPLITS = 5
SEED = 42
# Stop adding trees once the validation metric has not improved for this many rounds.
EARLY_STOPPING_ROUNDS = 100

# Hyper-parameters common to the classifier and the regressor.
lgbm_params = dict(
    n_estimators=2000,  # upper bound only; early stopping decides the real tree count
    learning_rate=0.05,
    subsample=0.8,
    subsample_freq=1,  # LightGBM ignores `subsample` while the frequency is left at 0
    colsample_bytree=0.8,
    random_state=SEED,
)

if task == 'classification':
    # Encode any non-numeric target (object, string or category dtype) to integer
    # codes; otherwise the predicted labels could not be stored in the float
    # out-of-fold array below.
    if not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y.map(str))
    estimator_cls = LGBMClassifier
    lgbm_params['objective'] = 'binary' if len(np.unique(y)) == 2 else 'multiclass'
    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
    folds = list(skf.split(X, y))
else:
    estimator_cls = LGBMRegressor
    lgbm_params['objective'] = 'l1'
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
    folds = list(kf.split(X))

# Positional view of the target: `y` may be a Series (arbitrary index) or an
# ndarray (after label encoding), while the fold indices are always positions.
y_pos = pd.Series(y).reset_index(drop=True)

# Out-of-fold predictions; for classification these are predicted class labels.
oof = np.zeros(len(y_pos))
models = []
for i, (tr, va) in enumerate(folds):
    model = estimator_cls(**lgbm_params)
    model.fit(
        X.iloc[tr], y_pos.iloc[tr],
        eval_set=[(X.iloc[va], y_pos.iloc[va])],
        # Recent LightGBM releases no longer accept `verbose=` in fit(); logging
        # and early stopping are configured through callbacks instead.
        callbacks=[
            early_stopping(EARLY_STOPPING_ROUNDS, verbose=False),
            log_evaluation(period=0),  # period=0 silences per-iteration logging
        ],
    )
    oof[va] = model.predict(X.iloc[va])
    models.append(model)

# Rough CV score. The validation fold also drives early stopping, so this
# estimate is slightly optimistic.
if task == 'classification':
    acc = accuracy_score(y, oof)
    print(f"CV acc (rough) = {acc:.5f}")
else:
    mae = float(np.mean(np.abs(oof - y_pos.to_numpy(dtype=float))))
    print(f"CV MAE (rough) = {mae:.6f}")

# Persist one pickle per fold model (minimal approach).
# NOTE: only unpickle these files from trusted locations; pickle.load can
# execute arbitrary code.
saved_paths = []
for i, m in enumerate(models):
    model_path = WORK/f"model_fold{i}.pkl"
    with open(model_path, 'wb') as f:
        pickle.dump(m, f)
    saved_paths.append(model_path)
# Report exactly the files written in this run rather than globbing the
# directory, which could also list stale files from earlier runs.
print("saved models ->", saved_paths)