<a href="https://colab.research.google.com/github/akito584/push_practice/blob/main/NFL_Drafted_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#ライブラリインポート(基本)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score


In [None]:
PATH = '/content/drive/MyDrive/GCI/data2_competition2/'
train_df = pd.read_csv(PATH +'train.csv')
test_df = pd.read_csv(PATH +'test.csv')

In [None]:
# 1. 各ポジションごとの計算ロジックを定義
# 手動設定ルール
logic_map = {
    'OT': lambda x: x['Sprint_40yd'],
    'P':  lambda x: x['Sprint_40yd'],
    'K':  lambda x: x['Sprint_40yd'],
    'QB': lambda x: x['Sprint_40yd'],  # ベンチプレス欠損多数のためSprint_40ydに変更
    'OG': lambda x: x['Agility_3cone'] * x['Shuttle'],
    'DE': lambda x: x['Broad_Jump'],
    'CB': lambda x: x['Weight'],
    'DT': lambda x: x['Weight'],
    'WR': lambda x: (x['Shuttle'] / x['Weight']) * x['Height']
}

# 自動分析で決定されたルール（その他のポジション用）
auto_logic_cols = {
    'OLB': 'Bench_Press_Reps',
    'RB':  'Broad_Jump',
    'TE':  'Bench_Press_Reps',
    'S':   'Bench_Press_Reps',
    'FB':  'Bench_Press_Reps',
    'DB':  'Bench_Press_Reps',
    'SS':  'Vertical_Jump',
    'ILB': 'Bench_Press_Reps',
    'FS':  'Broad_Jump',
    'C':   'Bench_Press_Reps',
    'LS':  'Bench_Press_Reps'
}

# 自動分析ルールをlambda式に変換して統合
for pos, col in auto_logic_cols.items():
    if pos not in logic_map:
        # colの値をキャプチャしてlambdaを作成
        logic_map[pos] = lambda x, c=col: x[c]

# 2. 特徴量作成関数
def calculate_feature(row):
    pos = row['Position']
    # 定義されたロジックがあれば適用、なければNaN
    if pos in logic_map:
        try:
            return logic_map[pos](row)
        except:
            return np.nan
    return np.nan

# 3. 生の特徴量を計算
train_df['selected_feature_raw'] = train_df.apply(calculate_feature, axis=1)

# 4. ポジションごとに標準化 (Z-score normalization)
# 各ポジション内で 平均0, 分散1 に変換
train_df['selected_feature_std'] = train_df.groupby('Position')['selected_feature_raw'].transform(
    lambda x: (x - x.mean()) / x.std()
)

In [None]:
def add_target_encoding(train_df, target_col, cat_col, n_splits=5, smooth_weight=10):
    """
    K-Fold Target Encoding with Smoothing.

    Args:
        train_df (pd.DataFrame): Training data containing the categorical column and target.
        target_col (str): Name of the target column (e.g., 'Drafted').
        cat_col (str): Name of the categorical column to encode (e.g., 'School').
        n_splits (int): Number of K-Fold splits.
        smooth_weight (float): Smoothing parameter 'm'.

    Returns:
        pd.Series: The encoded feature values (same length as train_df).
        dict: The global mapping dictionary (for test data inference).
    """
    # Initialize the new feature column with NaNs
    encoded_col = np.full(len(train_df), np.nan)

    # 1. Calculate Global Mean (for smoothing and filling NaNs)
    global_mean = train_df[target_col].mean()

    # 2. Setup K-Fold
    # Using StratifiedKFold to ensure label distribution is consistent across folds
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # 3. Iterate through folds
    # We use 'split' based on target to stratify
    for tr_idx, val_idx in kf.split(train_df, train_df[target_col]):
        # Split data
        X_tr, X_val = train_df.iloc[tr_idx], train_df.iloc[val_idx]

        # Calculate stats on TRAIN part of the fold
        stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])

        # Apply Smoothing Formula
        # (n * mean_cat + m * mean_global) / (n + m)
        counts = stats['count']
        means = stats['mean']
        smooth_means = (counts * means + smooth_weight * global_mean) / (counts + smooth_weight)

        # Map values to VALIDATION part of the fold
        # If a category in Val is not in Tr, it becomes NaN here
        encoded_col[val_idx] = X_val[cat_col].map(smooth_means)

    # 4. Fill NaNs (Categories seen in Val but not in Train split) with Global Mean
    encoded_col = pd.Series(encoded_col).fillna(global_mean)

    # 5. Create Global Mapping for Test/Inference (using FULL data)
    # This is what you would save to use on the actual test set later
    all_stats = train_df.groupby(cat_col)[target_col].agg(['count', 'mean'])
    counts_all = all_stats['count']
    means_all = all_stats['mean']
    global_mapping = (counts_all * means_all + smooth_weight * global_mean) / (counts_all + smooth_weight)

    return encoded_col, global_mapping

In [None]:
# Speed Score (ビル・バーンウェルの式を簡略化: Weight * 200 / Sprint^4)
# ※Sprintが遅い（数値が大きい）と分母が大きくなりスコアが下がる
train_df['Speed_Score'] = (train_df['Weight'] * 200) / (train_df['Sprint_40yd']**4)
test_df['Speed_Score'] = (test_df['Weight'] * 200) / (test_df['Sprint_40yd']**4)

# Explosion Score (跳躍系種目の合計)
train_df['Explosion_Score'] = train_df['Vertical_Jump'] + train_df['Broad_Jump']
test_df['Explosion_Score'] = test_df['Vertical_Jump'] + test_df['Broad_Jump']

# Agility Score (アジリティ系種目の合計)
train_df['Agility_Score'] = train_df['Agility_3cone'] + train_df['Shuttle']
test_df['Agility_Score'] = test_df['Agility_3cone'] + test_df['Shuttle']

In [None]:
# 学校名の名寄せ（一部の例）
school_map = {
    'Arizona St.': 'Arizona State',
    'West. Michigan': 'Western Michigan',
    'Mississippi': 'Ole Miss',
    'Miami (FL)': 'Miami',
}
train_df['School'] = train_df['School'].replace(school_map)
test_df['School'] = test_df['School'].replace(school_map)

# PositionのTarget Encoding（Schoolと同じ要領でadd_target_encoding関数を使用）
train_pos_te, pos_te_mapping = add_target_encoding(train_df, 'Drafted', 'Position', n_splits=5)
train_df['Position_Draft_Rate'] = train_pos_te
test_df['Position_Draft_Rate'] = test_df['Position'].map(pos_te_mapping).fillna(pos_te_mapping.mean())

  stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])
  stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])
  stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])
  stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])
  stats = X_tr.groupby(cat_col)[target_col].agg(['count', 'mean'])
  all_stats = train_df.groupby(cat_col)[target_col].agg(['count', 'mean'])


In [None]:
def create_features_base(df):
    """
    データフレームを受け取り、身体指数とポジション内偏差値の特徴量を追加して返す関数
    'School_Draft_Rate'はTarget Encodingで別途処理するため、ここでは作成しない。
    """
    df_copy = df.copy()

    # 1. 身体指数 (Composite Indices) の作成
    df_copy['BMI'] = df_copy['Weight'] / (df_copy['Height'] ** 2)
    df_copy['Momentum_Index'] = df_copy['Weight'] / df_copy['Sprint_40yd']

    # 2. ポジション内偏差値 (Position-based Z-scores) の作成
    phys_cols = ['Height', 'Weight', 'Sprint_40yd', 'Vertical_Jump',
                 'Bench_Press_Reps', 'Broad_Jump', 'Agility_3cone', 'Shuttle']

    for col in phys_cols:
        new_col_name = f"{col}_Z"
        # ポジションごとにグループ化して標準化
        df_copy[new_col_name] = df_copy.groupby('Position')[col].transform(
            lambda x: (x - x.mean()) / x.std()
        )

    # calculate_feature が事前に定義されていることを前提
    # thf3Nd_Aag5o セルで定義された logic_map と calculate_feature を使用
    df_copy['selected_feature_raw'] = df_copy.apply(calculate_feature, axis=1)

    def safe_zscore(x):
        if x.std() == 0:
            return 0  # Handle cases where std is 0 (e.g., all values are the same)
        return (x - x.mean()) / x.std()

    df_copy['selected_feature_std'] = df_copy.groupby('Position')['selected_feature_raw'].transform(safe_zscore)

    return df_copy

# 特徴量の作成（School_Draft_Rate以外）
train_df = create_features_base(train_df)
test_df = create_features_base(test_df)

# School_Draft_Rate の Target Encoding
# add_target_encoding関数は v4IB91Bvo-a5 セルで定義されていると仮定
train_school_te, school_te_mapping = add_target_encoding(
    train_df, 'Drafted', 'School', n_splits=5, smooth_weight=10
)
train_df['School_Draft_Rate'] = train_school_te

# テストデータには学習データで作成したマッピングを適用し、未見の学校はグローバル平均で補完
test_df['School_Draft_Rate'] = test_df['School'].map(school_te_mapping).fillna(school_te_mapping.mean())

# 結果の確認 (表示する特徴量リストを調整)
new_features_for_display = ['BMI', 'Momentum_Index', 'Sprint_40yd_Z', 'Weight_Z', 'School_Draft_Rate', 'selected_feature_std']
print(train_df[['Position', 'School', 'Drafted'] + new_features_for_display].head())


  df_copy[new_col_name] = df_copy.groupby('Position')[col].transform(
  df_copy['selected_feature_std'] = df_copy.groupby('Position')['selected_feature_raw'].transform(safe_zscore)


  Position             School  Drafted        BMI  Momentum_Index  \
0       OG             Lehigh      1.0  38.621956       26.003718   
1       WR  Abilene Christian      1.0  26.039614       20.206435   
2       WR       Colorado St.      1.0  27.046212       20.617835   
3       DT      East Carolina      1.0  39.925004       29.229528   
4       WR         California      1.0  26.063390       19.844666   

   Sprint_40yd_Z  Weight_Z  School_Draft_Rate  selected_feature_std  
0       0.518817 -0.532196           0.648328              0.445609  
1      -1.927475 -0.706408           0.706940                   NaN  
2       0.079019  0.165774           0.551752              0.320703  
3       0.051668  1.396888           0.499120              1.396888  
4       1.383241  0.031592           0.670951              0.062095  


In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score




# 2. Position-based Rank (相対順位)
phys_cols = ['Height', 'Weight', 'Sprint_40yd', 'Vertical_Jump', 'Bench_Press_Reps', 'Broad_Jump', 'Agility_3cone', 'Shuttle']
rank_cols = []
for col in phys_cols:
    new_col = f"{col}_YearPos_Rank"
    train_df[new_col] = train_df.groupby(['Year', 'Position'])[col].rank(pct=True)
    rank_cols.append(new_col)

# 3. Positionをカテゴリ型に変換 (LightGBM用)
train_df['Position'] = train_df['Position'].astype('category')

# --- 学習データ準備 ---
base_features = ['Age', 'Height', 'Weight', 'Sprint_40yd', 'Vertical_Jump',
                 'Bench_Press_Reps', 'Broad_Jump', 'Agility_3cone', 'Shuttle']
features = base_features + rank_cols + ['School_Draft_Rate', 'Position']
target = 'Drafted'

X = train_df[features]
y = train_df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# --- モデル学習 (Simple Baseline) ---
clf = lgb.LGBMClassifier(random_state=42)
clf.fit(X_train, y_train)

# --- 評価 ---
y_pred = clf.predict(X_test)
y_prob = clf.predict_proba(X_test)[:, 1]

print("--- Simple LightGBM Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"AUC:      {roc_auc_score(y_test, y_prob):.4f}")

  train_df[new_col] = train_df.groupby(['Year', 'Position'])[col].rank(pct=True)


--- Simple LightGBM Results ---
Accuracy: 0.7846
AUC:      0.8022


In [None]:
# Set up Stratified K-Fold cross-validation
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

lgbm_oof_preds = np.zeros(len(X))
lgbm_test_preds = np.zeros(len(X_test))

# Store AUC scores for each fold
lgbm_fold_auc_scores = []

for fold, (train_index, val_index) in enumerate(skf.split(X, y)):
    print(f"\n===== Fold {fold+1}/{n_splits} =====")

    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # Initialize and train LightGBM Classifier
    lgbm_model = lgb.LGBMClassifier(
        objective='binary',
        metric='auc',
        n_estimators=1000,
        learning_rate=0.05,
        num_leaves=31,
        max_depth=-1,
        min_child_samples=20,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        n_jobs=-1,
        reg_alpha=0.1,
        reg_lambda=0.1
    )

    lgbm_model.fit(X_train, y_train,
                    eval_set=[(X_val, y_val)],
                    eval_metric='auc',
                    callbacks=[lgb.early_stopping(stopping_rounds=100, verbose=False)])

    # Make predictions on the validation set for OOF predictions
    val_preds = lgbm_model.predict_proba(X_val)[:, 1]
    lgbm_oof_preds[val_index] = val_preds

    # Make predictions on the test set
    test_preds = lgbm_model.predict_proba(X_test)[:, 1]
    lgbm_test_preds += test_preds / n_splits

    # Calculate AUC for the current fold
    fold_auc = roc_auc_score(y_val, val_preds)
    lgbm_fold_auc_scores.append(fold_auc)
    print(f"Fold {fold+1} AUC: {fold_auc:.4f}")

# Calculate overall OOF AUC
overall_lgbm_auc = roc_auc_score(y, lgbm_oof_preds)
print(f"\nOverall LightGBM OOF AUC: {overall_lgbm_auc:.4f}")
print(f"Average LightGBM Fold AUC: {np.mean(lgbm_fold_auc_scores):.4f} +/- {np.std(lgbm_fold_auc_scores):.4f}")


===== Fold 1/5 =====
Fold 1 AUC: 0.8124

===== Fold 2/5 =====
Fold 2 AUC: 0.8481

===== Fold 3/5 =====
Fold 3 AUC: 0.8677

===== Fold 4/5 =====
Fold 4 AUC: 0.7899

===== Fold 5/5 =====
Fold 5 AUC: 0.8444

Overall LightGBM OOF AUC: 0.8288
Average LightGBM Fold AUC: 0.8325 +/- 0.0277


In [None]:
!pip install optuna
import optuna
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split

# Split the preprocessed training data (X, y) into training and validation sets for Optuna
X_train_optuna, X_val_optuna, y_train_optuna, y_val_optuna = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

def objective(trial):
    # 探索するパラメータの範囲定義
    param = {
        'objective': 'binary',
        'metric': 'auc',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': 1000  # 多めに設定し、Early Stoppingで止める
    }

    # LightGBMのデータセット形式にはせず、sklearn APIのままFit
    # Early StoppingのためにValidationセットを渡す
    model = lgb.LGBMClassifier(**param, random_state=42)

    callbacks = [lgb.early_stopping(stopping_rounds=100, verbose=False)]

    model.fit(
        X_train_optuna, # Use Optuna's training split
        y_train_optuna, # Use Optuna's training split
        eval_set=[(X_val_optuna, y_val_optuna)], # Use Optuna's validation split for evaluation
        eval_metric='auc',
        callbacks=callbacks
    )

    # 最良スコアで予測
    preds = model.predict_proba(X_val_optuna)[:, 1] # Predict on Optuna's validation split
    auc = roc_auc_score(y_val_optuna, preds)
    return auc

# --- 最適化の実行 ---
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50) # トライアル回数は調整してください

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# --- 最適パラメータで再学習 ---
best_params = trial.params
best_clf = lgb.LGBMClassifier(**best_params, random_state=42)
# Train the final model on the full preprocessed training data (X, y)
best_clf.fit(X, y)

# Evaluate on the validation set used for tuning, or the full test set if desired
# For consistency with the objective function's evaluation, we'll evaluate on X_val_optuna
y_pred_tuned = best_clf.predict(X_val_optuna)
y_prob_tuned = best_clf.predict_proba(X_val_optuna)[:, 1]

print("\n--- Tuned LightGBM Results (on Optuna Validation Set) ---")
print(f"Accuracy: {accuracy_score(y_val_optuna, y_pred_tuned):.4f}")
print(f"AUC:      {roc_auc_score(y_val_optuna, y_prob_tuned):.4f}")



[I 2025-12-07 10:08:37,706] A new study created in memory with name: no-name-81f3391f-0d03-4f4e-91dc-fdd3ed115a73
[I 2025-12-07 10:08:38,846] Trial 0 finished with value: 0.8060800497484313 and parameters: {'lambda_l1': 8.131803034549607e-06, 'lambda_l2': 0.0325621967953877, 'num_leaves': 250, 'feature_fraction': 0.9862920549220071, 'bagging_fraction': 0.8111524846576419, 'bagging_freq': 6, 'min_child_samples': 13, 'learning_rate': 0.03185105711338329}. Best is trial 0 with value: 0.8060800497484313.
[I 2025-12-07 10:08:39,093] Trial 1 finished with value: 0.7928656227033749 and parameters: {'lambda_l1': 0.04775528013615739, 'lambda_l2': 8.965176809669632e-08, 'num_leaves': 121, 'feature_fraction': 0.591128801624702, 'bagging_fraction': 0.976410758696908, 'bagging_freq': 4, 'min_child_samples': 42, 'learning_rate': 0.25302137065174635}. Best is trial 0 with value: 0.8060800497484313.
[I 2025-12-07 10:08:39,234] Trial 2 finished with value: 0.8226157499010684 and parameters: {'lambda_l1

Best trial:
  Value: 0.8318022499858669
  Params: 
    lambda_l1: 0.0053581028763192195
    lambda_l2: 0.016007994745208018
    num_leaves: 67
    feature_fraction: 0.8724177680267724
    bagging_fraction: 0.590699275778822
    bagging_freq: 1
    min_child_samples: 68
    learning_rate: 0.09928420732225533

--- Tuned LightGBM Results (on Optuna Validation Set) ---
Accuracy: 0.8941
AUC:      0.9621


In [None]:
fixed_params={
    lambda_l1: 3.2937645902025746e-05
    lambda_l2: 0.13846079352331866
    num_leaves: 224
    feature_fraction: 0.7155999573321066
    bagging_fraction: 0.47850198865960336
    bagging_freq: 5
    min_child_samples: 55
    learning_rate: 0.2674949781898659
}

best_clf = lgb.LGBMClassifier(**fixed_params, random_state=42)
# Train the final model on the full preprocessed training data (X, y)
best_clf.fit(X, y)

# Evaluate on the validation set used for tuning, or the full test set if desired
# For consistency with the objective function's evaluation, we'll evaluate on X_val_optuna
y_pred_tuned = best_clf.predict(X_val_optuna)
y_prob_tuned = best_clf.predict_proba(X_val_optuna)[:, 1]

SyntaxError: invalid syntax. Perhaps you forgot a comma? (ipython-input-3579810904.py, line 2)

In [None]:
# このセルを実行すると提出用のCSVファイルが作成されます
submission = pd.read_csv(PATH + 'sample_submission.csv') # PATHは必要に応じて変更の必要があります
submission["Drafted"] = best_clf.predict_proba(X_test)[:, 1] # Use the best model to predict probabilities on the actual test set (X_test)
submission.to_csv(PATH + 'baseline_submission.csv', index=False)

In [None]:
len(train_df.columns)

37

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

def adversarial_validation(train_df, test_df, drop_cols=[]):
    """
    trainとtestを識別できるか検証し、分布が乖離している特徴量を特定する関数
    """
    # データをコピーしてラベル付け
    train_adv = train_df.copy()
    test_adv = test_df.copy()

    train_adv['Is_Test'] = 0
    test_adv['Is_Test'] = 1

    # 結合
    adv_data = pd.concat([train_adv, test_adv], axis=0).reset_index(drop=True)

    # 目的変数と特徴量
    y_adv = adv_data['Is_Test']

    # 識別には使わないカラム（IDやターゲット、および指定されたカラム）を除外
    cols_to_drop = ['Is_Test', 'Drafted', 'Id'] + drop_cols
    cols_to_drop = [c for c in cols_to_drop if c in adv_data.columns]
    X_adv = adv_data.drop(columns=cols_to_drop)

    # カテゴリ変数の処理（LightGBM用）
    cat_cols = X_adv.select_dtypes(include=['object', 'category']).columns.tolist()
    for c in cat_cols:
        X_adv[c] = X_adv[c].astype('category')

    # モデル学習 (5-Fold CV)
    model = lgb.LGBMClassifier(random_state=42, verbose=-1)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    feature_importances = pd.DataFrame()
    feature_importances['feature'] = X_adv.columns
    feature_importances['importance'] = 0
    scores = []

    print(f"--- Adversarial Validation Start ---")
    for fold, (train_idx, val_idx) in enumerate(skf.split(X_adv, y_adv)):
        X_tr, X_val = X_adv.iloc[train_idx], X_adv.iloc[val_idx]
        y_tr, y_val = y_adv.iloc[train_idx], y_adv.iloc[val_idx]

        model.fit(X_tr, y_tr, categorical_feature=cat_cols)

        val_preds = model.predict_proba(X_val)[:, 1]
        score = roc_auc_score(y_val, val_preds)
        scores.append(score)

        feature_importances['importance'] += model.feature_importances_ / 5

    mean_auc = np.mean(scores)
    print(f"Adversarial Validation AUC: {mean_auc:.4f}")

    if mean_auc > 0.7:
        print("!! 警告: TrainとTestの分布が大きく異なります !!")
    else:
        print("分布の乖離は許容範囲内です。")

    return feature_importances.sort_values(by='importance', ascending=False)

adversarial_validation(train_df,test_df,drop_cols=[])

# --- 実行例 ---
# 実際に使用する際は、特徴量作成済みの train_df, test_df を渡してください
# adv_importance = adversarial_validation(train_df, test_df, drop_cols=['Year'])
# print(adv_importance.head(10))

--- Adversarial Validation Start ---
Adversarial Validation AUC: 1.0000
!! 警告: TrainとTestの分布が大きく異なります !!


Unnamed: 0,feature,importance
4,Weight,119.0
28,Height_YearPos_Rank,101.0
0,Year,97.4
1,Age,44.6
5,Sprint_40yd,43.0
3,Height,25.4
9,Agility_3cone,10.2
15,selected_feature_std,6.0
25,Agility_3cone_Z,5.0
6,Vertical_Jump,4.6
