In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA

# === 1. Load the data ===
df = pd.read_csv("/kaggle/input/tst-day-2/train.csv")
sample = pd.read_csv("/kaggle/input/tst-day-2/sample_submission.csv")

# === 2. Goalkeeper attribute columns (used for both a meta-feature and the GK flag) ===
gk_cols = ["gk_diving", "gk_handling", "gk_kicking", "gk_positioning", "gk_reflexes"]

# === 3. Meta-features ===
# Each meta-feature is the row-wise mean of a group of raw columns
# (pandas' mean skips NaN values by default). The mapping order is the
# order in which the new columns are appended to ``df``.
meta_feature_groups = {
    "attacking_skill": ["finishing", "positioning", "shot_power", "volleys", "long_shots"],
    "passing_ability": ["short_passing", "long_passing", "vision", "crossing"],
    "dribble_mobility": ["dribbling", "agility", "balance", "ball_control"],
    "pace": ["acceleration", "sprint_speed"],
    "defense_skill": ["interceptions", "standing_tackle", "sliding_tackle", "defensive_awareness"],
    "physicality": ["strength", "stamina", "jumping", "aggression"],
    "set_piece_specialist": ["curve", "fk_accuracy", "penalties"],
    "goalkeeper_score": gk_cols,
    "composure_score": ["composure", "reactions"],
    "offensive_support": ["vision", "positioning", "short_passing"],
    # Newer features
    "attack_support": ["vision", "short_passing", "composure"],
    "defending_positioning": ["defensive_awareness", "positioning", "aggression"],
}
for meta_name, source_cols in meta_feature_groups.items():
    df[meta_name] = df[source_cols].mean(axis=1)

# === 4. Goalkeepers ===
# A row is flagged as a goalkeeper only when every GK attribute is above 40.
df["is_gk"] = (df[gk_cols] > 40).all(axis=1)

# === 5. Split into goalkeepers and outfield players ===
gk_mask = df["is_gk"]
gk_df = df.loc[gk_mask].copy()
field_df = df.loc[~gk_mask].copy()

# === 6. Feature sets used for clustering ===
features = [
    "attacking_skill",
    "passing_ability",
    "dribble_mobility",
    "pace",
    "defense_skill",
    "physicality",
    "set_piece_specialist",
    "composure_score",
    "offensive_support",
    "attack_support",
    "defending_positioning",
]
gk_features = ["goalkeeper_score"]

def preprocess(X):
    """Mean-impute missing values in ``X`` and then standardize the result.

    A fresh ``SimpleImputer`` and ``StandardScaler`` are fitted on ``X``
    itself on every call, so no fitted state is shared between calls.

    Returns the output of ``StandardScaler().fit_transform`` applied to the
    imputed data.
    """
    imputed = SimpleImputer(strategy="mean").fit_transform(X)
    return StandardScaler().fit_transform(imputed)

# Preprocessed feature matrices; each group is imputed and scaled on its own.
X_gk = preprocess(gk_df[gk_features])
X_field = preprocess(field_df[features])

# === 6. Pick the number of components for outfield players (by silhouette score) ===
# Fallback is 6 components; a candidate replaces it only when its silhouette
# score is strictly greater than the best seen so far (ties keep the earlier k).
best_score, best_k = -1, 6
for k in range(5, 15):
    gmm = GaussianMixture(n_components=k, random_state=42)
    labels = gmm.fit_predict(X_field)
    score = silhouette_score(X_field, labels)
    if score > best_score:
        best_score, best_k = score, k

print(f"Лучшее число кластеров: {best_k}")

# === 7. Clustering ===
# Outfield players get a mixture with the selected number of components;
# goalkeepers are modelled with a single component.
gmm_field = GaussianMixture(n_components=best_k, random_state=42)
gmm_field.fit(X_field)
gmm_gk = GaussianMixture(n_components=1, random_state=42)
gmm_gk.fit(X_gk)

field_clusters = gmm_field.predict(X_field)
gk_clusters = gmm_gk.predict(X_gk)

field_df["cluster"] = field_clusters
# Offset goalkeeper labels by best_k so they do not collide with outfield labels.
gk_df["cluster"] = gk_clusters + best_k

# === 8. Combine both groups into one id -> cluster table, ordered by id ===
df_out = pd.concat([field_df, gk_df], axis=0)[["id", "cluster"]].sort_values("id")

# === 9. Final submission ===
# Drop any placeholder "cluster" column from the sample file, then attach the
# predicted clusters by id; the left join keeps the sample's rows and order.
sample_without_cluster = sample.drop(columns=["cluster"], errors="ignore")
submission = sample_without_cluster.merge(df_out, on="id", how="left")
submission.to_csv("submission.csv", index=False)


  return op(a, b)


Лучшее число кластеров: 6
