In [1]:
import kagglehub, os
# Fetch the Kaggle dataset through kagglehub and keep the location it returns in the
# global `path`; find_csv() in the bootstrap cell looks this global up to locate Pokemon.csv.
path = kagglehub.dataset_download("divyaraj2006/the-complete-pokemo-dataset")
# Show the returned location together with the files found there.
print(path, os.listdir(path))


Using Colab cache for faster access to the 'the-complete-pokemo-dataset' dataset.
/kaggle/input/the-complete-pokemo-dataset ['Pokemon.csv']


In [3]:
# -------- Bootstrap Pokémon (runtime propre, sans pip install) --------
import os, numpy as np, pandas as pd

# 1) Trouver Pokemon.csv
def find_csv():
    """Return the location of ``Pokemon.csv``.

    Lookup order:
      1. inside the directory named by the global ``path`` (only when that
         global exists, e.g. after the kagglehub download cell has run);
      2. the current working directory.

    Returns:
        str: the first candidate that exists on disk.

    Raises:
        FileNotFoundError: when none of the candidates exists.
    """
    filename = "Pokemon.csv"
    candidates = []
    # `path` is optional shared notebook state, hence the globals() probe.
    if "path" in globals():
        candidates.append(os.path.join(path, filename))
    candidates.append(filename)

    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError("Pokemon.csv introuvable. Uploade le fichier ou relance kagglehub pour obtenir 'path'.")

# Locate the CSV and load it as-is; prepare_df() works on a copy, so `raw` stays untouched.
csv_path = find_csv()
raw = pd.read_csv(csv_path)

# 2) Préparer df + rôles
def prepare_df(poke: pd.DataFrame) -> pd.DataFrame:
    """Clean the raw Pokémon table and derive normalized stats, role scores and a role label.

    The input frame is copied, never modified. Known source columns are renamed
    to short snake_case names ('#' -> 'id', 'Sp. Atk' -> 'spa', ...); columns
    that are absent from the input are simply not renamed.

    Added columns:
      * ``total``            sum of the six base stats
      * ``<stat>_n``         min-max normalized value of each stat and of ``total``
      * ``balance_n``        1 for the most even stat spread, 0 for the most uneven one
      * ``score_sweeper``, ``score_tank``, ``score_lead``, ``support_prop``,
        ``score_support``    role affinity scores (``score_support`` equals ``support_prop``)
      * ``role``             one of 'sweeper', 'tank', 'support', 'lead'

    Args:
        poke: raw table, expected to hold the six base-stat columns. 'Type 2'
            and 'Legendary' are optional: a missing 'Type 2' yields an
            all-missing ``type_2`` column, a missing 'Legendary' yields False.

    Returns:
        A new DataFrame with the cleaned and derived columns.

    Raises:
        KeyError: if one of the six base-stat columns is missing.
    """
    stat_cols = ['hp', 'atk', 'def', 'spa', 'spd', 'spe']

    df = poke.copy()
    rename = {'#':'id','Name':'name','Type 1':'type_1','Type 2':'type_2','HP':'hp','Attack':'atk',
              'Defense':'def','Sp. Atk':'spa','Sp. Def':'spd','Speed':'spe',
              'Generation':'generation','Legendary':'legendary'}
    df = df.rename(columns={k: v for k, v in rename.items() if k in df.columns})

    # Text columns: cast to str and trim surrounding whitespace.
    for c in ['name', 'type_1', 'type_2']:
        if c in df:
            df[c] = df[c].astype(str).str.strip()
    # Stat columns: force numeric, unparsable or missing values count as 0.
    for c in stat_cols:
        if c in df:
            df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0)
    df['total'] = df[stat_cols].sum(axis=1)

    df['legendary'] = df.get('legendary', False)
    df['legendary'] = df['legendary'].astype(str).str.lower().isin(['true', '1', 'yes'])

    if 'type_2' in df.columns:
        # The str cast above turned missing secondary types into text; restore real NaN.
        df['type_2'] = df['type_2'].replace(['nan', 'NaN', 'None', ''], np.nan)
    else:
        # Input without a secondary-type column: treat every row as single-typed
        # instead of failing with a KeyError.
        df['type_2'] = np.nan

    # Min-max normalization; the epsilon avoids a division by zero on constant columns.
    for c in stat_cols + ['total']:
        mn, mx = df[c].min(), df[c].max()
        df[c + '_n'] = (df[c] - mn) / (mx - mn + 1e-9)

    # Balance: inverse of the per-row dispersion of the six stats, rescaled to [0, 1].
    disp = df[stat_cols].std(axis=1, ddof=0)
    df['balance_n'] = (disp.max() - disp) / (disp.max() - disp.min() + 1e-9)

    # Role scores.
    off_n = np.maximum(df['atk_n'], df['spa_n'])
    bulk_n = 0.50*df['hp_n'] + 0.25*df['def_n'] + 0.25*df['spd_n']
    df['score_sweeper'] = 0.50*off_n + 0.40*df['spe_n'] + 0.10*df['total_n']
    df['score_tank']    = 0.60*bulk_n + 0.40*((df['def_n'] + df['spd_n'])/2)
    df['score_lead']    = 0.65*df['spe_n'] + 0.20*off_n + 0.15*df['total_n']
    df['support_prop']  = (0.50*bulk_n + 0.25*df['balance_n'] + 0.10*(1.0 - off_n)
                           + 0.10*(1.0 - df['spe_n'].clip(0.7, 1.0))
                           + 0.05*(~df['legendary']).astype(float))
    df['score_support'] = df['support_prop']

    role_cols = ['score_sweeper', 'score_tank', 'score_support', 'score_lead']
    df['role'] = df[role_cols].idxmax(axis=1).str.replace('score_', '', regex=False)
    # Override: a support score beating both sweeper and lead by at least 0.05 forces
    # the 'support' label, whatever the tank score is.
    df.loc[df['score_support'] >= df[['score_sweeper', 'score_lead']].max(axis=1) + 0.05, 'role'] = 'support'
    return df

# Cleaned, feature-enriched table passed to build_team() in the cells below.
df = prepare_df(raw)

# 3) Helpers + build_team (≥1 support, ≤2 sweepers, ≤2 légendaires)
def _types_of_row(r):
    t=[];
    if pd.notna(r.get('type_1')): t.append(r['type_1'])
    if pd.notna(r.get('type_2')): t.append(r['type_2'])
    return set(t)

def _count_legendaries(rows_df): return int(sum(1 for _,x in rows_df.iterrows() if bool(x.get('legendary',False))))

def _diversity_bonus(row, used_types, a=10.0, b=5.0, g=2.0):
    """Score how much ``row`` widens the type coverage described by ``used_types``.

    Each type of the row that is not yet in ``used_types`` earns ``a``; each one
    already present costs ``b``; an extra ``g`` is subtracted when the row's
    primary type ('type_1') is already present.
    """
    row_types = _types_of_row(row)
    n_new = len([t for t in row_types if t not in used_types])
    n_dup = len(row_types) - n_new
    primary_penalty = g if row.get('type_1') in used_types else 0
    return a*n_new - b*n_dup - primary_penalty

def _pick_support(df, chosen, used, max_leg, team):
    """Pick the best remaining support candidate.

    Args:
        df: prepared table (needs 'name', 'legendary', 'support_prop', 'role'
            and the type columns).
        chosen: names already on the team; these rows are excluded.
        used: types already covered by the team, fed to the diversity bonus.
        max_leg: legendary cap; once ``team`` holds that many legendaries,
            legendary candidates are excluded.
        team: DataFrame of the members picked so far.

    Returns:
        The winning row (a Series that also carries its 'score_adj'), or
        ``None`` when no candidate is left.
    """
    cand = df[~df['name'].isin(chosen)]
    if _count_legendaries(team) >= max_leg:
        cand = cand[~cand['legendary']]
    if cand.empty:
        return None

    # Score = support propensity + type-diversity bonus + 0.5 for rows already
    # labelled 'support'. The scored frame is built with assign() so that no
    # column is written onto a filtered slice (SettingWithCopyWarning).
    bonus = cand.apply(lambda r: _diversity_bonus(r, used), axis=1)
    score = cand['support_prop'] + bonus
    score = score + (cand['role'] == 'support').astype(float)*0.5
    ranked = cand.assign(score_adj=score)
    return ranked.sort_values('score_adj', ascending=False).iloc[0]

def _pick_for_role(df, role, chosen, used, max_leg, team):
    """Pick the best remaining candidate for ``role``.

    'support' is delegated to ``_pick_support``. For any other role the pool is
    narrowed to rows labelled with that role when at least one exists; otherwise
    every remaining row competes on its ``score_<role>`` column.

    Args:
        df: prepared table (needs 'name', 'legendary', 'role',
            ``score_<role>`` and the type columns).
        role: role name; a ``score_<role>`` column must exist in ``df``
            (a KeyError is raised otherwise).
        chosen: names already on the team; these rows are excluded.
        used: types already covered by the team, fed to the diversity bonus.
        max_leg: legendary cap; once ``team`` holds that many legendaries,
            legendary candidates are excluded.
        team: DataFrame of the members picked so far.

    Returns:
        The winning row (a Series that also carries its 'score_adj'), or
        ``None`` when no candidate is left.
    """
    if role == 'support':
        return _pick_support(df, chosen, used, max_leg, team)

    cand = df[~df['name'].isin(chosen)]
    if _count_legendaries(team) >= max_leg:
        cand = cand[~cand['legendary']]
    if cand.empty:
        return None

    exact = cand[cand['role'] == role]
    if not exact.empty:
        cand = exact

    # `cand` is a filtered slice at this point: build the scored frame with
    # assign() instead of writing a column onto it (SettingWithCopyWarning).
    bonus = cand.apply(lambda r: _diversity_bonus(r, used), axis=1)
    ranked = cand.assign(score_adj=cand[f'score_{role}'] + bonus)
    return ranked.sort_values('score_adj', ascending=False).iloc[0]

def _pick_balance(df, chosen, used, max_leg, team, avoid_extra_sweeper=False):
    """Pick a filler member that favours overall strength and new type coverage.

    Score = ``total_n`` + 1.6 per type not yet in ``used`` - 2.0 when the
    primary type is already in ``used``.

    Args:
        df: prepared table (needs 'name', 'legendary', 'role', 'total_n' and
            the type columns).
        chosen: names already on the team; these rows are excluded.
        used: types already covered by the team.
        max_leg: legendary cap; once ``team`` holds that many legendaries,
            legendary candidates are excluded.
        team: DataFrame of the members picked so far.
        avoid_extra_sweeper: when True, rows labelled 'sweeper' are excluded.

    Returns:
        The winning row (a Series that also carries its 'score_adj'), or
        ``None`` when no candidate is left.
    """
    cand = df[~df['name'].isin(chosen)]
    if _count_legendaries(team) >= max_leg:
        cand = cand[~cand['legendary']]
    if avoid_extra_sweeper:
        cand = cand[cand['role'] != 'sweeper']
    if cand.empty:
        return None

    def bal_score(r):
        return r.get('total_n', 0.0) + 1.6*len([t for t in _types_of_row(r) if t not in used])

    # `cand` may be a filtered slice: build the scored frame with assign()
    # instead of writing a column onto it (SettingWithCopyWarning).
    score = cand.apply(bal_score, axis=1)
    score = score - cand['type_1'].map(lambda t: 2.0 if t in used else 0)
    ranked = cand.assign(score_adj=score)
    return ranked.sort_values('score_adj', ascending=False).iloc[0]

def build_team(start_name, df, team_size=6, include_roles=("tank","sweeper","lead"),
               max_sweepers=2, max_legendaries=2, min_supports=1, target_distinct_types=5):
    """Build a team around ``start_name``.

    Steps:
      1. the starter is always the first member;
      2. supports are added via ``_pick_support`` until ``min_supports`` is
         reached, the team is full, or no candidate is left;
      3. one member is added for each role in ``include_roles`` (a 'sweeper'
         slot is skipped once ``max_sweepers`` is reached);
      4. remaining slots are filled with ``_pick_balance``;
      5. fallback: if the support quota is still unmet, the weakest non-starter
         (lowest ``total_n``) is replaced by the best remaining support.

    Args:
        start_name: value of the 'name' column identifying the starter.
        df: table produced by ``prepare_df``.
        team_size: number of members wanted.
        include_roles: roles to fill, in order, after the support guarantee.
        max_sweepers: cap on members labelled 'sweeper'.
        max_legendaries: cap on legendary members (applied to picks, not to
            the starter).
        min_supports: minimum number of supports. A row chosen by
            ``_pick_support`` counts as a support whatever its role label.
        target_distinct_types: currently unused; kept so existing calls that
            pass it keep working.

    Returns:
        DataFrame with one row per member, index reset to 0..n-1. Picked rows
        carry the 'score_adj' they were selected with; the starter has none.

    Raises:
        ValueError: if ``start_name`` is not present in ``df['name']``.
    """
    if start_name not in set(df['name']):
        raise ValueError(f"{start_name} introuvable.")

    starter = df.loc[df['name'] == start_name].iloc[0]
    team_rows = [starter]
    chosen = {starter['name']}
    used = _types_of_row(starter)
    sweeper_count = int(starter.get('role') == 'sweeper')
    support_count = int(starter.get('role') == 'support')

    def _enlist(pick, forced_support=False):
        # Add `pick` to the team and refresh the name/type/role bookkeeping.
        nonlocal sweeper_count, support_count
        team_rows.append(pick)
        chosen.add(pick['name'])
        used.update(_types_of_row(pick))
        sweeper_count += int(pick.get('role') == 'sweeper')
        support_count += 1 if forced_support else int(pick.get('role') == 'support')

    # Guarantee the support quota early. This is a loop so that min_supports > 1
    # is honoured too (a single `if` could only ever add one support).
    while support_count < min_supports and len(team_rows) < team_size:
        pick = _pick_support(df, chosen, used, max_legendaries, pd.DataFrame(team_rows))
        if pick is None:
            break
        _enlist(pick, forced_support=True)

    # Core roles.
    for role in include_roles:
        if len(team_rows) >= team_size:
            break
        if role == 'sweeper' and sweeper_count >= max_sweepers:
            continue
        pick = _pick_for_role(df, role, chosen, used, max_legendaries, pd.DataFrame(team_rows))
        if pick is None:
            continue
        _enlist(pick)

    # Fill the remaining slots.
    while len(team_rows) < team_size:
        avoid = sweeper_count >= max_sweepers
        pick = _pick_balance(df, chosen, used, max_legendaries, pd.DataFrame(team_rows),
                             avoid_extra_sweeper=avoid)
        if pick is None:
            break
        _enlist(pick)

    team = pd.DataFrame(team_rows).reset_index(drop=True)

    # Fallback support guarantee: swap out the weakest non-starter.
    if support_count < min_supports:
        pool = df[~df['name'].isin(set(team['name']))]
        if _count_legendaries(team) >= max_legendaries:
            pool = pool[~pool['legendary']]
        repl_idx = team.index[1:]  # the starter (position 0) is never replaced
        if not pool.empty and len(repl_idx) > 0:
            victim = team.loc[repl_idx, 'total_n'].idxmin() if 'total_n' in team.columns else repl_idx[-1]
            best_sup = (pool.assign(score_adj=pool['support_prop'])
                            .sort_values('score_adj', ascending=False)
                            .iloc[0])
            # Align on column labels: a positional write would silently depend on
            # `team` and `pool` sharing the exact same column order and count.
            team.loc[victim] = best_sup.reindex(team.columns)
    return team

# 4) Example: build a team around Charizard; the bare `team` expression renders it as the cell output
team = build_team("Charizard", df)
team



Unnamed: 0,id,name,type_1,type_2,Total,hp,atk,def,spa,spd,...,spe_n,total_n,balance_n,score_sweeper,score_tank,score_lead,support_prop,score_support,role,score_adj
0,6,Charizard,Fire,Flying,534,78,84,78,109,85,...,0.542857,0.59,0.887848,0.545165,0.312834,0.548966,0.503191,0.503191,lead,
1,251,Celebi,Psychic,Grass,600,100,100,100,100,100,...,0.542857,0.7,1.0,0.5439,0.39804,0.56056,0.576486,0.576486,support,21.076486
2,208,SteelixMega Steelix,Steel,Ground,610,75,125,230,55,95,...,0.142857,0.716667,0.373572,0.453134,0.562402,0.330087,0.451006,0.451006,tank,20.562402
3,94,GengarMega Gengar,Ghost,Poison,600,60,65,80,170,95,...,0.714286,0.7,0.623727,0.790497,0.311352,0.743199,0.391927,0.391927,sweeper,20.790497
4,428,LopunnyMega Lopunny,Normal,Fighting,580,65,136,94,54,96,...,0.742857,0.666667,0.697547,0.717864,0.340702,0.724479,0.436965,0.436965,lead,20.724479
5,646,KyuremBlack Kyurem,Dragon,Ice,700,125,170,100,120,90,...,0.514286,0.866667,0.738133,0.738327,0.410901,0.642664,0.441836,0.441836,sweeper,4.066667


In [4]:
# 1) Example team, exported to CSV
team = build_team("Charizard", df)
team.to_csv("team_charizard.csv", index=False)

# 2) Minimal requirements file
# 3) .gitignore so the large Kaggle CSV is not uploaded
# Both files are (re)written from scratch on every run.
for filename, content in (
    ("requirements.txt", "pandas\nnumpy\n"),
    (".gitignore", "Pokemon.csv\n/data/\n.ipynb_checkpoints/\n.DS_Store\n"),
):
    with open(filename, "w") as f:
        f.write(content)
