In [6]:
import pandas as pd
from google.colab import data_table

dataset = pd.read_csv("blackjack_simulator.csv")

dataset

import numpy as np
import pandas as pd
import ast

# Discretizzazione target (3 classi) come prima
condizioni = [
    (dataset['win'] > 0),
    (dataset['win'] == 0),
    (dataset['win'] < 0)
]
valori_target = [1, 0, -1]
dataset['win'] = np.select(condizioni, valori_target)

print("Distribuzione win (3-classi):")
print(dataset['win'].value_counts())

to_drop = [
    "shoe_id",
    "dealer_final",
    "dealer_final_value",
    "player_final",
    "player_final_value",
    "actions_taken",
    "run_count",
    "true_count",
    "cards_remaining",
]
dataset = dataset.drop(columns=to_drop, errors="ignore")

def parse_hand(x):
    if isinstance(x, list):
        return x
    if isinstance(x, str):
        try:
            parsed = ast.literal_eval(x)
            return parsed if isinstance(parsed, list) else []
        except Exception:
            return []
    return []

def card_value(card):
    if isinstance(card, (int, float)):
        return int(card)
    c = str(card).strip().upper()
    if c in {"J", "Q", "K"}:
        return 10
    if c == "A":
        return 11
    try:
        return int(c)
    except Exception:
        return 0

def hand_value_and_soft(hand):
    values = []
    aces_total = 0
    for c in hand:
        if str(c).strip().upper() == "A":
            aces_total += 1
        values.append(card_value(c))

    total = sum(values)
    aces_left = aces_total
    while total > 21 and aces_left > 0:
        total -= 10
        aces_left -= 1

    is_soft = 1 if aces_left < aces_total else 0
    return total, is_soft

dataset["initial_hand"] = dataset["initial_hand"].apply(parse_hand)

tmp = dataset["initial_hand"].apply(
    lambda h: hand_value_and_soft(h) if isinstance(h, list) else (0, 0)
)

dataset["player_sum"] = tmp.apply(lambda x: x[0]).astype(int)
dataset["player_is_soft"] = tmp.apply(lambda x: x[1]).astype(int)

dataset["player_pair"] = dataset["initial_hand"].apply(
    lambda h: 1 if isinstance(h, list) and len(h) == 2 and card_value(h[0]) == card_value(h[1]) else 0
).astype(int)

dataset["dealer_up"] = dataset["dealer_up"].apply(card_value).astype(int)

dataset_finale = dataset[
    ["player_sum", "player_is_soft", "player_pair", "dealer_up", "win"]
].copy()

print("\nPrime righe dataset_finale:")
print(dataset_finale.head())

# win_bin = 1 se win >= 0 (pareggio o vittoria)
# win_bin = 0 se win == -1 (sconfitta)
dataset_finale["win_bin"] = (dataset_finale["win"] >= 0).astype(int)

print("\nDistribuzione win_bin (0=Perdita, 1=Non-perdita):")
print(dataset_finale["win_bin"].value_counts())

Distribuzione win (3-classi):
win
-1    493992
 1    421940
 0     84068
Name: count, dtype: int64

Prime righe dataset_finale:
   player_sum  player_is_soft  player_pair  dealer_up  win
0          21               0            0          5    1
1          15               0            0          6    1
2          20               0            1         11   -1
3          12               0            0         10    1
4          15               0            0         10   -1

Distribuzione win_bin (0=Perdita, 1=Non-perdita):
win_bin
1    506008
0    493992
Name: count, dtype: int64
