<a href="https://colab.research.google.com/github/fran53759/nba_games_prediction/blob/modelVersions/neumreProjekt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **NBA games prediction project**

In [1]:
# Imports
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams
import pandas as pd
import numpy as np

### 1) DATA COLLECTION

In [2]:
# Function for getting all regular season games from specified season
def download_season(season):
    """Download every regular-season row for one season via LeagueGameFinder.

    Args:
        season: Season identifier forwarded as ``season_nullable``
            (the notebook passes strings such as ``'2022-23'``).

    Returns:
        The DataFrame returned by ``league_game_finder_results.get_data_frame()``.
    """
    print(f"Downloading season {season}...")
    finder = leaguegamefinder.LeagueGameFinder(
        season_type_nullable='Regular Season',
        season_nullable=season,
    )
    return finder.league_game_finder_results.get_data_frame()

# Note: Train games must be games held before test games
train_games_df = pd.concat(
    [download_season(season) for season in ('2022-23', '2023-24')],
    ignore_index=True,
)
test_games_df = download_season('2024-25')

print(f'Downloaded {len(train_games_df)} train games and {len(test_games_df)} test games!')

train_games_df.head()

Downloading season 2022-23...
Downloading season 2023-24...
Downloading season 2024-25...
Downloaded 4920 train games and 2460 test games!


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612739,CLE,Cleveland Cavaliers,22201218,2023-04-09,CLE vs. CHA,L,241,95,...,0.938,11,33,44,27,9,5,16,24,-11.0
1,22022,1610612763,MEM,Memphis Grizzlies,22201226,2023-04-09,MEM @ OKC,L,241,100,...,0.722,11,32,43,25,8,4,12,16,-15.0
2,22022,1610612743,DEN,Denver Nuggets,22201227,2023-04-09,DEN vs. SAC,W,240,109,...,0.72,15,36,51,25,11,2,16,15,14.0
3,22022,1610612748,MIA,Miami Heat,22201219,2023-04-09,MIA vs. ORL,W,241,123,...,0.75,7,37,44,30,10,3,18,20,13.0
4,22022,1610612752,NYK,New York Knicks,22201220,2023-04-09,NYK vs. IND,L,241,136,...,0.773,19,34,53,29,8,8,15,24,-5.0


In [3]:
def adjust_game_df(games_df):
    """Return a cleaned, date-sorted copy of a game-log frame.

    The input frame is NOT modified: all work happens on a copy, so the cell
    that calls this function can be re-run safely and no
    ``SettingWithCopyWarning`` is triggered when a slice is passed in.

    Added / converted columns:
        * ``WIN``  - 1 where ``WL == 'W'``, otherwise 0.
        * ``HGA``  - 1.0 where ``MATCHUP`` contains ``'vs'`` (home row), else 0.0.
        * box-score counting columns are cast to float.
        * ``GAME_DATE`` is parsed with ``pd.to_datetime``.

    Args:
        games_df: DataFrame with at least ``WL``, ``MATCHUP``, ``GAME_DATE``
            and the box-score columns listed in ``int_columns`` below.

    Returns:
        A new DataFrame sorted by ``GAME_DATE`` with a fresh 0..n-1 index.
    """
    # Work on a copy so the caller's frame is left untouched.
    games_df = games_df.copy()

    # Adding binary "WIN" column
    games_df["WIN"] = (games_df["WL"] == "W").astype(int)

    # Adding "HGA" (Home game advantage) column
    games_df["HGA"] = games_df["MATCHUP"].str.contains("vs", regex=False).astype(int)

    # Converting int stat columns to float type
    int_columns = ['MIN','PTS','FGM','FGA','FG3M','FG3A','FTM','FTA','OREB','DREB','REB','AST','STL','BLK','TOV','PF','HGA']
    games_df[int_columns] = games_df[int_columns].astype(float)

    # Convert game date to pandas datetime
    games_df["GAME_DATE"] = pd.to_datetime(games_df["GAME_DATE"])

    # Sorting games by game_date; a stable sort keeps the relative order of
    # same-day rows deterministic between runs.
    return games_df.sort_values(by='GAME_DATE', kind="stable").reset_index(drop=True)

# Run the same preprocessing over the train split first, then the test split
train_games_df, test_games_df = (
    adjust_game_df(split) for split in (train_games_df, test_games_df)
)

train_games_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,WIN,HGA
0,22022,1610612738,BOS,Boston Celtics,0022200001,2022-10-18,BOS vs. PHI,W,240.0,126.0,...,30.0,36.0,24.0,8.0,3.0,10.0,24.0,9.0,1,1.0
1,22022,1610612755,PHI,Philadelphia 76ers,0022200001,2022-10-18,PHI @ BOS,L,239.0,117.0,...,27.0,31.0,16.0,8.0,3.0,14.0,25.0,-9.0,0,0.0
2,22022,1610612747,LAL,Los Angeles Lakers,0022200002,2022-10-18,LAL @ GSW,L,241.0,109.0,...,39.0,48.0,23.0,12.0,4.0,21.0,18.0,-14.0,0,0.0
3,22022,1610612744,GSW,Golden State Warriors,0022200002,2022-10-18,GSW vs. LAL,W,241.0,123.0,...,37.0,48.0,31.0,11.0,4.0,18.0,23.0,14.0,1,1.0
4,22022,1610612758,SAC,Sacramento Kings,0022200014,2022-10-19,SAC vs. POR,L,241.0,108.0,...,37.0,41.0,27.0,8.0,5.0,15.0,25.0,-7.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,22023,1610612756,PHX,Phoenix Suns,0022301194,2024-04-14,PHX @ MIN,W,244.0,125.0,...,19.0,32.0,28.0,13.0,3.0,12.0,24.0,19.0,1,0.0
4916,22023,1610612751,BKN,Brooklyn Nets,0022301192,2024-04-14,BKN @ PHI,L,240.0,86.0,...,38.0,42.0,19.0,8.0,6.0,12.0,14.0,-21.0,0,0.0
4917,22023,1610612753,ORL,Orlando Magic,0022301191,2024-04-14,ORL vs. MIL,W,240.0,113.0,...,36.0,43.0,27.0,11.0,5.0,11.0,18.0,25.0,1,1.0
4918,22023,1610612742,DAL,Dallas Mavericks,0022301196,2024-04-14,DAL @ OKC,L,241.0,86.0,...,29.0,41.0,19.0,8.0,2.0,14.0,15.0,-49.0,0,0.0


### 2) FEATURE ENGINEERING

In [4]:
#ELO Rating System

from collections import defaultdict

from collections import defaultdict
import numpy as np

def add_pregame_elo_columns(df, initial_elo=1500.0, k=24.0, home_adv=70.0, season_alpha=0.75):
    """Attach pre-game Elo ratings to every row of a two-rows-per-game frame.

    Games are replayed in ``(GAME_DATE, GAME_ID, TEAM_ID)`` order. For each
    game the ratings held *before* tip-off are written to the two rows, and
    only afterwards are the ratings updated from the result, so the stored
    values never include the outcome of the game they describe.

    Args:
        df: Frame with ``GAME_DATE``, ``GAME_ID``, ``TEAM_ID``,
            ``TEAM_ABBREVIATION``, ``WL`` and ``HGA`` columns. ``SEASON_ID``
            is used for season boundaries when present.
        initial_elo: Rating given to a team the first time it is seen, and
            the value ratings are pulled towards at a season change.
        k: Elo K-factor (size of each post-game update).
        home_adv: Rating points added to the home side (``HGA == 1``) when
            computing the expected score.
        season_alpha: Weight kept from the previous rating at a season
            change: ``new = alpha * old + (1 - alpha) * initial_elo``.

    Returns:
        A sorted copy of ``df`` (fresh 0..n-1 index) with ``ELO_PRE``,
        ``OPP_ELO_PRE`` and ``ELO_DIFF`` added. Rows belonging to a
        ``GAME_ID`` that does not have exactly two rows keep NaN.
    """
    df = df.sort_values(["GAME_DATE", "GAME_ID", "TEAM_ID"]).reset_index(drop=True)

    base_rating = float(initial_elo)
    ratings = defaultdict(lambda: base_rating)  # keyed by TEAM_ABBREVIATION

    # Prefer SEASON_ID if present. The fallback derives the season *start*
    # year from the date: using the bare calendar year would trigger the
    # partial reset every 1 January, in the middle of a season.
    # NOTE(review): the August cut-off is a heuristic for the usual
    # autumn-to-spring calendar - confirm for unusual seasons.
    if "SEASON_ID" in df.columns:
        season_key = df["SEASON_ID"].to_numpy()
    else:
        dates = df["GAME_DATE"]
        season_key = (dates.dt.year - (dates.dt.month < 8).astype(int)).to_numpy()

    # Pull the columns read inside the loop out once; after reset_index the
    # index labels are positions, so they index these arrays directly.
    team_col = df["TEAM_ABBREVIATION"].to_numpy()
    won_col = (df["WL"] == "W").to_numpy()
    home_col = (df["HGA"] == 1).to_numpy()

    n_rows = len(df)
    elo_pre = np.full(n_rows, np.nan)
    opp_elo_pre = np.full(n_rows, np.nan)

    current_season = None

    for _, rows in df["GAME_ID"].groupby(df["GAME_ID"], sort=False):
        if len(rows) != 2:
            continue

        i1, i2 = rows.index[0], rows.index[1]

        # ----- season boundary check -----
        this_season = season_key[i1]
        if current_season is None:
            current_season = this_season
        elif this_season != current_season:
            # partial reset for all teams that already have an Elo entry
            for team in list(ratings.keys()):
                ratings[team] = season_alpha * ratings[team] + (1.0 - season_alpha) * base_rating
            current_season = this_season

        t1, t2 = team_col[i1], team_col[i2]
        e1, e2 = ratings[t1], ratings[t2]

        # store pre-game elos
        elo_pre[i1], elo_pre[i2] = e1, e2
        opp_elo_pre[i1], opp_elo_pre[i2] = e2, e1

        # actual result (team row i1)
        w1 = 1.0 if won_col[i1] else 0.0
        w2 = 1.0 - w1

        # home advantage
        r1 = e1 + (home_adv if home_col[i1] else 0.0)
        r2 = e2 + (home_adv if home_col[i2] else 0.0)

        exp1 = 1.0 / (1.0 + 10 ** ((r2 - r1) / 400.0))
        exp2 = 1.0 - exp1

        # update AFTER game
        ratings[t1] = e1 + k * (w1 - exp1)
        ratings[t2] = e2 + k * (w2 - exp2)

    df["ELO_PRE"] = elo_pre
    df["OPP_ELO_PRE"] = opp_elo_pre
    df["ELO_DIFF"] = elo_pre - opp_elo_pre

    return df


# Tag every row with its split so Elo can be computed over one continuous
# timeline (train followed by test) and the rows separated again afterwards.
train_games_df["__SET__"] = "train"
test_games_df["__SET__"] = "test"

all_df = add_pregame_elo_columns(
    pd.concat([train_games_df, test_games_df], ignore_index=True),
    k=24.0,
    home_adv=70.0,
    season_alpha=0.70,
)

# Recover the two splits and drop the helper tag column
train_games_df, test_games_df = (
    all_df[all_df["__SET__"] == split_tag].drop(columns=["__SET__"]).reset_index(drop=True)
    for split_tag in ("train", "test")
)

train_games_df[["GAME_DATE","TEAM_ABBREVIATION","MATCHUP","WL","ELO_PRE","OPP_ELO_PRE","ELO_DIFF"]].head()


Unnamed: 0,GAME_DATE,TEAM_ABBREVIATION,MATCHUP,WL,ELO_PRE,OPP_ELO_PRE,ELO_DIFF
0,2022-10-18,BOS,BOS vs. PHI,W,1500.0,1500.0,0.0
1,2022-10-18,PHI,PHI @ BOS,L,1500.0,1500.0,0.0
2,2022-10-18,GSW,GSW vs. LAL,W,1500.0,1500.0,0.0
3,2022-10-18,LAL,LAL @ GSW,L,1500.0,1500.0,0.0
4,2022-10-19,ORL,ORL @ DET,L,1500.0,1500.0,0.0


*-> Functions used for extracting features*

In [5]:
# Calculates win rate difference 
def win_rate_diff(team_recent_games, opponent_recent_games, game):
    """Mean of ``WIN`` over the team's recent games minus the opponent's.

    ``game`` is not used; it is part of the common feature-function signature.
    """
    team_rate = team_recent_games["WIN"].mean()
    opponent_rate = opponent_recent_games["WIN"].mean()
    return team_rate - opponent_rate

# Returns 1 if team has home advantage
def home_advantage(team_recent_games, opponent_recent_games, game):
    """Return the ``HGA`` value of ``game`` unchanged.

    The two recent-games arguments are not used.
    """
    hga_flag = game["HGA"]
    return hga_flag

# Calculates difference in average points scored between teams
def avg_points_diff(team_recent_games, opp_recent_games, game):
    """Mean ``PTS`` of the team's recent games minus the opponent's mean ``PTS``.

    ``game`` is not used.
    """
    team_avg_pts = team_recent_games["PTS"].mean()
    opp_avg_pts = opp_recent_games["PTS"].mean()
    return team_avg_pts - opp_avg_pts

# Calculates difference in three-point shooting percentage
def fg3_pct_diff(team_recent_games, opp_recent_games, game):
    """Difference in pooled three-point percentage (team minus opponent).

    Each percentage is total ``FG3M`` divided by total ``FG3A`` over the
    recent games; the denominator is floored at 1 so zero attempts yields 0
    instead of a division by zero. ``game`` is not used.
    """
    team_made, team_attempts = team_recent_games["FG3M"].sum(), team_recent_games["FG3A"].sum()
    opp_made, opp_attempts = opp_recent_games["FG3M"].sum(), opp_recent_games["FG3A"].sum()
    return team_made / max(team_attempts, 1) - opp_made / max(opp_attempts, 1)

# Calculates difference in overall field goal percentage (2PT + 3PT)
def fg_pct_diff(team_recent_games, opp_recent_games, game):
    """Difference in pooled field-goal percentage (team minus opponent).

    Each percentage is total ``FGM`` divided by total ``FGA`` over the recent
    games. The denominator is floored at 1 - the same guard the three-point
    and effective-FG features use - so a window with zero attempts
    contributes 0 rather than NaN. ``game`` is not used.
    """
    team_pct = team_recent_games["FGM"].sum() / max(team_recent_games["FGA"].sum(), 1)
    opp_pct = opp_recent_games["FGM"].sum() / max(opp_recent_games["FGA"].sum(), 1)
    return team_pct - opp_pct

# Calculates difference in average blocks per game
def blocks_diff(team_recent_games, opp_recent_games, game):
    """Mean ``BLK`` of the team's recent games minus the opponent's mean ``BLK``.

    ``game`` is not used.
    """
    team_avg_blk = team_recent_games["BLK"].mean()
    opp_avg_blk = opp_recent_games["BLK"].mean()
    return team_avg_blk - opp_avg_blk

# Calculates difference in average rebounds per game
def rebounds_diff(team_recent_games, opp_recent_games, game):
    """Mean ``REB`` of the team's recent games minus the opponent's mean ``REB``.

    ``game`` is not used.
    """
    team_avg_reb = team_recent_games["REB"].mean()
    opp_avg_reb = opp_recent_games["REB"].mean()
    return team_avg_reb - opp_avg_reb

# Calculates difference in average assists per game
def assists_diff(team_recent, opp_recent, game):
    """Mean ``AST`` of the team's recent games minus the opponent's mean ``AST``.

    ``game`` is not used.
    """
    team_avg_ast = team_recent["AST"].mean()
    opp_avg_ast = opp_recent["AST"].mean()
    return team_avg_ast - opp_avg_ast

# Calculates difference in average turnovers per game
def turnovers_diff(team_recent, opp_recent, game):
    """Opponent's mean ``TOV`` minus the team's mean ``TOV``.

    Note the reversed order compared with the other ``*_diff`` features:
    the result is positive when the team turns the ball over LESS than its
    opponent. ``game`` is not used.
    """
    opp_avg_tov = opp_recent["TOV"].mean()
    team_avg_tov = team_recent["TOV"].mean()
    return opp_avg_tov - team_avg_tov

#Calculate difference in Elo ratings
def elo(team_recent, opp_recent, game):
    """Return the ``ELO_DIFF`` value stored on ``game``.

    The two recent-games arguments are not used.
    """
    elo_gap = game["ELO_DIFF"]
    return elo_gap

#Calculate difference in rest days
def rest_days_diff(team_recent, opp_recent, game):
    """Team's rest days before ``game`` minus the opponent's rest days.

    Rest days are the whole days between a side's most recent previous game
    and ``game["GAME_DATE"]``, minus one (a game on the previous day counts
    as zero rest). The most recent game is taken as the latest ``GAME_DATE``
    in each frame, so the result does not depend on the row order of the
    frames passed in.
    """
    game_date = game["GAME_DATE"]

    team_last_date = team_recent["GAME_DATE"].max()
    opp_last_date = opp_recent["GAME_DATE"].max()

    team_rest = (game_date - team_last_date).days - 1
    opp_rest = (game_date - opp_last_date).days - 1

    return team_rest - opp_rest

#Calculate difference in plus minus
def plus_minus_diff(team_recent, opp_recent, game):
    """Mean ``PLUS_MINUS`` of the team's recent games minus the opponent's.

    Returns 0.0 when either frame lacks a ``PLUS_MINUS`` column.
    ``game`` is not used.
    """
    column = "PLUS_MINUS"
    if column not in team_recent.columns or column not in opp_recent.columns:
        # LeagueGameFinder usually provides PLUS_MINUS. If not present, fallback to 0.
        return 0.0
    return team_recent[column].mean() - opp_recent[column].mean()

#Calculate difference in effective field goal percentage
def efg_diff(team_recent, opp_recent, game):
    """Difference in effective field-goal percentage (team minus opponent).

    eFG% is computed over each side's recent games as
    ``(sum(FGM) + 0.5 * sum(FG3M)) / sum(FGA)``, with the denominator
    floored at 1.0 to avoid dividing by zero. ``game`` is not used.
    """
    def _pooled_efg(recent):
        attempts = max(recent["FGA"].sum(), 1.0)
        return (recent["FGM"].sum() + 0.5 * recent["FG3M"].sum()) / attempts

    team_efg = _pooled_efg(team_recent)
    opp_efg = _pooled_efg(opp_recent)
    return team_efg - opp_efg


# Registry mapping a feature name to the function that computes it.
# Every function takes (team_recent_games, opponent_recent_games, game).
FEATURE_FUNCTIONS = dict(
    win_rate_diff=win_rate_diff,
    home_advantage=home_advantage,
    avg_points_diff=avg_points_diff,
    fg3_pct_diff=fg3_pct_diff,
    fg_pct_diff=fg_pct_diff,
    blocks_diff=blocks_diff,
    rebounds_diff=rebounds_diff,
    assists_diff=assists_diff,
    turnovers_diff=turnovers_diff,
    elo_diff=elo,
    rest_days_diff=rest_days_diff,
    plus_minus_diff=plus_minus_diff,
    efg_diff=efg_diff,
)

In [None]:
# Function that returns label and list of calculated features for specific game
def extract_features(game, all_games, feature_list, n_matches = 10):
    """Build the label and feature vector for one game row.

    Args:
        game: Row describing the game from one team's point of view
            (reads ``TEAM_ID``, ``MATCHUP``, ``GAME_DATE`` and ``WIN``).
        all_games: Frame searched for earlier games of both teams. The last
            ``n_matches`` matching rows are used, so it is expected to be
            ordered by date - presumably guaranteed by the caller; verify.
        feature_list: Names of entries in ``FEATURE_FUNCTIONS`` to evaluate,
            in output order.
        n_matches: Number of previous games required for each team.

    Returns:
        ``(label, features)`` where ``label`` is ``game["WIN"]`` and
        ``features`` is a list with one value per name in ``feature_list``,
        or ``None`` when either team has fewer than ``n_matches`` earlier games.
    """
    team_id = game["TEAM_ID"]
    opponent_id = get_opponent_id(game)
    played_earlier = all_games['GAME_DATE'] < game["GAME_DATE"]

    def _last_games_of(some_team_id):
        # Last n games played by the given team strictly before this game
        mask = (all_games['TEAM_ID'] == some_team_id) & played_earlier
        return all_games[mask].tail(n_matches)

    team_recent_games = _last_games_of(team_id)
    opponent_recent_games = _last_games_of(opponent_id)

    # If there isn't enough information skip
    if min(len(team_recent_games), len(opponent_recent_games)) < n_matches:
        return None

    # Generate features by calling feature functions defined in FEATURE_FUNCTIONS
    features = []
    for name in feature_list:
        compute = FEATURE_FUNCTIONS[name]
        features.append(compute(team_recent_games, opponent_recent_games, game))

    return game["WIN"], features

# Function that returns opponent team id
def get_opponent_id(game):
    """Look up the opponent's team id from the ``MATCHUP`` string of ``game``.

    The opponent abbreviation is the text after ``' vs. '`` when the matchup
    contains ``'vs'``, otherwise the text after ``' @ '``. It is resolved
    with ``teams.find_team_by_abbreviation`` and that record's ``"id"`` is
    returned.
    """
    matchup = game["MATCHUP"]
    separator = ' vs. ' if 'vs' in matchup else ' @ '
    opponent_abbreviation = matchup.split(separator)[-1]
    return teams.find_team_by_abbreviation(opponent_abbreviation)["id"]

# Function that creates and returns dataset X and labels y
def create_datasets(games_df: pd.DataFrame, feature_list, n_matches = 10, print_progress = False):
    """Build the feature matrix ``X`` and label vector ``y`` for a set of games.

    Each row of ``games_df`` is passed to ``extract_features``; rows for which
    it returns ``None`` (not enough previous games) are skipped.

    Args:
        games_df: Game rows; also used as the history searched for each
            team's previous games.
        feature_list: Names of the features to compute, in column order.
        n_matches: Number of previous games each team must have.
        print_progress: When true, print a progress line every 500 rows and
            a summary at the end.

    Returns:
        ``(X, y)`` as NumPy arrays. ``X`` always has one column per entry of
        ``feature_list`` - including when no row qualified, in which case its
        shape is ``(0, len(feature_list))`` rather than ``(0,)``.
    """
    X = []
    y = []

    skipped = 0
    num_of_games = len(games_df)

    # iterrows() yields index *labels*; count positions separately so the
    # progress output is correct for any index, not only a 0..n-1 RangeIndex.
    for position, (_, game) in enumerate(games_df.iterrows(), start=1):
        result = extract_features(game, games_df, feature_list, n_matches)

        if result is None:
            skipped += 1
            continue

        label, features = result

        X.append(features)
        y.append(label)

        if print_progress and position % 500 == 0:
            print(f"Processed {position}/{num_of_games} games...")

    if print_progress:
        print(f"Created dataset with {num_of_games - skipped} games.")
        print(f"Skipped {skipped} games (games with less than {n_matches} previous matches).\n")

    X_array = np.array(X)
    if not X:
        # Keep a 2-D shape so downstream scalers/models get a clear
        # "0 samples" condition instead of a 1-D array.
        X_array = X_array.reshape(0, len(feature_list))

    return X_array, np.array(y)

# Feature set used by the baseline model and the neural network below
default_feature_list = [
    'win_rate_diff',
    'home_advantage',
    'avg_points_diff',
    'fg_pct_diff',
    'elo_diff',
    'rest_days_diff',
    'efg_diff',
]

X_train, y_train = create_datasets(
    train_games_df, default_feature_list, print_progress=True
)
X_test, y_test = create_datasets(
    test_games_df, default_feature_list, print_progress=True
)

Processed 500/4920 games...
Processed 1000/4920 games...
Processed 1500/4920 games...
Processed 2000/4920 games...
Processed 2500/4920 games...
Processed 3000/4920 games...
Processed 3500/4920 games...
Processed 4000/4920 games...
Processed 4500/4920 games...
Created dataset with 4606 games.
Skipped 314 games (games with less than 10 previous matches).

Processed 500/2460 games...
Processed 1000/2460 games...
Processed 1500/2460 games...
Processed 2000/2460 games...
Created dataset with 2150 games.
Skipped 310 games (games with less than 10 previous matches).



### 3) BASELINE MODEL (Logistic Regression)

In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd

# Scaling features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train_scaled, y_train)

# Hard predictions and probability of the positive class on the test split
predictions_lr = log_reg.predict(X_test_scaled)
prediction_prob_lr = log_reg.predict_proba(X_test_scaled)[:, 1]

print(f"LR Train Accuracy = {accuracy_score(y_train, log_reg.predict(X_train_scaled))}")
print(f"LR Test Accuracy  = {accuracy_score(y_test, predictions_lr)}")

df_lr = pd.DataFrame(
    dict(
        y_test=y_test,
        predicted=predictions_lr,
        winning_probability=prediction_prob_lr,
    )
)
display(df_lr.head())



LR Train Accuracy = 0.6461137646547981
LR Test Accuracy  = 0.6702325581395349


Unnamed: 0,y_test,predicted,winning_probability
0,0,0,0.425708
1,1,1,0.574292
2,1,1,0.604282
3,0,0,0.395718
4,0,1,0.598588


#### 3.1) Evaluation of different features and their predictive significance (with baseline model)

In [14]:
n_matches_list = [2, 5, 10]

FEATURE_GROUPS = {
    "1" : ["home_advantage", "win_rate_diff", "elo_diff"],
    "2": ["home_advantage", "win_rate_diff", "fg_pct_diff", "avg_points_diff", "elo_diff"],
    "3": ["win_rate_diff", "fg_pct_diff", "avg_points_diff", "elo_diff"],
    "4": ["home_advantage", "win_rate_diff", "fg3_pct_diff", "rebounds_diff", "turnovers_diff", "elo_diff"],
    "5": ["home_advantage", "win_rate_diff", "fg3_pct_diff", "rebounds_diff", "turnovers_diff", "avg_points_diff"],
    "6": ["rebounds_diff", "fg_pct_diff", "avg_points_diff"]
}

results = []

# NOTE: everything produced inside the loop uses a "grp" name on purpose.
# Reusing X_train / y_train / X_test / y_test / scaler / X_*_scaled / log_reg
# here would overwrite the baseline datasets that later cells train on, so
# their results would depend on how far this (slow, interruptible) loop got.
for n_matches in n_matches_list:
    for name, feature_list in FEATURE_GROUPS.items():

        # Creating datasets
        X_train_grp, y_train_grp = create_datasets(train_games_df, feature_list, n_matches)
        X_test_grp, y_test_grp = create_datasets(test_games_df, feature_list, n_matches)

        # Scaling features
        scaler_grp = StandardScaler()
        X_train_grp_scaled = scaler_grp.fit_transform(X_train_grp)
        X_test_grp_scaled = scaler_grp.transform(X_test_grp)

        # Logistic Regression
        log_reg_grp = LogisticRegression(max_iter=1000)
        log_reg_grp.fit(X_train_grp_scaled, y_train_grp)

        acc_train = accuracy_score(y_train_grp, log_reg_grp.predict(X_train_grp_scaled))
        acc_test = accuracy_score(y_test_grp, log_reg_grp.predict(X_test_grp_scaled))

        results.append({
            "n_matches": n_matches,
            "feature_list": feature_list,
            "train_accuracy": acc_train,
            "test_accuracy": acc_test
        })

results_df = pd.DataFrame(results).reset_index(drop=True)

# Show full feature lists for this one display only; option_context restores
# the previous setting even if rendering raises.
with pd.option_context("display.max_colwidth", None):
    display(results_df.sort_values("test_accuracy", ascending=False).style.hide(axis="index"))

KeyboardInterrupt: 

### 4) NEURAL NETWORK MODEL (MLP)

In [15]:
# MLP
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, log_loss

def _get_activation(name: str) -> nn.Module:
    name = name.lower()
    if name == "relu":
        return nn.ReLU()
    if name == "leakyrelu":
        return nn.LeakyReLU(0.01)
    if name == "tanh":
        return nn.Tanh()
    if name == "sigmoid":
        return nn.Sigmoid()
    raise ValueError(f"Nepoznata aktivacija: {name} (probaj: relu, leakyrelu, tanh, sigmoid)")

def _get_optimizer(name: str, params, lr: float, weight_decay: float):
    name = name.lower()
    if name == "adam":
        return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    if name == "sgd":
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    if name == "adamw":
        return torch.optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Nepoznat optimizer: {name} (probaj: adam, sgd, adamw)")

class MLP(nn.Module):
    """Fully connected network that outputs one logit per sample.

    Each hidden layer is ``Linear -> activation [-> Dropout]``; the final
    layer is a single-unit ``Linear`` with no activation, so the output is a
    raw logit.

    Args:
        input_dim: Number of input features.
        hidden_layers: Iterable with the width of each hidden layer.
        activation: Activation name understood by ``_get_activation``.
        dropout: Dropout probability after each hidden activation; no
            dropout layer is added when it is falsy or not positive.
    """

    def __init__(self, input_dim: int, hidden_layers=(32, 16), activation="relu", dropout=0.2):
        super().__init__()
        # Fail fast on an unknown activation name, even with no hidden layers.
        _get_activation(activation)

        layers = []
        prev = input_dim

        for h in hidden_layers:
            layers.append(nn.Linear(prev, h))
            # Ask the factory for a new module per layer. Re-instantiating
            # via ``act.__class__()`` would silently drop any constructor
            # arguments the factory configured (e.g. the LeakyReLU slope).
            layers.append(_get_activation(activation))
            if dropout and dropout > 0:
                layers.append(nn.Dropout(dropout))
            prev = h

        layers.append(nn.Linear(prev, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack for input ``x``."""
        return self.net(x)

def train_mlp(
    X_train_scaled, y_train,
    X_test_scaled,  y_test,
    hidden_layers=(32, 16),
    activation="relu",
    optimizer_name="adam",
    lr=1e-3,
    weight_decay=1e-4,
    dropout=0.2,
    batch_size=64,
    epochs=50,
    seed=0,
    threshold=0.5,
    verbose_every=10,
):
    """Train an ``MLP`` with binary cross-entropy and evaluate it on the test split.

    Args:
        X_train_scaled, y_train: Training features and labels; ``y_train``
            must support ``.reshape(-1, 1)``.
        X_test_scaled, y_test: Test features and labels.
        hidden_layers, activation, dropout: Forwarded to ``MLP``.
        optimizer_name, lr, weight_decay: Forwarded to ``_get_optimizer``.
        batch_size: Mini-batch size of the shuffled training loader.
        epochs: Number of passes over the training data.
        seed: Seed applied to both torch and NumPy before anything else.
        threshold: Probability at or above which a sample is predicted as 1.
        verbose_every: Print the mean training loss every this many epochs;
            a falsy value disables printing.

    Returns:
        Dict with ``test_accuracy``, ``test_logloss``, ``prob`` (predicted
        probabilities), ``pred`` (0/1 predictions) and the trained ``model``.
    """
    # Seeding comes first so weight init and batch shuffling are repeatable.
    torch.manual_seed(seed)
    np.random.seed(seed)

    train_inputs = torch.tensor(X_train_scaled, dtype=torch.float32)
    train_targets = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)
    test_inputs = torch.tensor(X_test_scaled, dtype=torch.float32)

    train_set = TensorDataset(train_inputs, train_targets)
    loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    model = MLP(
        input_dim=X_train_scaled.shape[1],
        hidden_layers=hidden_layers,
        activation=activation,
        dropout=dropout
    )

    criterion = nn.BCEWithLogitsLoss()
    optimizer = _get_optimizer(optimizer_name, model.parameters(), lr=lr, weight_decay=weight_decay)

    model.train()
    for epoch in range(epochs):
        # Sum of per-sample losses, so the epoch mean is exact even when the
        # last batch is smaller than the others.
        running = 0.0
        for batch_inputs, batch_targets in loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_targets)
            batch_loss.backward()
            optimizer.step()
            running += batch_loss.item() * batch_inputs.size(0)

        if verbose_every and (epoch + 1) % verbose_every == 0:
            print(f"Epoch {epoch+1}/{epochs} - loss: {running/len(loader.dataset):.4f}")

    # Evaluation: disable dropout and gradient tracking
    model.eval()
    with torch.no_grad():
        test_logits = model(test_inputs)
        prob = torch.sigmoid(test_logits).cpu().numpy().reshape(-1)
        pred = (prob >= threshold).astype(int)

    return {
        "test_accuracy": accuracy_score(y_test, pred),
        "test_logloss": log_loss(y_test, prob, labels=[0, 1]),
        "prob": prob,
        "pred": pred,
        "model": model
    }


In [16]:
# Experiment 1: hidden layers (32, 16), ReLU, dropout 0.2, Adam
res1 = train_mlp(
    X_train_scaled, y_train, X_test_scaled, y_test,
    hidden_layers=(32, 16),
    activation="relu",
    dropout=0.2,
    optimizer_name="adam",
    lr=1e-3,
    weight_decay=1e-4,
    batch_size=64,
    epochs=80,
)
print(res1["test_accuracy"], res1["test_logloss"])


Epoch 10/80 - loss: 0.6321
Epoch 20/80 - loss: 0.6275
Epoch 30/80 - loss: 0.6242
Epoch 40/80 - loss: 0.6232
Epoch 50/80 - loss: 0.6229
Epoch 60/80 - loss: 0.6230
Epoch 70/80 - loss: 0.6220
Epoch 80/80 - loss: 0.6222
0.6706976744186046 0.6062265731258569


In [17]:
# Experiment 2: single hidden layer (16), tanh, no dropout, AdamW
res2 = train_mlp(
    X_train_scaled, y_train, X_test_scaled, y_test,
    hidden_layers=(16,),
    activation="tanh",
    dropout=0.0,
    optimizer_name="adamw",
    lr=3e-4,
    weight_decay=1e-4,
    batch_size=128,
    epochs=120,
)
print(res2["test_accuracy"], res2["test_logloss"])


Epoch 10/120 - loss: 0.6351
Epoch 20/120 - loss: 0.6302
Epoch 30/120 - loss: 0.6274
Epoch 40/120 - loss: 0.6256
Epoch 50/120 - loss: 0.6245
Epoch 60/120 - loss: 0.6239
Epoch 70/120 - loss: 0.6235
Epoch 80/120 - loss: 0.6232
Epoch 90/120 - loss: 0.6231
Epoch 100/120 - loss: 0.6230
Epoch 110/120 - loss: 0.6230
Epoch 120/120 - loss: 0.6228
0.6781395348837209 0.6037293762102899


In [18]:
# Experiment 3: single hidden layer (16), ReLU, no dropout, Adam, 200 epochs
res = train_mlp(
    X_train_scaled, y_train, X_test_scaled, y_test,
    hidden_layers=(16,),
    activation="relu",
    dropout=0.0,
    optimizer_name="adam",
    lr=1e-3,
    weight_decay=1e-4,
    batch_size=128,
    epochs=200,
)

print(res["test_accuracy"], res["test_logloss"])

Epoch 10/200 - loss: 0.6305
Epoch 20/200 - loss: 0.6259
Epoch 30/200 - loss: 0.6241
Epoch 40/200 - loss: 0.6231
Epoch 50/200 - loss: 0.6227
Epoch 60/200 - loss: 0.6222
Epoch 70/200 - loss: 0.6217
Epoch 80/200 - loss: 0.6214
Epoch 90/200 - loss: 0.6209
Epoch 100/200 - loss: 0.6207
Epoch 110/200 - loss: 0.6208
Epoch 120/200 - loss: 0.6202
Epoch 130/200 - loss: 0.6200
Epoch 140/200 - loss: 0.6196
Epoch 150/200 - loss: 0.6196
Epoch 160/200 - loss: 0.6193
Epoch 170/200 - loss: 0.6190
Epoch 180/200 - loss: 0.6189
Epoch 190/200 - loss: 0.6189
Epoch 200/200 - loss: 0.6188
0.6674418604651163 0.6060921786530654


In [19]:
# Experiment 4: hidden layers (32, 16), ReLU, no dropout, SGD, 300 epochs
res = train_mlp(
    X_train_scaled, y_train, X_test_scaled, y_test,
    hidden_layers=(32, 16),
    activation="relu",
    dropout=0.0,
    optimizer_name="sgd",
    lr=5e-3,
    weight_decay=1e-4,
    batch_size=128,
    epochs=300,
)
print(res["test_accuracy"], res["test_logloss"])

Epoch 10/300 - loss: 0.6411
Epoch 20/300 - loss: 0.6309
Epoch 30/300 - loss: 0.6273
Epoch 40/300 - loss: 0.6252
Epoch 50/300 - loss: 0.6235
Epoch 60/300 - loss: 0.6227
Epoch 70/300 - loss: 0.6223
Epoch 80/300 - loss: 0.6218
Epoch 90/300 - loss: 0.6211
Epoch 100/300 - loss: 0.6205
Epoch 110/300 - loss: 0.6204
Epoch 120/300 - loss: 0.6200
Epoch 130/300 - loss: 0.6196
Epoch 140/300 - loss: 0.6190
Epoch 150/300 - loss: 0.6192
Epoch 160/300 - loss: 0.6181
Epoch 170/300 - loss: 0.6179
Epoch 180/300 - loss: 0.6175
Epoch 190/300 - loss: 0.6171
Epoch 200/300 - loss: 0.6163
Epoch 210/300 - loss: 0.6160
Epoch 220/300 - loss: 0.6157
Epoch 230/300 - loss: 0.6152
Epoch 240/300 - loss: 0.6150
Epoch 250/300 - loss: 0.6146
Epoch 260/300 - loss: 0.6134
Epoch 270/300 - loss: 0.6131
Epoch 280/300 - loss: 0.6127
Epoch 290/300 - loss: 0.6119
Epoch 300/300 - loss: 0.6124
0.6637209302325582 0.6075158675019169
