In [14]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import MeanAbsoluteError

# Silence scikit-learn ConvergenceWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Load the raw table from the current working directory and drop two columns
# that are not used anywhere below.
data = pd.read_csv('./bg_info.csv')
data = data.drop(columns=['Type 2', 'Price'])
# Keep only rows that have no missing value in any column other than 'Type 1'
# ('Type 1' itself may still be null after this step).
data = data.loc[data.drop(columns=['Type 1']).notnull().all(axis=1)]
# Outlier flag, False for every row to start with; the mark_outliers_* helpers
# below switch it to True for the rows they consider outliers.
data['ToRemove'] = False

def mark_outliers_quantile(df, column):
    """Flag rows whose value in ``column`` lies outside the 1.5 * IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles of ``df[column]``. Rows strictly outside
    the fences get ``True`` written into the 'ToRemove' column of ``df``;
    other rows are left as they were. ``df`` is modified in place and nothing
    is returned.
    """
    values = df[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile

    below_fence = values < first_quartile - 1.5 * spread
    above_fence = values > third_quartile + 1.5 * spread
    df.loc[below_fence | above_fence, 'ToRemove'] = True

def mark_outliers_sigma(df, column):
    """Flag rows whose value in ``column`` is more than 3 std from the mean.

    Mean and standard deviation are taken from ``df[column]`` itself. Rows
    strictly below ``mean - 3 * std`` or strictly above ``mean + 3 * std`` get
    ``True`` written into the 'ToRemove' column of ``df``; other rows are left
    as they were. ``df`` is modified in place and nothing is returned.
    """
    values = df[column]
    center = values.mean()
    spread = values.std()

    too_small = values < center - 3 * spread
    too_large = values > center + 3 * spread
    df.loc[too_small | too_large, 'ToRemove'] = True

# Columns screened with the IQR rule vs. the 3-sigma rule.
# NOTE(review): 'Num of voters' is the regression target (see X, y below), so
# extreme targets are removed from every split, including the test set.
quantile_columns = ['Min time', 'Max time']
sigma_columns = ['Geek Rating', 'Complexity', 'Avg rating', 'Num of voters']

# The helpers only flag rows; nothing is dropped inside these loops, so every
# column's thresholds are computed over the same, still unfiltered, rows.
for col in quantile_columns:
    mark_outliers_quantile(data, col)
for col in sigma_columns:
    mark_outliers_sigma(data, col)

# Drop every flagged row, then the helper flag column itself.
data = data[data['ToRemove'] == False].drop(columns=['ToRemove'])
# Keep rows with 1950 <= Year < 2025 and a non-zero Complexity.
data = data[(data['Year'] >= 1950) & (data['Year'] < 2025) & (data['Complexity'] != 0.0)]

# Shared bin edges for 'Max players' and 'Age'. pd.cut intervals are
# right-closed by default: (0, 1] -> '1', ..., (10, 15] -> '11-15',
# (15, 20] -> '16-20', (20, inf] -> '>20'; include_lowest=True makes the first
# interval also contain 0.
bins = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, float('inf')]
labels = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11-15', '16-20', '>20']
data['Max_players_category'] = pd.cut(data['Max players'], bins=bins, labels=labels, include_lowest=True)
data['Age_category'] = pd.cut(data['Age'], bins=bins, labels=labels, include_lowest=True)
# 'Min players' is not binned: each distinct value becomes its own category.
data['Min_players_category'] = data['Min players'].astype('category')
# Remove the raw columns that were just re-encoded, plus 'Title'.
data = data.drop(columns=['Age', 'Max players', 'Min players', 'Title'])
# One-hot encode 'Type 1', dropping the first level.
data = pd.get_dummies(data, columns=['Type 1'], drop_first=True)

# Replace each categorical column by its integer codes shifted by one. pandas
# uses code -1 for missing/unbinned values, so those end up as 0.
data = data.apply(lambda col: col.cat.codes + 1 if col.dtype.name == 'category' else col)
# Target is 'Num of voters'; every remaining column is a feature.
X, y = data.drop(columns=['Num of voters']), data['Num of voters']
# Hold out 20% as the test set, then 20% of the remainder as validation
# (same fixed seed for both splits).
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=2137)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=2137)

# Fit the scaler on the training split only and reuse its statistics for the
# validation and test splits.
scaler = StandardScaler()
X_train_scaled, X_val_scaled, X_test_scaled = scaler.fit_transform(X_train), scaler.transform(X_val), scaler.transform(X_test)

def calculate_percentage_within_margin(y_true, y_pred, margin=0.1):
    """Return the share of predictions within a relative margin of the target.

    A prediction counts as a hit when it lies between ``y_true * (1 - margin)``
    and ``y_true * (1 + margin)``, both ends inclusive.

    Args:
        y_true: Target values (pandas Series, NumPy array or plain sequence).
        y_pred: Predicted values, matched to ``y_true`` by position; index
            labels are ignored and an ``(n, 1)`` array is flattened.
        margin: Relative tolerance as a fraction of the target (0.1 = 10%).

    Returns:
        The percentage of hits rounded to two decimals, as a string ending in
        "%", e.g. "37.24%".

    Raises:
        ZeroDivisionError: If ``y_true`` is empty.
    """
    # Work on flat float arrays so the comparison is purely positional and
    # also accepts lists or column-shaped model output.
    true_vals = np.asarray(y_true, dtype=float).ravel()
    pred_vals = np.asarray(y_pred, dtype=float).ravel()

    n_samples = true_vals.size
    if n_samples == 0:
        raise ZeroDivisionError("y_true is empty; the percentage is undefined")

    edge_a = true_vals * (1 - margin)
    edge_b = true_vals * (1 + margin)
    # For negative targets the two products swap order, so sort them
    # explicitly; otherwise no prediction could ever fall inside the band.
    lower = np.minimum(edge_a, edge_b)
    upper = np.maximum(edge_a, edge_b)

    hits = np.sum((pred_vals >= lower) & (pred_vals <= upper))
    return str(round(hits / n_samples * 100, 2)) + "%"


In [15]:

# Rebuild the network whose weights are restored below: two hidden ReLU
# layers (64 -> 32) followed by a single linear output unit.
model = keras.Sequential(
    [keras.Input(shape=(X_train_scaled.shape[1],))]
    + [layers.Dense(width, activation='relu') for width in (64, 32)]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/2_layers.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')

  saveable.load_own_variables(weights_store.get(inner_path))


Wagi załadowane z model_checkpoints/2_layers.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 870us/step - loss: 65539.3750 - mean_absolute_error: 66.1840
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 992us/step
Loss: 43361.09375, MAE: 65.69286346435547, RMSE: 208.23326763512117, Accuracy within margin: 37.24%


In [16]:
# Rebuild the network whose weights are restored below: four hidden ReLU
# layers (64 -> 32 -> 16 -> 8) followed by a single linear output unit.
model = keras.Sequential(
    [keras.Input(shape=(X_train_scaled.shape[1],))]
    + [layers.Dense(width, activation='relu') for width in (64, 32, 16, 8)]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/4_layers.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')

Wagi załadowane z model_checkpoints/4_layers.weights.h5
[1m  1/125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 118ms/step - loss: 3608.7910 - mean_absolute_error: 41.0372

  saveable.load_own_variables(weights_store.get(inner_path))


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 871us/step - loss: 58188.5742 - mean_absolute_error: 64.3757
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Loss: 41140.02734375, MAE: 66.05250549316406, RMSE: 202.83004546602558, Accuracy within margin: 40.21%


In [17]:

# Rebuild the network whose weights are restored below: five hidden ReLU
# layers (128 -> 64 -> 32 -> 16 -> 8) followed by a single linear output unit.
model = Sequential(
    [layers.Input(shape=(X_train_scaled.shape[1],))]
    + [layers.Dense(width, activation='relu') for width in (128, 64, 32, 16, 8)]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/5_layers.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')


Wagi załadowane z model_checkpoints/5_layers.weights.h5
[1m  1/125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 141ms/step - loss: 3528.3572 - mean_absolute_error: 40.0068

  saveable.load_own_variables(weights_store.get(inner_path))


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 927us/step - loss: 60478.0312 - mean_absolute_error: 58.8270
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Loss: 40519.5546875, MAE: 58.18440628051758, RMSE: 201.2946961236187, Accuracy within margin: 41.29%


In [18]:

# Rebuild the network whose weights are restored below: six hidden ReLU
# layers (256 -> 128 -> 64 -> 32 -> 16 -> 8) and a single linear output unit.
model = Sequential(
    [layers.Input(shape=(X_train_scaled.shape[1],))]
    + [layers.Dense(width, activation='relu') for width in (256, 128, 64, 32, 16, 8)]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/6_layers.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')


Wagi załadowane z model_checkpoints/6_layers.weights.h5
[1m  1/125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 155ms/step - loss: 5082.1230 - mean_absolute_error: 46.7018

  saveable.load_own_variables(weights_store.get(inner_path))


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 48353.4453 - mean_absolute_error: 58.1289
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Loss: 34706.11328125, MAE: 58.034706115722656, RMSE: 186.29576828594364, Accuracy within margin: 42.65%


In [19]:


# Rebuild the network whose weights are restored below: eight hidden ReLU
# layers followed by a single linear output unit.
# NOTE(review): the first width is 1028, not 1024, which breaks the halving
# pattern. It is kept as is because load_weights needs layer shapes that match
# the checkpoint; confirm against the training notebook before changing it.
model = keras.Sequential(
    [keras.Input(shape=(X_train_scaled.shape[1],))]
    + [
        layers.Dense(width, activation='relu')
        for width in (1028, 512, 256, 128, 64, 32, 16, 8)
    ]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/8_layers.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')


Wagi załadowane z model_checkpoints/8_layers.weights.h5


  saveable.load_own_variables(weights_store.get(inner_path))


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 67560.3047 - mean_absolute_error: 66.5326
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Loss: 44285.11328125, MAE: 66.08032989501953, RMSE: 210.44028435936406, Accuracy within margin: 33.72%


In [22]:

# Same six-hidden-layer architecture as the plain "6_layers" run
# (256 -> 128 -> 64 -> 32 -> 16 -> 8, linear output); only the checkpoint
# differs. Its file name suggests it was trained with a learning-rate
# schedule -- the training code is not part of this notebook section.
model = Sequential(
    [layers.Input(shape=(X_train_scaled.shape[1],))]
    + [layers.Dense(width, activation='relu') for width in (256, 128, 64, 32, 16, 8)]
    + [layers.Dense(1)]
)
model.compile(loss='mse', optimizer='adam', metrics=[MeanAbsoluteError()])

# Restore previously saved weights instead of training in this notebook.
checkpoint_path = "model_checkpoints/6_layers_learning_rate_schedule.weights.h5"
model.load_weights(checkpoint_path)
print(f"Wagi załadowane z {checkpoint_path}")

# evaluate() returns the compiled loss (MSE) followed by the MAE metric.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=1)
rmse = np.sqrt(loss)  # the loss is MSE, so its square root is the RMSE

# Share of test predictions that fall inside the helper's default margin.
y_pred = model.predict(X_test_scaled).flatten()
margin_accuracy = calculate_percentage_within_margin(y_test, y_pred)

print(f'Loss: {loss}, MAE: {mae}, RMSE: {rmse}, Accuracy within margin: {margin_accuracy}')


Wagi załadowane z model_checkpoints/6_layers_learning_rate_schedule.weights.h5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 61499.9180 - mean_absolute_error: 64.0226
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Loss: 45853.875, MAE: 64.36234283447266, RMSE: 214.13517926767662, Accuracy within margin: 37.02%
