<a href="https://colab.research.google.com/github/frankodzia/ai-course-gp/blob/main/Lekcja_20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json, random, math

In [None]:
# Read the shared game-tag dataset from disk and parse it as JSON.
with open("tagi_gier_wspoldzielone.json", encoding="utf-8") as f:
  data = json.loads(f.read())

In [None]:
# Maps each game label to the number of records carrying it (filled in below).
label_count = dict()

In [None]:
# Tally how many records carry each game label.
# The counter is (re)created here so that re-running this cell starts from
# zero instead of adding to counts left over from a previous run.
label_count = {}
for game in data:
  label = game['label']
  label_count[label] = label_count.get(label, 0) + 1

print(f"Liczba elementów słownika: {len(data)}")
print(f"Liczba unikalnych gier: {len(label_count)}")
print("Liczba rekordów dla każdej gry")
for label, count in label_count.items():
  print(f" - {label} : {count}")

Liczba elementów słownika: 300
Liczba unikalnych gier: 5
Liczba rekordów dla każdej gry
 - minecraft : 60
 - fortnite : 60
 - roblox : 60
 - lol : 60
 - cs : 60


In [None]:
# Build two per-tag indexes over the dataset:
#   tag_count     - how many times each tag occurs across all records
#   tag_to_labels - the distinct labels each tag occurs with, in first-seen order
tag_count = {}
tag_to_labels = {}

for game in data:
  label = game['label']
  for tag in game['tags']:
    tag_count[tag] = tag_count.get(tag, 0) + 1
    if label not in tag_to_labels.setdefault(tag, []):
      tag_to_labels[tag].append(label)

In [None]:
# Split the tags into those seen with exactly one label and all the others,
# then report both totals together with the overall number of tags.
only_one = 0
multiple_labels = 0
for tag in tag_to_labels:
  label_total = len(tag_to_labels[tag])
  if label_total != 1:
    multiple_labels += 1
    continue
  only_one += 1
print(f"Liczba tagów: {len(tag_count)}")
print(f"Tagi przypisane tylko do jednej grupy: {only_one}")
print(f"Tagi wspólne dla wielu gier: {multiple_labels}")

Liczba tagów: 74
Tagi przypisane tylko do jednej grupy: 25
Tagi wspólne dla wielu gier: 49


In [None]:
def load_data(path):
  """Load a dataset from a JSON file.

  Args:
    path: Location of a UTF-8 encoded JSON file.

  Returns:
    The parsed JSON content of the file.
  """
  # Open the file the caller asked for. Previously the argument was ignored
  # and a hard-coded file name was read regardless of `path`.
  with open(path, encoding="utf-8") as f:
    return json.load(f)

In [None]:
# Quick sanity check: show how many records `data` currently holds.
print(len(data))
def train_test_split(data, test_ratio = 0.20, seed = None):
  """Randomly split records into a training part and a test part.

  The input sequence is left untouched: a copy is shuffled, so the caller's
  list keeps its original order.

  Args:
    data: Sequence of records to split.
    test_ratio: Fraction of the records that goes to the test part.
    seed: Optional seed for a reproducible shuffle. When None, the
      module-level `random` generator is used, so an earlier
      `random.seed(...)` call still controls the result.

  Returns:
    A `(train, test)` tuple of lists.
  """
  shuffled = list(data)
  if seed is None:
    random.shuffle(shuffled)
  else:
    # A private generator keeps the global random state unaffected.
    random.Random(seed).shuffle(shuffled)
  cut = int(len(shuffled) * (1 - test_ratio))
  return shuffled[:cut], shuffled[cut:]

300


In [None]:
def build_vocabulary(train):
  """Collect every distinct tag that occurs in the given records.

  Args:
    train: Iterable of records, each exposing its tags under the "tags" key.

  Returns:
    A set with one entry per distinct tag.
  """
  tag_lists = (rec["tags"] for rec in train)
  return set().union(*tag_lists)

In [None]:
def train_nb(train, vocab, alpha=1.0):
  """Collect the counts needed by the Naive Bayes scoring functions.

  Args:
    train: Iterable of records with a "label" and a "tags" entry.
    vocab: Collection of known tags; stored in the model as-is.
    alpha: Additive smoothing constant; stored in the model as-is.

  Returns:
    A dict with the keys:
      "class_counts" - number of records per label,
      "word_counts"  - per label, number of occurrences of each tag,
      "total_words"  - per label, total number of tag occurrences,
      "vocab", "alpha" - the values passed in,
      "total_docs"   - number of training records.
  """
  class_counts = {}
  word_counts = {}
  total_word = {}
  for rec in train:
    c = rec["label"]
    class_counts[c] = class_counts.get(c, 0) + 1
    tags_for_class = word_counts.setdefault(c, {})
    total_word.setdefault(c, 0)
    # Tags are tallied for every record. This loop used to sit outside the
    # record loop, so only the last record's tags were ever counted.
    for tag in rec["tags"]:
      tags_for_class[tag] = tags_for_class.get(tag, 0) + 1
      total_word[c] += 1

  # Assemble the model once, after all records are processed, so it also
  # exists when `train` is empty or a record carries no tags.
  return {
      "class_counts": class_counts,
      "word_counts": word_counts,
      "total_words": total_word,
      "vocab": vocab,
      "alpha": alpha,
      "total_docs": len(train),
  }

In [None]:
def log_prob(model, rec, class_name):
    """Score one record against one class, in log space.

    The score is the log of the class prior (records of the class divided by
    all records) plus, for every tag of the record, the log of the additively
    smoothed share of that tag among the class's tag occurrences.

    Args:
        model: Dict with "class_counts", "total_docs", "vocab", "alpha",
            "word_counts" and "total_words" entries.
        rec: Record whose "tags" are scored.
        class_name: Class to score the record against.

    Returns:
        The accumulated log-score as a float.
    """
    prior = model["class_counts"][class_name] / model["total_docs"]
    score = math.log(prior)
    vocab_size = len(model["vocab"])
    smoothing = model["alpha"]

    for tag in rec["tags"]:
        tag_hits = model["word_counts"][class_name].get(tag, 0)
        class_total = model["total_words"][class_name]
        likelihood = (tag_hits + smoothing) / (class_total + smoothing * vocab_size)
        score += math.log(likelihood)
    return score

In [None]:
def predict(model, rec):
    """Pick the class with the highest log-score for a record.

    Every class listed in the model's "class_counts" is scored with
    `log_prob`; on equal scores the class encountered first wins.

    Args:
        model: Trained model dict.
        rec: Record to classify.

    Returns:
        The winning class, or None when no class scores above -1e99
        (for instance when the model knows no classes).
    """
    scores = {name: log_prob(model, rec, name) for name in model["class_counts"]}
    winner, top_score = None, -1e99
    for name, score in scores.items():
        if score > top_score:
            winner = name
            top_score = score
    return winner

In [None]:
# Evaluation on the test set
def evaluate(model, test):
    """Measure and print the classifier's accuracy on a set of records.

    Args:
        model: Trained model dict, passed through to `predict`.
        test: Sized collection of records, each with a "label" entry.

    Returns:
        The fraction of records whose predicted class equals their label,
        or None when `test` is empty (accuracy is undefined in that case).
    """
    total = len(test)
    if total == 0:
        # Avoid a ZeroDivisionError; report the situation instead.
        print("Brak rekordów testowych - nie można policzyć dokładności.")
        return None
    correct = sum(1 for rec in test if predict(model, rec) == rec["label"])
    accuracy = correct / total
    print(f"Dokładność (accuracy) = {accuracy:.2%}")
    # Hand the value back so callers can use it, not only read it on screen.
    return accuracy

In [None]:
def main(seed = 42):
  """Run the whole experiment: load, split, train, show one sample, evaluate.

  Args:
    seed: Value used to seed the `random` module before splitting, so that
      repeated runs produce the same split and the same reported accuracy.
  """
  path = "tagi_gier_wspoldzielone.json"
  data = load_data(path)
  # Seed before the split so the shuffle is reproducible across runs.
  random.seed(seed)
  train, test = train_test_split(data, test_ratio = 0.20)
  # The vocabulary comes from the training records only; building it from
  # all of `data` would let held-out test records influence the model.
  vocab = build_vocabulary(train)
  model = train_nb(train, vocab, alpha = 1.0)
  sample = test[0]
  print("\nPrzykładowe tagi:", sample["tags"])
  print("Rzeczywista gra:", sample["label"])
  print("Model przewidział:", predict(model, sample))

  evaluate(model, test)
main()

{'class_counts': {'fortnite': 45, 'roblox': 48, 'minecraft': 48, 'lol': 49, 'cs': 50}, 'word_counts': {'fortnite': {}, 'roblox': {}, 'minecraft': {'karabin': 1, 'arena': 1, 'tryb solo': 1, 'pistolet': 1, 'kopanie': 1, 'redstone': 1, 'wyzwania': 1, 'nether': 1}, 'lol': {}, 'cs': {}}, 'total_words': {'fortnite': 0, 'roblox': 0, 'minecraft': 8, 'lol': 0, 'cs': 0}, 'vocab': {'skiny', 'redstone', 'slurp', 'waluta', 'robuxy', 'biomy', 'hardcore', 'bomba', 'questy', 'online', 'crafting', 'baron', 'tryb', 'battle', 'kopanie', 'ulti', 'runy', 'parkour', 'dust2', 'v-bucksy', 'studio', 'nexus', 'emotki', 'kolekcje', 'sandbox', 'awp', 'sklep', 'nether', 'przetrwanie', 'strzał w głowę', 'budowanie', 'duety', 'prywatny serwer', 'multiplayer', 'burza', 'bohaterowie', 'wydarzenia', 'karabin', 'strzelanie', 'drużyna', 'survival', 'rankedy', 'lua', 'autobus bojowy', 'pistolet', 'tickrate', 'snajper', 'tryb kreatywny', 'brokhaven', 'czat', 'wyzwania', 'loot', 'creepery', 'roleplay', 'diamenty', 'sezon', 