<a href="https://colab.research.google.com/github/balazsakos03/allamvizsga_tudorontgen_BalazsAkos/blob/main/notebooks/allamvizsga_kezdeti_modell.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf, sys, os, subprocess, json, pathlib
import numpy as np
# Report the TensorFlow version and the GPU devices TensorFlow can see.
print(f"TF: {tf.__version__}")
print(f"GPU: {tf.config.list_physical_devices('GPU')}")

TF: 2.19.0
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


Git beállítás + repó klónozása

In [2]:
!git config --global user.name "balazsakos03"
!git config --global user.email "balazs.akos@student.ms.sapientia.ro"

!rm -rf /content/repo
REPO_URL = "https://github.com/balazsakos03/allamvizsga_tudorontgen_BalazsAkos.git"
!git clone "$REPO_URL" /content/repo

!ls -la /content/repo

Cloning into '/content/repo'...
remote: Enumerating objects: 13, done.[K
remote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 13 (delta 3), reused 8 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (13/13), done.
Resolving deltas: 100% (3/3), done.
total 36
drwxr-xr-x 7 root root 4096 Aug 18 17:52 .
drwxr-xr-x 1 root root 4096 Aug 18 17:52 ..
drwxr-xr-x 2 root root 4096 Aug 18 17:52 data
drwxr-xr-x 8 root root 4096 Aug 18 17:52 .git
-rw-r--r-- 1 root root  329 Aug 18 17:52 .gitignore
drwxr-xr-x 2 root root 4096 Aug 18 17:52 notebooks
-rw-r--r-- 1 root root   36 Aug 18 17:52 README.md
drwxr-xr-x 2 root root 4096 Aug 18 17:52 results
drwxr-xr-x 2 root root 4096 Aug 18 17:52 src


Kaggle API fájl beállítása

In [5]:
import os, shutil, zipfile

!pip -q install kaggle

assert os.path.exists('/content/kaggle.json'), "Töltsd fel a kaggle.json-t a /content gyökérbe!"

os.makedirs('/root/.kaggle', exist_ok=True)
shutil.copy('/content/kaggle.json', '/root/.kaggle/kaggle.json')
os.chmod('/root/.kaggle/kaggle.json', 0o600)

print("Kaggle API kész.")


Kaggle API kész.


Dataset letöltése és kicsomagolása

In [6]:
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia -p /content/data

zip_path = '/content/data/chest-xray-pneumonia.zip'
!unzip -q -o "$zip_path" -d /content/data
!rm "$zip_path"

!find /content/data -maxdepth 2 -type d


Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
License(s): other
Downloading chest-xray-pneumonia.zip to /content/data
 99% 2.27G/2.29G [00:25<00:00, 232MB/s]
100% 2.29G/2.29G [00:25<00:00, 95.8MB/s]
/content/data
/content/data/chest_xray
/content/data/chest_xray/val
/content/data/chest_xray/chest_xray
/content/data/chest_xray/__MACOSX
/content/data/chest_xray/test
/content/data/chest_xray/train


Adatbetöltés + modell + tréning

In [7]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os, json
import numpy as np

# --- Configuration -----------------------------------------------------------
DATA_DIR = "/content/data/chest_xray/chest_xray"
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
SEED = 42
AUTOTUNE = tf.data.AUTOTUNE


def _load_split(split, **loader_kwargs):
    """Build a batched image dataset from the ``DATA_DIR/<split>`` directory.

    Images are resized to IMG_SIZE and batched with BATCH_SIZE; any extra keyword
    arguments are forwarded to ``image_dataset_from_directory``.
    """
    return tf.keras.utils.image_dataset_from_directory(
        f"{DATA_DIR}/{split}",
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        **loader_kwargs,
    )


# --- Raw splits --------------------------------------------------------------
train_ds = _load_split("train", seed=SEED)
# NOTE(review): the recorded run reports only 16 files in this split, so every
# validation metric computed on it is very coarse.
val_ds = _load_split("val", seed=SEED)
# Fixed order, so labels and predictions can be collected in separate passes.
test_ds = _load_split("test", shuffle=False)

# Read the class names here, before the datasets are re-wrapped below.
class_names = train_ds.class_names
num_classes = len(class_names)
print("Osztályok:", class_names)

# --- Input pipelines: cache each split, shuffle only training, prefetch -------
train_ds = train_ds.cache().shuffle(1000).prefetch(AUTOTUNE)
val_ds   = val_ds.cache().prefetch(AUTOTUNE)
test_ds  = test_ds.cache().prefetch(AUTOTUNE)

# Augmentation stack: horizontal flip plus a small random rotation and zoom.
data_aug = keras.Sequential(
    layers=[
        layers.RandomFlip(mode="horizontal"),
        layers.RandomRotation(factor=0.05),
        layers.RandomZoom(height_factor=0.05),
    ],
    name="aug",
)

def make_cnn(input_shape=(224,224,3), num_classes=2, dropout_rate=0.25):
    """Build the ``xray_cnn`` classifier as a functional Keras model.

    The input passes through the module-level ``data_aug`` layers and a 1/255
    rescaling, then four convolutional stages (32, 64, 128 and 256 filters) and a
    dense head ending in a softmax layer.

    Args:
        input_shape: Shape of a single input image, passed to ``keras.Input``.
        num_classes: Number of units of the softmax output layer.
        dropout_rate: Dropout rate applied at the end of every convolutional
            stage. The dense head uses fixed rates of 0.5 and 0.25.

    Returns:
        An uncompiled ``keras.Model`` named ``"xray_cnn"``.
    """
    inputs = keras.Input(shape=input_shape)
    features = data_aug(inputs)
    features = layers.Rescaling(1./255)(features)

    for filters in (32, 64, 128, 256):
        # Each stage: two Conv -> BatchNorm -> ReLU units, then pooling and dropout.
        for _ in range(2):
            features = layers.Conv2D(filters, 3, padding="same")(features)
            features = layers.BatchNormalization()(features)
            features = layers.ReLU()(features)
        features = layers.MaxPooling2D()(features)
        features = layers.Dropout(dropout_rate)(features)

    # Dense head: (units, dropout rate) for each hidden layer.
    features = layers.Flatten()(features)
    for units, rate in ((256, 0.5), (64, 0.25)):
        features = layers.Dense(units, activation="relu")(features)
        features = layers.Dropout(rate)(features)

    outputs = layers.Dense(num_classes, activation="softmax")(features)
    return keras.Model(inputs, outputs, name="xray_cnn")

# Build the network for 3-channel inputs of IMG_SIZE, then compile it with Adam
# and the sparse categorical cross-entropy loss, tracking accuracy.
model = make_cnn(input_shape=IMG_SIZE + (3,), num_classes=num_classes)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["accuracy"],
)
model.summary()

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
# Upper bound on the number of epochs; EarlyStopping may end training earlier.
EPOCHS = 50
CHECKPOINT_PATH = "/content/results/checkpoints/best.keras"
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

# All three callbacks monitor val_loss, so "best" means the same epoch everywhere:
# the weights EarlyStopping restores into `model` and the file ModelCheckpoint
# writes. The checkpoint used to monitor val_accuracy, which could pick a
# different epoch than the restored weights; on the 16-image validation split
# accuracy also moves only in steps of 1/16, so ties are frequent.
cbs = [
    # Stop after 8 epochs without improvement and roll back to the best weights.
    EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True),
    # Halve the learning rate after 4 stagnant epochs, down to 1e-6.
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=4, min_lr=1e-6),
    # Keep only the best model seen so far on disk.
    ModelCheckpoint(CHECKPOINT_PATH, monitor="val_loss", save_best_only=True),
]

history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=cbs)


Found 5216 files belonging to 2 classes.
Found 16 files belonging to 2 classes.
Found 624 files belonging to 2 classes.
Osztályok: ['NORMAL', 'PNEUMONIA']


Epoch 1/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 334ms/step - accuracy: 0.7399 - loss: 5.9136 - val_accuracy: 0.5000 - val_loss: 1.0655 - learning_rate: 0.0010
Epoch 2/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 334ms/step - accuracy: 0.8630 - loss: 0.5298 - val_accuracy: 0.5625 - val_loss: 0.7276 - learning_rate: 0.0010
Epoch 3/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 326ms/step - accuracy: 0.8886 - loss: 0.2919 - val_accuracy: 0.5000 - val_loss: 4.8481 - learning_rate: 0.0010
Epoch 4/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 336ms/step - accuracy: 0.8810 - loss: 0.2960 - val_accuracy: 0.6875 - val_loss: 1.3399 - learning_rate: 0.0010
Epoch 5/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 327ms/step - accuracy: 0.8866 - loss: 0.2524 - val_accuracy: 0.5000 - val_loss: 0.9047 - learning_rate: 0.0010
Epoch 6/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0

Kiértékelés + ábrák mentése

In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

def _save_curve(metric, title):
    """Plot the train and validation curves of one metric and save them as a PNG.

    Reads ``history.history[metric]`` and ``history.history["val_" + metric]`` and
    writes ``/content/results/figures/<metric>.png``.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history[f"val_{metric}"])
    ax.set_title(title)
    ax.legend(["train", "val"])
    fig.savefig(f"/content/results/figures/{metric}.png")
    # Close the figure so it is not also rendered inline or kept in memory.
    plt.close(fig)


# Loss and accuracy curves
os.makedirs("/content/results/figures", exist_ok=True)
_save_curve("loss", "Loss")
_save_curve("accuracy", "Accuracy")

# Test metrics. test_ds was built with shuffle=False, so the labels gathered in
# one pass line up with the predictions produced in a second pass.
label_batches = [labels.numpy() for _, labels in test_ds]
y_true = np.concatenate(label_batches, axis=0)

# Predicted class = index of the highest softmax output.
class_probs = model.predict(test_ds, verbose=0)
y_pred = np.argmax(class_probs, axis=1)

cm = confusion_matrix(y_true, y_pred)
print("Confusion matrix:\n", cm)
print("\nReport:\n", classification_report(y_true, y_pred, target_names=class_names, digits=4))

# Save the model and a metrics summary
os.makedirs("/content/results/metrics", exist_ok=True)
# NOTE(review): the .h5 suffix selects the HDF5 format; the file name is kept
# unchanged in case other material refers to it. Consider the native ".keras"
# format for new artifacts.
model.save("/content/results/xray_cnn_best.h5")

# The last epoch is generally not the epoch whose weights `model` holds:
# training uses EarlyStopping(monitor="val_loss", restore_best_weights=True), so
# the summary also records the epoch with the lowest val_loss. The test results
# computed above are persisted too, so they do not live only in the cell output.
val_losses = history.history["val_loss"]
val_accuracies = history.history["val_accuracy"]
best_idx = int(np.argmin(val_losses))  # first epoch reaching the minimum

summary = {
    "classes": class_names,
    # Existing keys, kept for backward compatibility (last-epoch values).
    "final_val_acc": float(val_accuracies[-1]),
    "final_val_loss": float(val_losses[-1]),
    # Epoch with the lowest validation loss, numbered from 1 like the Keras log.
    "best_epoch": best_idx + 1,
    "best_val_loss": float(val_losses[best_idx]),
    "best_val_acc": float(val_accuracies[best_idx]),
    # Held-out test results; confusion-matrix rows are true classes in the order
    # of "classes".
    "test_accuracy": float(np.mean(y_true == y_pred)),
    "confusion_matrix": cm.tolist(),
}
with open("/content/results/metrics/summary.json", "w") as f:
    json.dump(summary, f, indent=2)




Confusion matrix:
 [[169  65]
 [ 22 368]]

Report:
               precision    recall  f1-score   support

      NORMAL     0.8848    0.7222    0.7953       234
   PNEUMONIA     0.8499    0.9436    0.8943       390

    accuracy                         0.8606       624
   macro avg     0.8674    0.8329    0.8448       624
weighted avg     0.8630    0.8606    0.8572       624



Eredmények bemásolása a repóba és push

In [11]:
# Copy results -> repo
!mkdir -p /content/repo/results
!cp -r /content/results/* /content/repo/results/

# (opcionális) Notebook mentés GitHubba: Colab menüben:
# File -> Save a copy in GitHub (ez a legkényelmesebb)

# Commit + push
%cd /content/repo
!git add .
!git commit -m "Add training results, figures, and saved model"
print("A következő lépés a push; felhasználónév + TOKEN kell majd.")
!git push origin main


/content/repo
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
A következő lépés a push; felhasználónév + TOKEN kell majd.
fatal: could not read Username for 'https://github.com': No such device or address
