In [None]:
Preprocessing zmiennych:

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


def preprocess_clean_data(dane: pd.DataFrame, y_col: str):
    """Clean the flights table and split it into a feature matrix and a target.

    Processing steps:
      1. Rename ``"#Layovers"`` -> ``"Num_Layovers"`` and
         ``"Price [PLN]"`` -> ``"Price"``.
      2. Encode ``"Ticket_class"`` (``"Ekonomiczna"`` -> 0, ``"Biznes"`` -> 1)
         when that column is present; any other value becomes NaN.
      3. Drop the columns that are not used as features (missing ones are
         ignored). The target column is never dropped.
      4. Coerce the remaining text columns to numbers; values that cannot be
         parsed become NaN.
      5. Replace missing feature values with 0. Replace missing target values
         with the target mean (numeric target) or with its most frequent
         value (non-numeric target, e.g. class labels).

    Parameters
    ----------
    dane : pd.DataFrame
        Input table. It is not modified; all work happens on a copy.
    y_col : str
        Name of the target column, given either in its original form
        (e.g. ``"Price [PLN]"``) or in its renamed form (``"Price"``).

    Returns
    -------
    (X, y)
        ``X`` is a DataFrame with the feature columns, ``y`` is a Series with
        the target values.

    Raises
    ------
    KeyError
        If the target column is not present in the data.
    """
    mapowanie_nazw = {"#Layovers": "Num_Layovers", "Price [PLN]": "Price"}
    # rename() returns a new frame, so the caller's DataFrame stays untouched.
    dane = dane.rename(columns=mapowanie_nazw)
    # Accept the target under its pre-rename name as well.
    y_col = mapowanie_nazw.get(y_col, y_col)

    # "Flight_date" and "Extraction_Time" are removed below, so they are not
    # parsed here: the parsing result was never used and the string-based
    # parsing failed on columns that were already datetimes or contained NaN.

    if "Ticket_class" in dane.columns:
        dane["Ticket_class"] = dane["Ticket_class"].map({"Ekonomiczna": 0, "Biznes": 1})

    # Columns that are not used as model features.
    kolumny_do_usuniecia = [
        "Extraction_Time", "Flight_date", "arr_city", "dep_city",
        "Departure_airport_name", "Destination_airport_name",
        "layover_airport", "ujemne", "low_cost1", "low_cost2",
        "Departure_airport_code", "Destination_airport_code",
        "Flight_weekday", "Extraction_Weekday",
        "Airline1", "Airline2", "Is_-2"
    ]
    dane = dane.drop(
        columns=[kolumna for kolumna in kolumny_do_usuniecia if kolumna != y_col],
        errors="ignore",
    )

    if y_col not in dane.columns:
        raise KeyError(f"Target column {y_col!r} not found in the data")

    # Separate the target from the features.
    y = dane[y_col].copy()
    X = dane.drop(columns=[y_col])

    # Convert text columns to numbers. Both the generic object dtype and the
    # dedicated pandas string dtype are treated as text.
    for col in X.columns:
        if pd.api.types.is_object_dtype(X[col]) or pd.api.types.is_string_dtype(X[col]):
            X[col] = pd.to_numeric(X[col], errors="coerce")

    # Fill the gaps.
    X = X.fillna(0)
    if y.isna().any():
        if pd.api.types.is_numeric_dtype(y):
            y = y.fillna(y.mean())
        else:
            # A mean is undefined for labels; use the most frequent one.
            najczestsze = y.mode(dropna=True)
            if not najczestsze.empty:
                y = y.fillna(najczestsze.iloc[0])

    return X, y


## Analiza klasyfikacji w zależności od usuniętej kolumny


In [None]:
import pandas as pd
import numpy as np
from perceptron_nn_classifier import MultiLayerPerceptronClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from glob import glob

# Feature-ablation experiment: for every feature column, train the classifier
# on the data WITHOUT that column, repeat the training several times and store
# the averaged test metrics. The summary is written to a new Excel file.

# === Experiment parameters ===
num_repetitions = 5
target_col = "Price"  # assumed to hold class labels at this point

# === Load and pre-process the data ===
df = pd.read_excel("loty_clean.xlsx")
X_full, y_full = preprocess_clean_data(df.copy(), y_col=target_col)

# One-hot encode the target.
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
y_full_encoded = lb.fit_transform(y_full)
num_classes = len(lb.classes_)
# For a two-class target LabelBinarizer returns a single 0/1 column instead of
# two one-hot columns. With one column, argmax over axis 1 is always 0 and the
# label width does not match num_classes, so expand it to [negative, positive].
if num_classes == 2 and y_full_encoded.shape[1] == 1:
    y_full_encoded = np.hstack([1 - y_full_encoded, y_full_encoded])

# Columns to test (after preprocessing).
testable_columns = X_full.columns.tolist()

# Baseline network parameters.
baseline_params = {
    "num_layers": [15],
    "learning_rate": 0.1,
    "activation_function": "relu",
    "num_epochs": 2000
}

# Collected results, one record per removed column.
results = []

# === Loop over the columns ===
for col in testable_columns:
    print(f"\n🧪 Testuję bez kolumny: {col}")

    X_temp = X_full.drop(columns=[col])

    try:
        # Train / validation / test split (70% / 15% / 15%); the fixed
        # random_state keeps the row assignment identical for every column.
        X_train_np, X_temp_np, y_train_np, y_temp_np = train_test_split(X_temp, y_full_encoded, test_size=0.3, random_state=42)
        X_val_np, X_test_np, y_val_np, y_test_np = train_test_split(X_temp_np, y_temp_np, test_size=0.5, random_state=42)
    except Exception as e:
        print(f"❌ Błąd przy podziale danych (kolumna {col}): {e}")
        continue

    # Metrics gathered over the repetitions.
    acc_list, prec_list, rec_list, f1_list = [], [], [], []

    for i in range(num_repetitions):
        print(f"   🔁 Powtórzenie {i+1}/{num_repetitions}")
        try:
            mlp = MultiLayerPerceptronClassifier(
                num_inputs=X_train_np.shape[1],
                num_classes=num_classes,
                num_layers=baseline_params["num_layers"],
                learning_rate=baseline_params["learning_rate"],
                activation_function=baseline_params["activation_function"]
            )

            mlp.fit(
                X=X_train_np,
                y=y_train_np,
                X_val=X_val_np,
                y_val=y_val_np,
                num_epochs=baseline_params["num_epochs"],
                verbose=False
            )

            # NOTE(review): predict() is assumed to return class indices that
            # are comparable with argmax of the one-hot labels - confirm.
            y_pred = mlp.predict(X_test_np)
            y_true = np.argmax(y_test_np, axis=1)

            acc_list.append(accuracy_score(y_true, y_pred))
            prec_list.append(precision_score(y_true, y_pred, average="weighted", zero_division=0))
            rec_list.append(recall_score(y_true, y_pred, average="weighted", zero_division=0))
            f1_list.append(f1_score(y_true, y_pred, average="weighted", zero_division=0))

        except Exception as e:
            # Best effort: a failed repetition is reported and skipped.
            print(f"❌ Błąd treningu dla kolumny {col}: {e}")
            continue

    if not acc_list:
        # Every repetition failed; keep the row but mark the metrics as
        # missing explicitly instead of averaging empty lists.
        print(f"⚠️ Brak udanych powtórzeń dla kolumny {col} – metryki ustawione na NaN")

    # Store the averaged results for this column.
    results.append({
        "usunięta_kolumna": col,
        "test_accuracy": np.mean(acc_list) if acc_list else np.nan,
        "test_precision": np.mean(prec_list) if prec_list else np.nan,
        "test_recall": np.mean(rec_list) if rec_list else np.nan,
        "test_f1": np.mean(f1_list) if f1_list else np.nan
    })

# === Save the results ===
results_df = pd.DataFrame(results)
# Start from "number of existing files + 1" and move on until the name is
# free, so a gap in the numbering can never overwrite an earlier result file.
existing_files = set(glob("dobor_zmiennych_klasyfikacja_nn_*.xlsx"))
file_index = len(existing_files) + 1
filename = f"dobor_zmiennych_klasyfikacja_nn_{file_index}.xlsx"
while filename in existing_files:
    file_index += 1
    filename = f"dobor_zmiennych_klasyfikacja_nn_{file_index}.xlsx"
results_df.to_excel(filename, index=False)
print(f"\n✅ Wyniki zapisane do pliku: {filename}")
