## Packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt
from xgboost import XGBClassifier


## Load Data

In [2]:
data_path = '../IDMT-Traffic/datasets/df_main_encoded_only.csv'  
df = pd.read_csv(data_path)
df = df.drop(columns=['file', 'Unnamed: 0'])
print(df.head(2))

   is_background_encoded  date_time_encoded  location_encoded  \
0                      0                  0                 0   
1                      0                  0                 0   

   speed_kmh_encoded  daytime_encoded  weather_encoded  vehicle_encoded  \
0                  0                1                0                1   
1                  0                1                0                1   

   source_direction_encoded  microphone_encoded  channel_encoded  ...  \
0                         1                   0                0  ...   
1                         1                   1                1  ...   

   band_27_dB  band_28_dB  band_29_dB  peak_dB_1  peak_freq_1  peak_dB_2  \
0   37.024301   38.508511   35.946349  50.180933    31.622777  49.528332   
1   34.516289   34.960402   33.187933  54.903541  1000.000000  53.196406   

   peak_freq_2  peak_dB_3  peak_freq_3  octband_dB_mean  
0  1000.000000  47.901831   794.328235        42.383307  
1   794.32823

## Data Initialization & Split

In [4]:
# set target
target = 'daytime_encoded'  # Zielvariable
X = df.drop(columns=[target])  # Features (alle Spalten außer 'daytime')
y = df[target]  # Zielvariable

# standardize data
scaler = StandardScaler() 
X_filtered_scaled = scaler.fit_transform(X)


print("Shape von y:", y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
print(f"Trainingsdaten: {X_train.shape}, Testdaten: {X_test.shape}")

Shape von y: (9361,)
Trainingsdaten: (6552, 58), Testdaten: (2809, 58)


In [4]:
target = 'daytime_encoded'  # Zielvariable
X = df.drop(columns=[target])  # Features (alle Spalten außer 'daytime')
y = df[target]  # Zielvariable

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# 1st split: train/ test 
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# 2nd split: train / validation 
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.25, random_state=42, stratify=y_train_full)
# TRAIN 60% VAL 20% TEST 20%

# Ausgabe der Größen
print(f"Trainingsdaten: {X_train.shape}, Validierungsdaten: {X_val.shape}, Testdaten: {X_test.shape}")


Trainingsdaten: (5616, 58), Validierungsdaten: (1872, 58), Testdaten: (1873, 58)


## Initialize Model

In [5]:
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

## Train Model

In [None]:
# Trainieren des Modells
xgb_model.fit(X_train, y_train)

### Results

In [7]:
y_pred = xgb_model.predict(X_test)

target_names = ['A', 'M']

print("Accuracy:", accuracy_score(y_test, y_pred))
print('-'*80)

print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names= target_names))
print('-'*80)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print('-'*80)

Accuracy: 1.0
--------------------------------------------------------------------------------

Classification Report:
               precision    recall  f1-score   support

           A       1.00      1.00      1.00      1642
           M       1.00      1.00      1.00      1167

    accuracy                           1.00      2809
   macro avg       1.00      1.00      1.00      2809
weighted avg       1.00      1.00      1.00      2809

--------------------------------------------------------------------------------

Confusion Matrix:
 [[1642    0]
 [   0 1167]]
--------------------------------------------------------------------------------


## Validate


In [7]:
y_val_pred = xgb_model.predict(X_val)

target_names = ['A', 'M']

print("Validation Accuracy:", accuracy_score(y_val, y_val_pred))
print('-'*80)

print("\nClassification Report:\n", classification_report(y_val, y_val_pred, target_names= target_names))
print('-'*80)

print("\nConfusion Matrix:\n", confusion_matrix(y_val, y_val_pred))
print('-'*80)

Validation Accuracy: 1.0
--------------------------------------------------------------------------------

Classification Report:
               precision    recall  f1-score   support

           A       1.00      1.00      1.00      1095
           M       1.00      1.00      1.00       777

    accuracy                           1.00      1872
   macro avg       1.00      1.00      1.00      1872
weighted avg       1.00      1.00      1.00      1872

--------------------------------------------------------------------------------

Confusion Matrix:
 [[1095    0]
 [   0  777]]
--------------------------------------------------------------------------------


## Test

In [8]:
y_test_pred = xgb_model.predict(X_test)

target_names = ['A', 'M']

print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print('-'*80)

print("\nClassification Report:\n", classification_report(y_test, y_test_pred, target_names= target_names))
print('-'*80)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_test_pred))
print('-'*80)

Test Accuracy: 1.0
--------------------------------------------------------------------------------

Classification Report:
               precision    recall  f1-score   support

           A       1.00      1.00      1.00      1095
           M       1.00      1.00      1.00       778

    accuracy                           1.00      1873
   macro avg       1.00      1.00      1.00      1873
weighted avg       1.00      1.00      1.00      1873

--------------------------------------------------------------------------------

Confusion Matrix:
 [[1095    0]
 [   0  778]]
--------------------------------------------------------------------------------
