In [1]:
import pandas as pd
import numpy as np

# --- Load inputs ------------------------------------------------------------
# Each CSV is read with its "Datum" column parsed as datetime, because that
# column is the join key below and the source of the calendar features.
test = pd.read_csv(
    "/workspaces/bakery_sales_prediction/test.csv",
    parse_dates=["Datum"],
)
kiwo = pd.read_csv(
    "/workspaces/bakery_sales_prediction/kiwo.csv",
    parse_dates=["Datum"],
)
wetter = pd.read_csv(
    "/workspaces/bakery_sales_prediction/wetter.csv",
    parse_dates=["Datum"],
)

# --- Join auxiliary tables --------------------------------------------------
# Left joins on "Datum": first the Kieler Woche indicator, then the weather
# data. Test rows without a matching date get NaN in the joined columns.
test = (
    test
    .merge(kiwo, on="Datum", how="left")
    .merge(wetter, on="Datum", how="left")
)

# --- Impute missing values --------------------------------------------------
# A missing Kieler Woche entry is treated as 0, and the column is cast to int.
test["KielerWoche"] = test["KielerWoche"].fillna(0).astype(int)

# Gaps in the weather columns are filled with that column's median, computed
# on the merged test frame itself.
for _weather_col in ("Bewoelkung", "Temperatur", "Windgeschwindigkeit"):
    test[_weather_col] = test[_weather_col].fillna(test[_weather_col].median())

# --- Calendar features ------------------------------------------------------
# assign() evaluates the keyword arguments in order, so "Weekend" can read the
# "DayOfWeek" column created just before it (pandas: Monday=0 ... Sunday=6).
test = test.assign(
    DayOfWeek=lambda frame: frame["Datum"].dt.dayofweek,
    Weekend=lambda frame: frame["DayOfWeek"].isin([5, 6]).astype(int),
    Month=lambda frame: frame["Datum"].dt.month,
)

# Add holiday and season features
def get_season(month):
    """Return the German season label for a month number.

    Months 12, 1, 2 map to "Winter", 3-5 to "Frühling", 6-8 to "Sommer".
    Every other value (including 9-11) maps to "Herbst".
    """
    season_table = (
        ((12, 1, 2), "Winter"),
        ((3, 4, 5), "Frühling"),
        ((6, 7, 8), "Sommer"),
    )
    for months, label in season_table:
        if month in months:
            return label
    # Anything not matched above is treated as autumn.
    return "Herbst"

def get_holiday_name(date):
    """Return a holiday name for ``date``, or the string "None".

    Placeholder logic (replace with a real holiday lookup): only two
    hard-coded dates are recognised, 2024-12-25 ("Weihnachten") and
    2024-01-01 ("Neujahr"). Every other date yields the literal string
    "None", not the ``None`` object.

    ``date`` must provide ``strftime``.
    """
    known_holidays = {
        "2024-12-25": "Weihnachten",
        "2024-01-01": "Neujahr",
    }
    iso_day = date.strftime("%Y-%m-%d")
    return known_holidays.get(iso_day, "None")

# Derive the season from the month number and the holiday name from the date,
# applying each helper element-wise.
test["Season"] = test["Month"].map(get_season)
test["HolidayName"] = test["Datum"].map(get_holiday_name)
# 1 when a holiday name was found, 0 when the helper returned the string "None".
test["IsHoliday_lib"] = test["HolidayName"].ne("None").astype(int)

# Categorize weather
def categorize_temp(t):
    """Bucket a temperature value into "Kalt", "Mild" or "Heiß".

    Below 5 is "Kalt", 5 up to and including 20 is "Mild", anything else is
    "Heiß". A NaN fails both comparisons and therefore also yields "Heiß".
    Units are presumably degrees Celsius; confirm against the weather data.
    """
    if t < 5:
        return "Kalt"
    return "Mild" if t <= 20 else "Heiß"

def categorize_clouds(b):
    """Bucket a cloud-cover value into a German description.

    Below 3 is "Klar", 3 up to and including 6 is "Teilweise bewölkt",
    anything else is "Stark bewölkt". A NaN fails both comparisons and so
    also yields "Stark bewölkt". The scale is presumably oktas (0-8); confirm
    against the weather data.
    """
    if b < 3:
        return "Klar"
    if b <= 6:
        return "Teilweise bewölkt"
    return "Stark bewölkt"

def categorize_wind(w):
    """Bucket a wind-speed value into "Schwach", "Mittel" or "Stark".

    Below 10 is "Schwach", 10 up to and including 25 is "Mittel", anything
    else is "Stark". A NaN fails both comparisons and so also yields "Stark".
    """
    # Start from the fall-through label and override it when a bound matches.
    label = "Stark"
    if w < 10:
        label = "Schwach"
    elif w <= 25:
        label = "Mittel"
    return label

# Add one categorical column per weather measurement by applying the matching
# bucketing helper element-wise. Keyword order fixes the column order.
test = test.assign(
    TemperatureCategory=test["Temperatur"].apply(categorize_temp),
    CloudCategory=test["Bewoelkung"].apply(categorize_clouds),
    WindCategory=test["Windgeschwindigkeit"].apply(categorize_wind),
)

# Report the final (rows, columns) shape as a quick sanity check.
print(" Test data prepared. Shape:", test.shape)



 Test data prepared. Shape: (1830, 17)


In [None]:
# Write the prepared test frame to CSV. index=False leaves the DataFrame
# index out of the file.
test.to_csv(
    path_or_buf="/workspaces/bakery_sales_prediction/merged_test_dataset_kaggle.csv",
    index=False,
)
print(" Test data exported to merged_test_dataset_kaggle.csv")

 Test data exported to merged_test_dataset_kaggle.csv
