In [1]:
# import library
import pandas as pd # for data manipulation

%pip install pyodide-http
import pyodide_http # for loading external files from http 
pyodide_http.patch_all() 

import matplotlib.pyplot as plt # for plottings 

# import data 
weather = pd.read_csv('https://raw.githubusercontent.com/opencampus-sh/einfuehrung-in-data-science-und-ml/main/wetter.csv')
kiwo = pd.read_csv('https://raw.githubusercontent.com/opencampus-sh/einfuehrung-in-data-science-und-ml/main/kiwo.csv')
sale = pd.read_csv('https://raw.githubusercontent.com/opencampus-sh/einfuehrung-in-data-science-und-ml/main/umsatzdaten_gekuerzt.csv')

# merge data 
sale_weather_kiwo = sale.merge(weather, on='Datum', how='outer', suffixes = ("_sale", "_weather")) \
.merge(kiwo, on='Datum', how='outer', suffixes = ("", "_kiwo"))

# sale_weather_kiwo.head()

# === DATE-BASED FEATURES - categorizing "datum" values ===
sale_weather_kiwo['Datum'] = pd.to_datetime(sale_weather_kiwo['Datum'], errors='coerce')

sale_weather_kiwo['DayOfWeek'] = sale_weather_kiwo['Datum'].dt.dayofweek
sale_weather_kiwo['Weekend'] = sale_weather_kiwo['DayOfWeek'].isin([5, 6]).astype(int)
sale_weather_kiwo['Month'] = sale_weather_kiwo['Datum'].dt.month
sale_weather_kiwo['Year'] = sale_weather_kiwo['Datum'].dt.year
sale_weather_kiwo['Week'] = sale_weather_kiwo['Datum'].dt.isocalendar().week
sale_weather_kiwo['Day'] = sale_weather_kiwo['Datum'].dt.day

sale_weather_kiwo['KielerWoche'] = sale_weather_kiwo['KielerWoche'].fillna(0).astype(int)

# === WEATHER-BASED FEATURES ===

# Categorize temperature
temp_bins = [-float("inf"), 0, 10, 20, 30, float("inf")]
temp_labels = ["very cold", "cold", "mild", "warm", "hot"]
sale_weather_kiwo['TemperatureCategory'] = pd.cut(sale_weather_kiwo['Temperatur'], bins=temp_bins, labels=temp_labels)

# Categorize cloud cover
cloud_bins = [-1, 2, 5, 8, float("inf")]
cloud_labels = ["clear", "partly cloudy", "cloudy", "overcast"]
sale_weather_kiwo['CloudCategory'] = pd.cut(sale_weather_kiwo['Bewoelkung'], bins=cloud_bins, labels=cloud_labels)

# Categorize wind speed
wind_bins = [-1, 10, 20, 30, float("inf")]
wind_labels = ["light", "moderate", "strong", "very strong"]
sale_weather_kiwo['WindCategory'] = pd.cut(sale_weather_kiwo['Windgeschwindigkeit'], bins=wind_bins, labels=wind_labels)

sale_weather_kiwo.head()



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,id,Datum,Warengruppe,Umsatz,Bewoelkung,Temperatur,Windgeschwindigkeit,Wettercode,KielerWoche,DayOfWeek,Weekend,Month,Year,Week,Day,TemperatureCategory,CloudCategory,WindCategory
0,,2012-01-01,,,8.0,9.825,14.0,58.0,0,6,1,1,2012,52,1,cold,cloudy,moderate
1,,2012-01-02,,,7.0,7.4375,12.0,,0,0,0,1,2012,1,2,cold,cloudy,moderate
2,,2012-01-03,,,8.0,5.5375,18.0,63.0,0,1,0,1,2012,1,3,cold,cloudy,moderate
3,,2012-01-04,,,4.0,5.6875,19.0,80.0,0,2,0,1,2012,1,4,cold,partly cloudy,moderate
4,,2012-01-05,,,6.0,5.3,23.0,80.0,0,3,0,1,2012,1,5,cold,cloudy,strong
