# 02 - Combine Data Sources

In [6]:
import pandas as pd

In [7]:
# Ticket-sales ("Saison") exports: one CSV per season, stacked into a single
# frame whose German column headers are translated to English.

# German -> English header translation applied to the stacked frame
column_mapping = {
    'Tag': 'date',
    'Anzahl Tickets': 'ticket_count',
    'Gesamteinnahmen brutto': 'gross_revenue',
    'Season': 'season',
}

# Read each season's export into its own frame (kept under individual names)
saison_22_23, saison_23_24, saison_24_25 = [
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/data_v1/22-23/Saison 22_23.csv',
        'Data/data_v1/23-24/Saison 23_24.csv',
        'Data/data_v1/24-25/saison 24_25.csv',
    )
]

# Stack the seasons (fresh 0..n-1 index), then apply the English headers
season_frames = [saison_22_23, saison_23_24, saison_24_25]
combined_saison = pd.concat(season_frames, ignore_index=True).rename(columns=column_mapping)

print(f"Combined saison data shape: {combined_saison.shape}")
combined_saison.head()

Combined saison data shape: (96, 4)


Unnamed: 0,date,ticket_count,gross_revenue,season
0,31.05.2022,0,,22-23
1,02.09.2022,1417,19466.0,22-23
2,04.09.2022,1059,14053.0,22-23
3,16.09.2022,1509,24121.0,22-23
4,25.09.2022,1473,15696.0,22-23


In [8]:
# Parse the day.month.year strings (DD.MM.YYYY) into real timestamps
parsed_dates = pd.to_datetime(combined_saison['date'], format='%d.%m.%Y')
combined_saison['date'] = parsed_dates

# Persist the stacked sales data (row index is not written to the file)
combined_saison.to_csv('Data/data_v1/combined_saison_data.csv', index=False)

# Report the covered period, then the column dtypes / non-null counts
first_day = combined_saison['date'].min()
last_day = combined_saison['date'].max()
print(f"Date range: {first_day} to {last_day}")
combined_saison.info()

Date range: 2022-05-31 00:00:00 to 2025-03-04 00:00:00
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           96 non-null     datetime64[ns]
 1   ticket_count   96 non-null     int64         
 2   gross_revenue  92 non-null     float64       
 3   season         96 non-null     object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 3.1+ KB


In [11]:
# Join the Grizzlys schedule ("Spielplan") with the combined ticket-sales data.
# Every schedule row is kept (left join); sales columns stay NaN for games
# without a sales record on the same calendar day.

# Load spielplan data
spielplan = pd.read_csv('Data/data_v1/spielplan_Grizzlys_only_final_cleaned_v3.csv')

# Convert to datetime (strip whitespace first)
spielplan['date'] = pd.to_datetime(spielplan['date'].str.strip())

# Build the join inputs as throw-away copies: the date-only key exists only on
# these copies, so neither `spielplan` nor `combined_saison` is left carrying a
# stray `merge_date` column after this cell has run.
schedule_keyed = spielplan.assign(merge_date=spielplan['date'].dt.date)
sales_keyed = (
    combined_saison
    # pd.to_datetime leaves an already-parsed datetime column unchanged
    .assign(merge_date=pd.to_datetime(combined_saison['date']).dt.date)
    # the schedule's own `date` column is the one kept in the result
    .drop(columns=['date'])
)

# Merge on the date-only key, then discard the key from the result
merged_data = pd.merge(schedule_keyed, sales_keyed, on='merge_date', how='left')
merged_data = merged_data.drop(columns=['merge_date'])

# Both inputs may carry a `season` column; pandas then suffixes them _x/_y.
# Keep the schedule's value (season_x) under the plain name.
if 'season_x' in merged_data.columns:
    merged_data = merged_data.drop(columns=['season_y'])
    merged_data = merged_data.rename(columns={'season_x': 'season'})

# A left join only preserves one row per game if each sales date is unique;
# report (without failing) when the row count changed.
if len(merged_data) != len(spielplan):
    print(
        f"Warning: merge changed the row count ({len(spielplan)} -> {len(merged_data)}); "
        "check the sales data for duplicate dates"
    )

# Save merged data
merged_data.to_csv('Data/data_v1/grizzlys_combined_schedule_sales.csv', index=False)

print(f"Merged data shape: {merged_data.shape}")
# A game counts as matched when a ticket_count was found for its date
print(f"Matches found: {merged_data['ticket_count'].notna().sum()} / {len(merged_data)}")
merged_data.head(10)

Merged data shape: (78, 11)
Matches found: 77 / 78


Unnamed: 0,spieltag,date,weekday,time,home_team,away_team,distance,season,datetime,ticket_count,gross_revenue
0,1,2022-09-16,Friday,19.3,Grizzlys Wolfsburg,Löwen Frankfurt,369.0,22-23,2022-09-16 19:30:00,1509.0,24121.0
1,4,2022-09-25,Sunday,16.3,Grizzlys Wolfsburg,Nürnberg Ice Tigers,463.0,22-23,2022-09-25 16:30:00,1473.0,15696.0
2,5,2022-09-27,Tuesday,19.3,Grizzlys Wolfsburg,Augsburger Panther,588.0,22-23,2022-09-27 19:30:00,987.0,7880.0
3,9,2022-10-09,Sunday,14.0,Grizzlys Wolfsburg,EHC Red Bull München,600.0,22-23,2022-10-09 14:00:00,1336.0,20676.0
4,11,2022-10-16,Sunday,19.0,Grizzlys Wolfsburg,Schwenninger Wild Wings,638.0,22-23,2022-10-16 19:00:00,1146.0,11196.0
5,12,2022-10-18,Wednesday,19.3,Grizzlys Wolfsburg,Nürnberg Ice Tigers,463.0,22-23,2022-10-18 19:30:00,252.0,3813.0
6,14,2022-10-23,Sunday,14.0,Grizzlys Wolfsburg,Pinguins Bremerhaven,252.0,22-23,2022-10-23 14:00:00,1253.0,22158.0
7,17,2022-10-30,Sunday,16.3,Grizzlys Wolfsburg,SC Bietigheim Steelers,,22-23,2022-10-30 16:30:00,1707.0,22918.0
8,19,2022-11-04,Friday,19.3,Grizzlys Wolfsburg,Löwen Frankfurt,369.0,22-23,2022-11-04 19:30:00,1124.0,21524.5
9,21,2022-11-18,Friday,19.3,Grizzlys Wolfsburg,Eisbären Berlin,228.0,22-23,2022-11-18 19:30:00,1027.0,20012.0


In [12]:
# Attach the per-season season-ticket figures to every game row.

# German -> English header translation for the season-ticket file
season_ticket_columns = {
    'Anzahl Tickets': 'season_tickets_sold',
    'Gesamteinnahmen brutto': 'season_tickets_revenue',
    'Season': 'season',
    'gesamt tickets': 'total_season_tickets',
    'gesamt brutto': 'total_season_revenue',
}

# Re-read the schedule + sales table written earlier
combined_data = pd.read_csv('Data/data_v1/grizzlys_combined_schedule_sales.csv')

# Read the season-ticket figures and switch to the English headers
season_tickets = pd.read_csv('Data/data_v1/season_tickets.csv')
season_tickets = season_tickets.rename(columns=season_ticket_columns)

# Left join on `season`: each game row receives its season's figures
combined_data = combined_data.merge(season_tickets, on='season', how='left')

# Write the enriched table as a new version of the file
combined_data.to_csv('Data/data_v1/grizzlys_combined_schedule_sales_v2.csv', index=False)

print(f"Data shape: {combined_data.shape}")
print(f"Columns: {list(combined_data.columns)}")
combined_data.head()

Data shape: (78, 15)
Columns: ['spieltag', 'date', 'weekday', 'time', 'home_team', 'away_team', 'distance', 'season', 'datetime', 'ticket_count', 'gross_revenue', 'season_tickets_sold', 'season_tickets_revenue', 'total_season_tickets', 'total_season_revenue']


Unnamed: 0,spieltag,date,weekday,time,home_team,away_team,distance,season,datetime,ticket_count,gross_revenue,season_tickets_sold,season_tickets_revenue,total_season_tickets,total_season_revenue
0,1,2022-09-16,Friday,19.3,Grizzlys Wolfsburg,Löwen Frankfurt,369.0,22-23,2022-09-16 19:30:00,1509.0,24121.0,978,328504.0,71796,1640176.9
1,4,2022-09-25,Sunday,16.3,Grizzlys Wolfsburg,Nürnberg Ice Tigers,463.0,22-23,2022-09-25 16:30:00,1473.0,15696.0,978,328504.0,71796,1640176.9
2,5,2022-09-27,Tuesday,19.3,Grizzlys Wolfsburg,Augsburger Panther,588.0,22-23,2022-09-27 19:30:00,987.0,7880.0,978,328504.0,71796,1640176.9
3,9,2022-10-09,Sunday,14.0,Grizzlys Wolfsburg,EHC Red Bull München,600.0,22-23,2022-10-09 14:00:00,1336.0,20676.0,978,328504.0,71796,1640176.9
4,11,2022-10-16,Sunday,19.0,Grizzlys Wolfsburg,Schwenninger Wild Wings,638.0,22-23,2022-10-16 19:00:00,1146.0,11196.0,978,328504.0,71796,1640176.9
