In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Data Exploring

In [49]:
events_df = pd.read_csv('./data/events.csv')
events_df.head()

Unnamed: 0,event_id,name,type,url,locale,start_date,end_date,timezone,status,span_multiple_days,venue_id
0,Z698xZ2qZa17W,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-09,2024-12-09T20:00:00Z,Europe/Madrid,onsale,False,Z198xZ2qZkek
1,Z698xZ2qZa1Ad,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-10,2024-12-10T20:00:00Z,Europe/Madrid,onsale,False,Z198xZ2qZkek
2,Z698xZ2qZa13A,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,2025-06-28T19:30:00Z,Europe/Madrid,onsale,False,Z598xZ2qZ6Fe1
3,Z698xZ2qZa18q,Imagine Dragons: LOOM World Tour,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,2025-06-28T19:30:00Z,Europe/Madrid,onsale,False,Z598xZ2qZ6Fe1
4,Z698xZ2qZa13R,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-07-01,2025-07-01T19:30:00Z,Europe/Madrid,onsale,False,Z598xZ2qZd7eF


In [20]:
price_ranges_df = pd.read_csv('./data/price_ranges.csv')
price_ranges_df.head()

Unnamed: 0,event_id,type,currency,min_price,max_price
0,Z698xZ2qZa17W,standard including fees,EUR,51.0,554.5
1,Z698xZ2qZa17W,standard,EUR,45.0,500.0
2,Z698xZ2qZa1Ad,standard including fees,EUR,51.0,558.5
3,Z698xZ2qZa1Ad,standard,EUR,45.0,500.0
4,Z698xZ2qZa13A,standard,EUR,297.0,6757.5


In [21]:
venues_df = pd.read_csv('./data/venues.csv').drop_duplicates()
venues_df.head()

Unnamed: 0,venue_id,name,city,state,country,address,postalCode,latitude,longitude,upcomingEvents
0,Z198xZ2qZkek,WiZink Center,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.6758,37
1,Z198xZ2qZkek,WiZink Center,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.6758,37
2,Z598xZ2qZ6Fe1,Estadio Riyadh Air Metropolitano,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,8
3,Z598xZ2qZ6Fe1,Estadio Riyadh Air Metropolitano,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,8
4,Z598xZ2qZd7eF,Estadi Olímpic Lluis Companys,Barcelona,Barcelona,Spain,"Passeig Olímpic, 5-7",8038.0,41.36476,2.15565,2


In [22]:
events_df['type'].unique()

array(['event'], dtype=object)

There is no clear relation between artists and events, so we will have to search it by artist name in the artists.csv file.

In [23]:
artist_df = pd.read_csv('./data/artists.csv')
artist_df.head()

Unnamed: 0,artist_id,name,type,locale,external_links
0,K8vZ9171uq0,Paul McCartney,attraction,en-us,{'youtube': [{'url': 'https://www.youtube.com/...
1,K8vZ9171uq0,Paul McCartney,attraction,en-us,{'youtube': [{'url': 'https://www.youtube.com/...
2,K8vZ917GSz7,Imagine Dragons,attraction,en-us,{'youtube': [{'url': 'https://www.youtube.com/...
3,K8vZ917GSz7,Imagine Dragons,attraction,en-us,{'youtube': [{'url': 'https://www.youtube.com/...
4,K8vZ917GSz7,Imagine Dragons,attraction,en-us,{'youtube': [{'url': 'https://www.youtube.com/...


In [24]:
genres_df = pd.read_csv('./data/genres.csv')
genres_df.head()

Unnamed: 0,genre_id,genre_name,sub_genre_id,sub_genre_name
0,KnvZfZ7vAeA,Rock,KZazBEonSMnZfZ7v6F1,Pop
1,KnvZfZ7vAvl,Other,KZazBEonSMnZfZ7vk1I,Other
2,KnvZfZ7vAAk,Wrestling,KZazBEonSMnZfZ7vFna,Wrestling
3,KnvZfZ7vAev,Pop,KZazBEonSMnZfZ7vkJv,African Pop
4,KnvZfZ7vAe6,Undefined,KZazBEonSMnZfZ7v6JI,Undefined


In [36]:
artist_genre_df = pd.read_csv('./data/artist_genre_data.csv')
artist_genre_df.head()

Unnamed: 0,artist_name,artist_id,genre_name,genre_id,sub_genre_name,sub_genre_id,Genre
0,Paul McCartney,K8vZ9171uq0,Rock,KnvZfZ7vAeA,Pop,KZazBEonSMnZfZ7v6F1,Rock/Pop
1,Paul McCartney,K8vZ9171uq0,Rock,KnvZfZ7vAeA,Pop,KZazBEonSMnZfZ7v6F1,Rock/Pop
2,Imagine Dragons,K8vZ917GSz7,Rock,KnvZfZ7vAeA,Pop,KZazBEonSMnZfZ7v6F1,Rock/Pop
3,Imagine Dragons,K8vZ917GSz7,Rock,KnvZfZ7vAeA,Pop,KZazBEonSMnZfZ7v6F1,Rock/Pop
4,Imagine Dragons,K8vZ917GSz7,Rock,KnvZfZ7vAeA,Pop,KZazBEonSMnZfZ7v6F1,Rock/Pop


## Analysis 1: Average ticket price per city

In [25]:
event_prices_df = events_df.merge(price_ranges_df[price_ranges_df['type']=='standard'], on='event_id')
event_prices_venues_df = event_prices_df.merge(venues_df.drop_duplicates(), on='venue_id')

In [26]:
event_prices_venues_df

Unnamed: 0,event_id,name_x,type_x,url,locale,start_date,end_date,timezone,status,span_multiple_days,...,max_price,name_y,city,state,country,address,postalCode,latitude,longitude,upcomingEvents
0,Z698xZ2qZa17W,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-09,2024-12-09T20:00:00Z,Europe/Madrid,onsale,False,...,500.0,WiZink Center,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.67580,37
1,Z698xZ2qZa1Ad,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-10,2024-12-10T20:00:00Z,Europe/Madrid,onsale,False,...,500.0,WiZink Center,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.67580,37
2,Z698xZ2qZa13A,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,2025-06-28T19:30:00Z,Europe/Madrid,onsale,False,...,6757.5,Estadio Riyadh Air Metropolitano,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,8
3,Z698xZ2qZa18q,Imagine Dragons: LOOM World Tour,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,2025-06-28T19:30:00Z,Europe/Madrid,onsale,False,...,260.0,Estadio Riyadh Air Metropolitano,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,8
4,Z698xZ2qZa13R,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-07-01,2025-07-01T19:30:00Z,Europe/Madrid,onsale,False,...,6757.5,Estadi Olímpic Lluis Companys,Barcelona,Barcelona,Spain,"Passeig Olímpic, 5-7",8038.0,41.36476,2.15565,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,Z698xZ2qZ16vOg_prK,Jamiroquai - The Heels Of Steel Tour | VIP Pac...,event,https://www.ticketmaster.es/event/jamiroquai-t...,en-us,2025-11-06,2025-11-06T20:00:00Z,Europe/Madrid,onsale,False,...,215.0,Palau Sant Jordi,Barcelona,Barcelona,Spain,"Passeig Olímpic, 5-7",8038.0,41.36337,2.15259,26
180,Z698xZ2qZ16vaY1d08,Jamiroquai - The Heels Of Steel Tour,event,https://www.ticketmaster.es/event/jamiroquai-t...,en-us,2025-11-06,2025-11-06T20:00:00Z,Europe/Madrid,onsale,False,...,105.0,Palau Sant Jordi,Barcelona,Barcelona,Spain,"Passeig Olímpic, 5-7",8038.0,41.36337,2.15259,26
181,Z698xZ2qZ16vO4Q4fF,OneRepublic - Escape to Europe 2025,event,https://www.ticketmaster.es/event/onerepublic-...,en-us,2025-11-14,2025-11-14T20:00:00Z,Europe/Madrid,onsale,False,...,176.0,Palacio Vistalegre,Madrid,Madrid,Spain,Acceso Sur. C/ Matilde Hernández s/n,28025.0,40.38600,-3.73850,33
182,Z698xZ2qZ1kfpaV80,OneRepublic - Escape to Europe 2025 | Paquetes...,event,https://www.ticketmaster.es/event/onerepublic-...,en-us,2025-11-14,2025-11-14T20:00:00Z,Europe/Madrid,onsale,False,...,285.0,Palacio Vistalegre,Madrid,Madrid,Spain,Acceso Sur. C/ Matilde Hernández s/n,28025.0,40.38600,-3.73850,33


In [154]:
event_prices_venues_df['average_price'] = (event_prices_venues_df['min_price'] + event_prices_venues_df['max_price']) / 2
average_prices_df = event_prices_venues_df.groupby('city').agg({'average_price':'mean'}).reset_index()

In [155]:
average_prices_df

Unnamed: 0,city,average_price
0,A Coruña,132.864583
1,Badalona,118.75
2,Barakaldo,79.5
3,Barcelona,134.084677
4,Bilbao,52.0
5,Granada,39.25
6,Madrid,158.029167
7,Mislata,35.0
8,Málaga,39.5
9,Pamplona/Iruña,137.775


In [None]:
city_coords = {
    'A Coruña': (43.3709, -8.3733),
    'Badalona': (41.4467, 2.2434),
    'Barakaldo': (43.3066, -2.9885),
    'Barcelona': (41.3851, 2.1734),
    'Bilbao': (43.2631, -2.9249),
    'Granada': (37.1786, -3.5981),
    'Madrid': (40.4168, -3.7033),
    'Mislata': (39.4906, -0.4406),
    'Málaga': (36.7213, -4.4212),
    'Pamplona/Iruña': (42.8154, -1.6444),
    'San Cristóbal de La Laguna': (28.4636, -16.2518),
    'Sevilla': (37.3825, -5.9845),
    'Valencia': (39.4699, -0.3761),
    'Zaragoza': (41.6488, -0.8765)
}

In [None]:
average_prices_df['lat'] = average_prices_df['city'].map(lambda x: city_coords[x][0])
average_prices_df['lon'] = average_prices_df['city'].map(lambda x: city_coords[x][1])

price_types = ['min_price', 'max_price']
colors = ['rgb(33,113,181)', 'rgb(239,138,98)']

In [153]:
fig = go.Figure()

for i, price_type in enumerate(price_types):
    fig.add_trace(go.Scattermapbox(
        lat=average_prices_df['lat'],
        lon=average_prices_df['lon'],
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=average_prices_df[price_type] / 2,  
            color=colors[i],
            opacity=0.6
        ),
        text=average_prices_df.apply(lambda row: f"{row['city']}<br>{price_type.title()}: {row[price_type]}", axis=1),
        name=price_type.title()
    ))

fig.update_layout(
    mapbox=dict(
        style="carto-positron",
        zoom=5,
        center={"lat": sum(coord[0] for coord in city_coords.values()) / len(city_coords),
                "lon": sum(coord[1] for coord in city_coords.values()) / len(city_coords)}
    ),
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
    title="Bubble Map of Min and Max Prices by City",
    height=800,
    width=1000
)

fig.show()


In [164]:
fig = go.Figure()

fig.add_trace(go.Scattermapbox(
    lat=average_prices_df['lat'],
    lon=average_prices_df['lon'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        size=average_prices_df['average_price']*10,  # Scale size
        color=average_prices_df['average_price'],
        colorscale='Viridis',  # Gradient color scale
        showscale=True,  # Display colorbar
        sizemode='area',
        colorbar=dict(title="Avg Price")
    ),
    text=average_prices_df.apply(lambda row: f"{row['city']}<br>Avg Price: {row['average_price']:.2f}", axis=1),
    name="Average Price"
))

# Update layout to mimic the px.scatter_mapbox style
fig.update_layout(
    mapbox=dict(
        style="carto-positron",
        zoom=5,
        center={"lat": average_prices_df['lat'].mean(), "lon": average_prices_df['lon'].mean()}
    ),
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
    title="Bubble Map of Average Prices by Venue in Madrid",
    height=800,
    width=1000
)

# Show the map
fig.show()

## Analysis 2: Similirity in distribution of prices per city

In [None]:
cities = event_prices_venues_df['state'].unique()

In [105]:
subplot_titles = []
for city in cities:
    subplot_titles.append(f"{city} Min Price Distribution")
    subplot_titles.append(f"{city} Max Price Distribution")
    
fig = make_subplots(
    rows=len(cities),
    cols=2,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05
)

for i, city in enumerate(cities, start=1):
    city_data = event_prices_venues_df[event_prices_venues_df['state'] == city]
    
    # Calculate frequency for min_price
    min_price_counts = city_data['min_price'].value_counts().sort_index()
    min_price_x = min_price_counts.index
    min_price_y = min_price_counts.values
    
    # Calculate frequency for max_price
    max_price_counts = city_data['max_price'].value_counts().sort_index()
    max_price_x = max_price_counts.index
    max_price_y = max_price_counts.values
    
    fig.add_trace(
        go.Scatter(
            x=min_price_x,
            y=min_price_y,
            mode='lines+markers',
            name=f'{city} Min Price',
            line=dict(color='blue', width=2),
            opacity=0.7
        ),
        row=i,
        col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=max_price_x,
            y=max_price_y,
            mode='lines+markers',
            name=f'{city} Max Price',
            line=dict(color='orange', width=2),
            opacity=0.7
        ),
        row=i,
        col=2
    )

fig.update_layout(
    height=300 * len(cities),  
    width=1200,
    title_text="Min and Max Price Distributions by City (Lines)",
    showlegend=False
)

# Update axis labels
for i in range(1, len(cities) + 1):
    fig.update_xaxes(title_text="Price", row=i, col=1)
    fig.update_xaxes(title_text="Price", row=i, col=2)
    fig.update_yaxes(title_text="Count", row=i, col=1)
    fig.update_yaxes(title_text="Count", row=i, col=2)

# Show the plot
fig.show()


In [165]:
subplot_titles = [f"{city} Average Price Distribution" for city in cities]

fig = make_subplots(
    rows=len(cities),
    cols=1,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05
)

for i, city in enumerate(cities, start=1):
    city_data = event_prices_venues_df[event_prices_venues_df['state'] == city]
    
    city_data['average_price'] = city_data[['min_price', 'max_price']].mean(axis=1)
    
    avg_price_counts = city_data['average_price'].value_counts().sort_index()
    avg_price_x = avg_price_counts.index
    avg_price_y = avg_price_counts.values
    
    fig.add_trace(
        go.Scatter(
            x=avg_price_x,
            y=avg_price_y,
            mode='lines+markers',
            name=f'{city} Average Price',
            line=dict(color='green', width=2),
            opacity=0.7
        ),
        row=i,
        col=1
    )


fig.update_layout(
    height=300 * len(cities),  
    width=800,
    title_text="Average Price Distributions by City (Lines)",
    showlegend=False
)

for i in range(1, len(cities) + 1):
    fig.update_xaxes(title_text="Average Price", row=i, col=1)
    fig.update_yaxes(title_text="Count", row=i, col=1)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

## Analysis 3: Average prices by genre

In [64]:
events_df = pd.read_csv('./data/ticketmaster_cleaned_events.csv')

In [67]:
events_df.head()

Unnamed: 0,event_id,event_name,event_type,event_url,locale,start_date,start_time,timezone,status,venue_id,...,venue_city,venue_state,venue_country,venue_address,venue_postal_code,venue_latitude,venue_longitude,segment_name,genre_name,sub_genre_name
0,Z698xZ2qZa17W,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-09,21:00:00,Europe/Madrid,onsale,Z198xZ2qZkek,...,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.6758,Music,Rock,Pop
1,Z698xZ2qZa1Ad,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-10,21:00:00,Europe/Madrid,onsale,Z198xZ2qZkek,...,Madrid,Madrid,Spain,"Av. Felipe II, s/n",28009.0,40.42394,-3.6758,Music,Rock,Pop
2,Z698xZ2qZa13A,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,21:30:00,Europe/Madrid,onsale,Z598xZ2qZ6Fe1,...,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,Music,Rock,Pop
3,Z698xZ2qZa18q,Imagine Dragons: LOOM World Tour,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,21:30:00,Europe/Madrid,onsale,Z598xZ2qZ6Fe1,...,Madrid,Madrid,Spain,"Av. de Luis Aragonés, 4",28022.0,40.43624,-3.59947,Music,Rock,Pop
4,Z698xZ2qZa13R,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-07-01,21:30:00,Europe/Madrid,onsale,Z598xZ2qZd7eF,...,Barcelona,Barcelona,Spain,"Passeig Olímpic, 5-7",8038.0,41.36476,2.15565,Music,Rock,Pop


In [77]:
events_prices_df = events_df.merge(price_ranges_df, on='event_id', how='inner')

In [78]:
events_prices_df = events_prices_df[events_prices_df['type'] == 'standard'].drop_duplicates(subset='event_id')

In [82]:
events_prices_df

Unnamed: 0,event_id,event_name,event_type,event_url,locale,start_date,start_time,timezone,status,venue_id,...,venue_postal_code,venue_latitude,venue_longitude,segment_name,genre_name,sub_genre_name,type,currency,min_price,max_price
1,Z698xZ2qZa17W,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-09,21:00:00,Europe/Madrid,onsale,Z198xZ2qZkek,...,28009.0,40.42394,-3.67580,Music,Rock,Pop,standard,EUR,45.0,500.0
3,Z698xZ2qZa1Ad,Paul McCartney: GOT BACK TOUR,event,https://www.ticketmaster.es/event/paul-mccartn...,en-us,2024-12-10,21:00:00,Europe/Madrid,onsale,Z198xZ2qZkek,...,28009.0,40.42394,-3.67580,Music,Rock,Pop,standard,EUR,45.0,500.0
4,Z698xZ2qZa13A,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,21:30:00,Europe/Madrid,onsale,Z598xZ2qZ6Fe1,...,28022.0,40.43624,-3.59947,Music,Rock,Pop,standard,EUR,297.0,6757.5
6,Z698xZ2qZa18q,Imagine Dragons: LOOM World Tour,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-06-28,21:30:00,Europe/Madrid,onsale,Z598xZ2qZ6Fe1,...,28022.0,40.43624,-3.59947,Music,Rock,Pop,standard,EUR,65.0,260.0
8,Z698xZ2qZa13R,Imagine Dragons: LOOM World Tour | VIP Packages,event,https://www.ticketmaster.es/event/imagine-drag...,en-us,2025-07-01,21:30:00,Europe/Madrid,onsale,Z598xZ2qZd7eF,...,8038.0,41.36476,2.15565,Music,Rock,Pop,standard,EUR,297.0,6757.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,Z698xZ2qZ16vOg_prK,Jamiroquai - The Heels Of Steel Tour | VIP Pac...,event,https://www.ticketmaster.es/event/jamiroquai-t...,en-us,2025-11-06,21:00:00,Europe/Madrid,onsale,Z598xZ2qZe6d7,...,8038.0,41.36337,2.15259,Music,Rock,Pop,standard,EUR,205.0,215.0
340,Z698xZ2qZ16vaY1d08,Jamiroquai - The Heels Of Steel Tour,event,https://www.ticketmaster.es/event/jamiroquai-t...,en-us,2025-11-06,21:00:00,Europe/Madrid,onsale,Z598xZ2qZe6d7,...,8038.0,41.36337,2.15259,Music,Rock,Pop,standard,EUR,60.0,105.0
342,Z698xZ2qZ16vO4Q4fF,OneRepublic - Escape to Europe 2025,event,https://www.ticketmaster.es/event/onerepublic-...,en-us,2025-11-14,21:00:00,Europe/Madrid,onsale,Z598xZ2qZe7v1,...,28025.0,40.38600,-3.73850,Music,Rock,Pop,standard,EUR,60.0,176.0
345,Z698xZ2qZ1kfpaV80,OneRepublic - Escape to Europe 2025 | Paquetes...,event,https://www.ticketmaster.es/event/onerepublic-...,en-us,2025-11-14,21:00:00,Europe/Madrid,onsale,Z598xZ2qZe7v1,...,28025.0,40.38600,-3.73850,Music,Rock,Pop,standard,EUR,175.0,285.0


In [180]:
grouped = events_prices_df[events_prices_df['segment_name']=='Music'].groupby("sub_genre_name").agg(
    avg_min_price=("min_price", "mean"),
    avg_max_price=("max_price", "mean"),
    count=("event_id", "size")
).reset_index()

In [181]:
fig = px.scatter(
    grouped,
    x="sub_genre_name",
    y="avg_min_price",
    size="count",
    size_max=100,
    hover_name="sub_genre_name",
    hover_data={"count": True, "avg_max_price": True},
    title="Bubble Plot of Sub-Genre and Price Ranges",
    labels={
        "sub_genre_name": "Sub-Genre",
        "avg_min_price": "Average Min Price",
        "count": "Number of Events"
    }
)
fig.update_layout(
    xaxis_title="Sub-Genre",
    yaxis_title="Average Min Price",
    xaxis_tickangle=45,
    height=600,
    width=1000
)

fig.show()


In [183]:
grouped = events_prices_df.groupby("segment_name").agg(
    avg_min_price=("min_price", "mean"),
    avg_max_price=("max_price", "mean"),
    count=("event_id", "size")  # Count the number of events per sub_genre
).reset_index()

In [184]:

# Plot
plt.figure(figsize=(12, 8))# Create a bubble plot
fig = px.scatter(
    grouped,
    x="segment_name",
    y="avg_min_price",
    size="count",
    size_max=100,
    hover_name="segment_name",
    hover_data={"count": True, "avg_max_price": True},
    title="Bubble Plot of Segment and Price Ranges",
    labels={
        "segment_name": "Segment",
        "avg_min_price": "Average Min Price",
        "count": "Number of Events"
    }
)

# Update layout for better readability
fig.update_layout(
    xaxis_title="Segment",
    yaxis_title="Average Min Price",
    xaxis_tickangle=45,
    height=600,
    width=1000
)

# Show the plot
fig.show()


<Figure size 1200x800 with 0 Axes>

## Analysis 4: Venue locations in Madrid

In [185]:
madrid_data = events_prices_df[events_prices_df['venue_state'] == 'Madrid']
madrid_data = madrid_data[madrid_data['segment_name'] == 'Music']

venues_count = madrid_data.groupby(['venue_id', 'venue_name', 'venue_latitude', 'venue_longitude', 'sub_genre_name']).size().reset_index(name='event_count')

In [186]:
fig = px.scatter_mapbox(
    venues_count,
    lat="venue_latitude",
    lon="venue_longitude",
    size="event_count",  
    hover_name="venue_name",
    hover_data={"event_count": True},
    color="sub_genre_name",  
    size_max=50,
    title="Count of Events by Venue in Madrid"
)

fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=11,
    mapbox_center={"lat": madrid_data['venue_latitude'].mean(), "lon": madrid_data['venue_longitude'].mean()},
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
    height=600,
    width=1000
)

# Show the map
fig.show()


## Analysis 5: Prices by venue

In [152]:
# Filter data for Madrid
madrid_data = events_prices_df[events_prices_df['venue_city'] == 'Madrid']

# Aggregate data by venue, min_price, and max_price
venues_data = madrid_data.groupby(['venue_id', 'venue_name', 'venue_latitude', 'venue_longitude']).agg(
    min_price=('min_price', 'mean'),
    max_price=('max_price', 'mean')
).reset_index()

# Define colors and labels for price types
price_types = ['min_price', 'max_price']
colors = ['rgb(33,113,181)', 'rgb(239,138,98)']

# Create a figure
fig = go.Figure()

# Add layers for each price type
for i, price_type in enumerate(price_types):
    fig.add_trace(go.Scattermapbox(
        lat=venues_data['venue_latitude'],
        lon=venues_data['venue_longitude'],
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=venues_data[price_type] / 4,  # Scale size
            color=colors[i],
            opacity=0.6
        ),
        text=venues_data.apply(lambda row: f"{row['venue_name']}<br>{price_type.title().replace('_', ' ')}: {row[price_type]:.2f}", axis=1),
        name=price_type.title().replace('_', ' ')
    ))

# Update layout to mimic the px.scatter_mapbox style
fig.update_layout(
    mapbox=dict(
        style="carto-positron",
        zoom=11,
        center={"lat": madrid_data['venue_latitude'].mean(), "lon": madrid_data['venue_longitude'].mean()}
    ),
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    title="Bubble Map of Min and Max Prices by Venue in Madrid",
    legend=dict(traceorder="reversed"),
    height=600,
    width=1000
)

# Show the map
fig.show()
