In [1]:
import pandas as pd
from utils import load_data

In [2]:
# Load the events table through the project-local `load_data` helper (imported from utils).
# NOTE(review): the source line echoed in this cell's output suggests the helper reads
# 'data/events_usa_clean.csv' — confirm against utils.load_data.
df = load_data()

  data = pd.read_csv('data/events_usa_clean.csv')


In [4]:
# Inspect the column names to see which fields the loaded table provides.
df.columns

Index(['event_id', 'event_name', 'event_type', 'event_url', 'locale',
       'start_date', 'start_time', 'timezone', 'status', 'venue_id',
       'venue_name', 'venue_city', 'venue_state', 'venue_country',
       'venue_address', 'venue_postal_code', 'venue_latitude',
       'venue_longitude', 'segment_name', 'genre_name', 'sub_genre_name',
       'attraction_id', 'attraction_name', 'min_price', 'max_price',
       'currency'],
      dtype='object')

In [5]:
# Lookup table: segment name -> set of distinct genre names seen in that segment.
segments = {
    segment_label: set(genre_values)
    for segment_label, genre_values in df.groupby('segment_name')['genre_name']
}

In [6]:
# Display the segment -> genres mapping built in the previous cell.
segments

{'Arts & Theatre': {"Children's Theatre",
  'Circus & Specialty Acts',
  'Classical',
  'Comedy',
  'Cultural',
  'Dance',
  'Fashion',
  'Fine Art',
  'Magic & Illusion',
  'Miscellaneous',
  'Miscellaneous Theatre',
  'Multimedia',
  'Music',
  'Opera',
  'Performance Art',
  'Spectacular',
  'Theatre',
  'Variety'},
 'Film': {'Action/Adventure',
  'Animation',
  'Comedy',
  'Documentary',
  'Drama',
  'Family',
  'Miscellaneous',
  'Music'},
 'Miscellaneous': {'Community/Civic',
  'Community/Cultural',
  'Fairs & Festivals',
  'Family',
  'Food & Drink',
  'Health/Wellness',
  'Hobby/Special Interest Expos',
  'Holiday',
  'Ice Shows',
  'Lecture/Seminar',
  'Multimedia',
  'Psychics/Mediums/Hypnotists',
  'Undefined'},
 'Music': {'Alternative',
  'Ballads/Romantic',
  'Blues',
  "Children's Music",
  'Classical',
  'Country',
  'Dance/Electronic',
  'Folk',
  'Hip-Hop/Rap',
  'Holiday',
  'Jazz',
  'Latin',
  'Metal',
  'New Age',
  'Other',
  'Pop',
  'R&B',
  'Reggae',
  'Religio

In [9]:
# Function that returns the top attractions for a given segment and genre

def _join_unique_states(states):
    """Return the distinct non-null values of ``states`` as one sorted, comma-separated string."""
    # Missing states arrive as NaN/None: sorted() cannot order them against
    # strings and str.join() rejects them, so drop them before joining.
    return ', '.join(sorted(states.dropna().astype(str).unique()))


def get_top_attractions(segment, genre, num_results=10, data=None):
    """Return the attractions with the most listed events for a segment/genre.

    Parameters
    ----------
    segment : str
        Value matched exactly against the ``segment_name`` column.
    genre : str
        Value matched exactly against the ``genre_name`` column. The sentinel
        ``'All'`` disables the genre filter and keeps the whole segment.
    num_results : int, default 10
        Maximum number of attractions to return.
    data : pandas.DataFrame, optional
        Events table to query. When omitted, the notebook-level ``df`` is used,
        which keeps existing ``get_top_attractions(segment, genre)`` calls working.

    Returns
    -------
    pandas.DataFrame
        One row per attraction with the columns ``attraction_name``,
        ``venue_state`` (sorted, comma-separated distinct states),
        ``avg_min_price`` and ``avg_max_price``. Rows are ordered by number of
        events (descending); ties are broken alphabetically by attraction name
        so the result is deterministic. The index is a fresh 0..n-1 range.
    """
    events = df if data is None else data

    # Build the row filter once; the genre condition is optional.
    mask = events['segment_name'] == segment
    if genre != 'All':
        mask = mask & (events['genre_name'] == genre)
    filtered_df = events[mask]

    # Group by attraction_name to compute relevant stats
    top_attractions = (
        filtered_df.groupby('attraction_name')
        .agg(
            venue_state=('venue_state', _join_unique_states),
            avg_min_price=('min_price', 'mean'),
            avg_max_price=('max_price', 'mean'),
            count=('attraction_name', 'size'),  # Used for sorting only
        )
        .reset_index()  # Make attraction_name a column so it can break ties
        # Sorting on count alone leaves the order of ties (and therefore the
        # head() cut-off) implementation-defined; add the name as a second key.
        .sort_values(by=['count', 'attraction_name'], ascending=[False, True])
        .head(num_results)  # Limit to top attractions
        .drop(columns='count')  # Drop the helper column after sorting
        .reset_index(drop=True)
    )

    return top_attractions

In [7]:
# Default query: the Music segment across every genre
# ('All' is the sentinel get_top_attractions treats as "no genre filter").
segment, genre = 'Music', 'All'

In [10]:
# Top attractions for the selected segment using the genre chosen above.
get_top_attractions(segment=segment, genre=genre)

Unnamed: 0,attraction_name,venue_state,avg_min_price,avg_max_price
0,MJ LIVE - Michael Jackson Tribute,"Florida, Illinois, Indiana, Michigan, Nevada",75.346056,100.480916
1,Jabbawockeez,Nevada,57.21,107.21
2,The Australian Bee Gees (Vegas),Nevada,50.504444,81.504444
3,Soul of Motown,Nevada,44.419825,74.419825
4,The Rat Pack Is Back (Las Vegas),Nevada,89.95,99.95
5,The King Comes Home,Nevada,39.99,69.99
6,Purple Reign - Prince Tribute,Nevada,86.99,111.49
7,Donny Osmond (Las Vegas),Nevada,65.0,150.0
8,Chris Botti,"Florida, Iowa, New York, Virginia",92.150417,227.091667
9,Barry Manilow,Nevada,57.874615,399.220769


In [11]:
# Same query, narrowed to the Blues genre of the selected segment.
get_top_attractions(segment, genre='Blues')

Unnamed: 0,attraction_name,venue_state,avg_min_price,avg_max_price
0,Blues is Alright,"Alabama, California, District of Columbia, Flo...",59.380952,200.761905
1,Joe Bonamassa,"Alabama, Connecticut, Florida, Georgia, Illino...",57.133333,227.133333
2,Joanne Shaw Taylor,"Florida, Maine, Michigan, Mississippi, New Yor...",24.5,53.318182
3,Tab Benoit,"Florida, Michigan, Mississippi, Ohio, South Ca...",32.271818,46.408182
4,Daniel Donato,"Arizona, California, Michigan, Ohio, Oregon, T...",26.5,39.9
5,Keb' Mo',"Alabama, Florida, Illinois, Indiana, North Car...",44.5625,88.6875
6,Kenny Wayne Shepherd,"Arkansas, Colorado, Florida, Illinois, Kansas",54.178571,93.142857
7,Robert Cray Band,"Florida, Illinois, Minnesota, Nevada, Pennsylv...",40.332857,83.674286
8,Jackie Venson,Texas,26.666667,26.666667
9,Samantha Fish,"California, Louisiana, Mississippi, Washington",41.2,58.775
