In [1]:
## 🚀 Initial Multi-Dataset Exploration
# Fundamentals of Data Science Capstone - Phase 3: Data Understanding

import pandas as pd
import os
import glob

# --- Configuration ---
# Set the directory where your converted CSVs are located
DATA_DIR = 'raw/csv' 
# Define the file pattern to search for
FILE_PATTERN = os.path.join(DATA_DIR, '*.csv')

# Dictionary to hold all DataFrames, keyed by filename
all_dataframes = {}

print("--- 1. Loading All CSV Files ---")
# Use glob to find all CSV files matching the pattern
csv_files = glob.glob(FILE_PATTERN)

if not csv_files:
    print(f"ERROR: No CSV files found in {DATA_DIR}. Please check the directory path.")
else:
    for filepath in csv_files:
        filename = os.path.basename(filepath)
        print(f"Loading: {filename}...")
        try:
            # Load the CSV. Adjust encoding or separator if necessary (e.g., encoding='latin1', sep=';')
            df = pd.read_csv(filepath)
            all_dataframes[filename] = df
            print(f"  -> Success! Shape: {df.shape}")
        except Exception as e:
            print(f"  -> FAILED to load {filename}: {e}")

print(f"\nSuccessfully loaded {len(all_dataframes)} DataFrames.")


# --- 2. High-Level Data Quality Check & Inspection ---

print("\n--- 2. High-Level Data Quality Check ---")

# Table to summarize key metrics for all datasets
summary_data = []

for name, df in all_dataframes.items():
    n_rows, n_cols = df.shape
    n_duplicates = df.duplicated().sum()
    
    # Calculate total missing values across the whole DataFrame
    total_missing = df.isnull().sum().sum()
    
    summary_data.append({
        'Dataset': name,
        'Rows': n_rows,
        'Columns': n_cols,
        'Duplicated Rows': n_duplicates,
        'Total Missing Values': total_missing
    })

# Create the summary table
summary_df = pd.DataFrame(summary_data)
print("\n--- Dataset Overview ---")
print(summary_df)

print("\n--- Initial Inspection (Head & Info) ---")
for name, df in all_dataframes.items():
    print(f"\n==================================================")
    print(f"DATASET: {name}")
    print(f"==================================================")
    
    # Check the first few rows
    print("\n[A] Head (First 5 Rows):")
    display(df.head()) # Use 'display' in Jupyter for rich output
    
    # Check data types and non-null counts
    print("\n[B] Info (Data Types and Missing Counts per Column):")
    df.info()

    # Descriptive statistics for numeric columns
    print("\n[C] Descriptive Statistics (Numeric):")
    display(df.describe())

--- 1. Loading All CSV Files ---
Loading: ticketmaster_us_events_20251207_075923.csv...
  -> FAILED to load ticketmaster_us_events_20251207_075923.csv: No columns to parse from file
Loading: phq_events_20251207_054509.csv...
  -> Success! Shape: (150, 13)
Loading: phq_events_full_20251207_063822.csv...
  -> Success! Shape: (30000, 11)
Loading: phq_events_20251207_054229.csv...
  -> Success! Shape: (50, 13)
Loading: phq_events_full_20251207_060912.csv...
  -> Success! Shape: (30000, 11)
Loading: bandsintown_event_panel.csv...
  -> FAILED to load bandsintown_event_panel.csv: No columns to parse from file
Loading: bandsintown_artist_panel.csv...
  -> FAILED to load bandsintown_artist_panel.csv: No columns to parse from file
Loading: setlistfm_us_concerts_20251207_080243.csv...
  -> Success! Shape: (2000, 15)
Loading: bandsintown_artist_slugs.csv...
  -> Success! Shape: (0, 1)
Loading: musicbrainz_structured_features.csv...
  -> Success! Shape: (98389, 7)
Loading: setlistfm_us_concerts_202

Unnamed: 0,id,title,category,start,end,rank,local_rank,phq_attendance,location,labels,entities,duration,updated
0,3Ad6pUgSsDZ9YyCuGu,Deer Tick,concerts,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,32,64,120.0,[-70.7040084 42.0919348],,"[{'category': None, 'description': None, 'enti...",0,2025-11-18T20:12:23Z
1,3NKX7YnztkZp6zvndF,Watchtower,concerts,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,39,62,287.0,[-97.3951022 27.7977195],,"[{'category': None, 'description': None, 'enti...",0,2025-09-22T00:14:57Z
2,3NjBcgX3Hewhk4SgRe,Max Stalling and Heather Stalling,concerts,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,36,59,207.0,[-90.0249 35.0611],,"[{'category': None, 'description': None, 'enti...",0,2025-09-22T00:09:59Z
3,3UDqEwrBU5ZZ6MPmhD,Sunday Contra at the Monday Club- September,concerts,2025-09-08T00:00:00Z,2025-09-08T02:30:00Z,39,65,289.0,[-90.355103 38.591047],,"[{'category': None, 'description': None, 'enti...",9000,2025-09-22T00:09:59Z
4,3dYkukhpwpErsSgxE5,Lulada Club,concerts,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,40,43,302.0,[-73.9573947 40.7218813],,"[{'category': None, 'description': None, 'enti...",0,2025-12-05T21:57:38Z



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              150 non-null    object 
 1   title           150 non-null    object 
 2   category        150 non-null    object 
 3   start           150 non-null    object 
 4   end             150 non-null    object 
 5   rank            150 non-null    int64  
 6   local_rank      150 non-null    int64  
 7   phq_attendance  148 non-null    float64
 8   location        150 non-null    object 
 9   labels          0 non-null      float64
 10  entities        150 non-null    object 
 11  duration        150 non-null    int64  
 12  updated         150 non-null    object 
dtypes: float64(2), int64(3), object(8)
memory usage: 15.4+ KB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,rank,local_rank,phq_attendance,labels,duration
count,150.0,150.0,148.0,0.0,150.0
mean,40.666667,61.026667,2719.459459,,58912.73
std,14.147197,15.941987,10041.610315,,361381.9
min,0.0,0.0,69.0,,0.0
25%,30.0,53.0,100.0,,0.0
50%,38.5,59.0,279.5,,0.0
75%,43.75,66.75,481.25,,10800.0
max,88.0,100.0,80000.0,,4233599.0



DATASET: phq_events_full_20251207_063822.csv

[A] Head (First 5 Rows):


Unnamed: 0,id,title,start,end,category,labels,rank,local_rank,phq_attendance,location,updated
0,3Ad6pUgSsDZ9YyCuGu,Deer Tick,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,32,64,120.0,[-70.7040084 42.0919348],2025-11-18T20:12:23Z
1,3NKX7YnztkZp6zvndF,Watchtower,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,39,62,287.0,[-97.3951022 27.7977195],2025-09-22T00:14:57Z
2,3NjBcgX3Hewhk4SgRe,Max Stalling and Heather Stalling,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,36,59,207.0,[-90.0249 35.0611],2025-09-22T00:09:59Z
3,3UDqEwrBU5ZZ6MPmhD,Sunday Contra at the Monday Club- September,2025-09-08T00:00:00Z,2025-09-08T02:30:00Z,concerts,,39,65,289.0,[-90.355103 38.591047],2025-09-22T00:09:59Z
4,3dYkukhpwpErsSgxE5,Lulada Club,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,40,43,302.0,[-73.9573947 40.7218813],2025-12-05T21:57:38Z



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              30000 non-null  object 
 1   title           30000 non-null  object 
 2   start           30000 non-null  object 
 3   end             30000 non-null  object 
 4   category        30000 non-null  object 
 5   labels          0 non-null      float64
 6   rank            30000 non-null  int64  
 7   local_rank      30000 non-null  int64  
 8   phq_attendance  29714 non-null  float64
 9   location        30000 non-null  object 
 10  updated         30000 non-null  object 
dtypes: float64(2), int64(2), object(7)
memory usage: 2.5+ MB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,labels,rank,local_rank,phq_attendance
count,0.0,30000.0,30000.0,29714.0
mean,,40.163467,63.576833,1663.557
std,,11.847448,13.700361,15465.9
min,,0.0,0.0,7.0
25%,,31.0,56.0,120.25
50%,,38.0,62.0,253.0
75%,,44.0,71.0,535.0
max,,100.0,100.0,1538461.0



DATASET: phq_events_20251207_054229.csv

[A] Head (First 5 Rows):


Unnamed: 0,id,title,start,end,category,labels,rank,local_rank,phq_attendance,location,duration,place_hierarchies,updated
0,EjMD6djcRnZqDP8p84,Festival of Fountains,2025-05-09T13:00:00Z,2025-09-28T22:00:00Z,festivals,,100,100,600000.0,[-75.67148704 39.87429309],12301200,"[array(['6295630', '6255149', '6252001', '6254...",2025-06-02T23:09:48Z
1,4Vq9nLagBjR9XgBp2g,The Blakeslee Concert Series,2025-07-11T04:00:00Z,2025-09-27T03:59:59Z,festivals,,50,79,1000.0,[-77.917968 33.982445],6739199,"[array(['6295630', '6255149', '6252001', '4482...",2025-07-07T01:39:42Z
2,CApUtCHFeYWEkYZVuw,Ebensburg Farmer's Market,2025-07-12T04:00:00Z,2025-09-21T03:59:59Z,festivals,,38,67,250.0,[-78.726938 40.485397],6134399,"[array(['6295630', '6255149', '6252001', '6254...",2025-05-26T01:58:03Z
3,BD75L8EkVDNZLhotY5,Saturday Market,2025-07-12T06:00:00Z,2025-09-21T05:59:59Z,festivals,,54,80,1500.0,[-109.678224 48.553614],6134399,"[array(['6295630', '6255149', '6252001', '5667...",2025-01-20T00:37:32Z
4,3iLeJtm7SiFZkj7sCP,SummerStage,2025-08-01T04:00:00Z,2025-10-10T03:59:59Z,festivals,,91,89,110000.0,[-73.9728461 40.77248093],6047999,"[array(['6295630', '6255149', '6252001', '5128...",2025-11-13T21:29:07Z



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 50 non-null     object 
 1   title              50 non-null     object 
 2   start              50 non-null     object 
 3   end                50 non-null     object 
 4   category           50 non-null     object 
 5   labels             0 non-null      float64
 6   rank               50 non-null     int64  
 7   local_rank         50 non-null     int64  
 8   phq_attendance     49 non-null     float64
 9   location           50 non-null     object 
 10  duration           50 non-null     int64  
 11  place_hierarchies  50 non-null     object 
 12  updated            50 non-null     object 
dtypes: float64(2), int64(3), object(8)
memory usage: 5.2+ KB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,labels,rank,local_rank,phq_attendance,duration
count,0.0,50.0,50.0,49.0,50.0
mean,,71.6,88.42,110975.959184,3090707.0
std,,20.966445,17.393513,229139.969164,2291740.0
min,,0.0,0.0,250.0,431999.0
25%,,56.0,81.0,2000.0,1576799.0
50%,,74.5,100.0,17000.0,2505599.0
75%,,89.0,100.0,100000.0,4795199.0
max,,100.0,100.0,1000000.0,12301200.0



DATASET: phq_events_full_20251207_060912.csv

[A] Head (First 5 Rows):


Unnamed: 0,id,title,start,end,category,labels,rank,local_rank,phq_attendance,location,updated
0,3Ad6pUgSsDZ9YyCuGu,Deer Tick,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,32,64,120.0,[-70.7040084 42.0919348],2025-11-18T20:12:23Z
1,3NKX7YnztkZp6zvndF,Watchtower,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,39,62,287.0,[-97.3951022 27.7977195],2025-09-22T00:14:57Z
2,3NjBcgX3Hewhk4SgRe,Max Stalling and Heather Stalling,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,36,59,207.0,[-90.0249 35.0611],2025-09-22T00:09:59Z
3,3UDqEwrBU5ZZ6MPmhD,Sunday Contra at the Monday Club- September,2025-09-08T00:00:00Z,2025-09-08T02:30:00Z,concerts,,39,65,289.0,[-90.355103 38.591047],2025-09-22T00:09:59Z
4,3dYkukhpwpErsSgxE5,Lulada Club,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,40,43,302.0,[-73.9573947 40.7218813],2025-12-05T21:57:38Z



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              30000 non-null  object 
 1   title           30000 non-null  object 
 2   start           30000 non-null  object 
 3   end             30000 non-null  object 
 4   category        30000 non-null  object 
 5   labels          0 non-null      float64
 6   rank            30000 non-null  int64  
 7   local_rank      30000 non-null  int64  
 8   phq_attendance  29714 non-null  float64
 9   location        30000 non-null  object 
 10  updated         30000 non-null  object 
dtypes: float64(2), int64(2), object(7)
memory usage: 2.5+ MB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,labels,rank,local_rank,phq_attendance
count,0.0,30000.0,30000.0,29714.0
mean,,40.1662,63.5746,1664.236
std,,11.848158,13.699663,15467.69
min,,0.0,0.0,7.0
25%,,31.0,56.0,121.0
50%,,38.0,62.0,253.0
75%,,44.0,71.0,535.0
max,,100.0,100.0,1538461.0



DATASET: setlistfm_us_concerts_20251207_080243.csv

[A] Head (First 5 Rows):


Unnamed: 0,event_id,event_date,event_last_updated,artist_name,artist_mbid,tour_name,setlist_length,venue_name,venue_id,venue_city,venue_state,venue_country,lat,lng,url
0,b4fe126,09-12-2025,2025-11-07T14:44:06.526+0000,Thunderchief,43cd3ca4-441b-41c4-afa8-e570f110f3d8,,0,West Side Bowl,53d2c799,Youngstown,Ohio,US,41.09978,-80.649519,https://www.setlist.fm/setlist/thunderchief/20...
1,b4f51e6,09-12-2025,2025-11-19T14:31:20.162+0000,John Craigie,89b381cf-89f4-43d7-b85b-f53efb2bd1a8,,0,Animas City Theatre,63d2ca6f,Durango,Colorado,US,37.27528,-107.880067,https://www.setlist.fm/setlist/john-craigie/20...
2,b4f1156,09-12-2025,2025-12-07T02:06:21.479+0000,BIG ASS TRUCK I.E.,b0caf649-6cbb-4279-a511-d5b5a0a31a93,,0,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,https://www.setlist.fm/setlist/big-ass-truck-i...
3,b4f114a,09-12-2025,2025-11-14T06:54:02.205+0000,MAZENKO,b1ac5a5e-273b-480c-8425-cbbbedc84ba5,,0,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,https://www.setlist.fm/setlist/mazenko/2025/vi...
4,b4f0996,09-12-2025,2025-11-14T23:34:49.987+0000,Mark Tremonti,bbb1e25d-5ae6-4cdc-82ee-6c47736e61ae,Mark Tremonti Sings Frank Sinatra,0,Des Plaines Theatre,5bd02b3c,Des Plaines,Illinois,US,42.033362,-87.883399,https://www.setlist.fm/setlist/mark-tremonti/2...



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   event_id            2000 non-null   object 
 1   event_date          2000 non-null   object 
 2   event_last_updated  2000 non-null   object 
 3   artist_name         2000 non-null   object 
 4   artist_mbid         2000 non-null   object 
 5   tour_name           553 non-null    object 
 6   setlist_length      2000 non-null   int64  
 7   venue_name          1999 non-null   object 
 8   venue_id            2000 non-null   object 
 9   venue_city          2000 non-null   object 
 10  venue_state         2000 non-null   object 
 11  venue_country       2000 non-null   object 
 12  lat                 2000 non-null   float64
 13  lng                 2000 non-null   float64
 14  url                 2000 non-null   object 
dtypes

Unnamed: 0,setlist_length,lat,lng
count,2000.0,2000.0,2000.0
mean,2.546,37.475856,-91.273896
std,6.062447,4.985458,16.636323
min,0.0,25.774266,-124.08284
25%,0.0,33.748995,-98.493628
50%,0.0,38.895,-86.80249
75%,0.0,41.415362,-78.476678
max,44.0,47.925257,120.0



DATASET: bandsintown_artist_slugs.csv

[A] Head (First 5 Rows):


Unnamed: 0,artist_slug



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   artist_slug  0 non-null      object
dtypes: object(1)
memory usage: 132.0+ bytes

[C] Descriptive Statistics (Numeric):


Unnamed: 0,artist_slug
count,0.0
unique,0.0
top,
freq,



DATASET: musicbrainz_structured_features.csv

[A] Head (First 5 Rows):


Unnamed: 0,mbid,name,artist_names,start_date,place_name,tags,type
0,b971d65c-8a56-485f-971f-e2d22bf2b88b,Jazzchor der Universität Bonn at Trinitatiskirche,,2014-07-23,,[],Concert
1,f5d30095-5c4d-4a6a-8a50-65d7c56d8515,Jazzchor der Universität Bonn at Theatersaal d...,,2014-07-25,,[],Concert
2,054e5968-5f94-435b-a07a-785bf17636a9,Markus Land Quintett at Bonn Hbf/Thomas-Mann-S...,,2014-09-26,,[],Concert
3,29d57bf5-2aba-40fa-9596-9638728a3df6,Sax in the City at Bonn Hbf/Thomas-Mann-Straße,,2014-05-23,,[],Concert
4,a637d8db-9b49-4b30-ad07-07dad7eb914e,Merel Quartett at Felsenkapelle St. Michael,,2014-07-20,,[],Concert



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98389 entries, 0 to 98388
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mbid          98389 non-null  object 
 1   name          98389 non-null  object 
 2   artist_names  0 non-null      float64
 3   start_date    98034 non-null  object 
 4   place_name    0 non-null      float64
 5   tags          98389 non-null  object 
 6   type          98389 non-null  object 
dtypes: float64(2), object(5)
memory usage: 5.3+ MB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,artist_names,place_name
count,0.0,0.0
mean,,
std,,
min,,
25%,,
50%,,
75%,,
max,,



DATASET: setlistfm_us_concerts_20251207_074105.csv

[A] Head (First 5 Rows):


Unnamed: 0,event_id,event_date,event_last_updated,artist_name,artist_mbid,tour_name,venue_name,venue_id,venue_city,venue_state,venue_country,lat,lng,sets,info,url
0,b4fe126,09-12-2025,2025-11-07T14:44:06.526+0000,Thunderchief,43cd3ca4-441b-41c4-afa8-e570f110f3d8,,West Side Bowl,53d2c799,Youngstown,Ohio,US,41.09978,-80.649519,"{'set': array([], dtype=object)}",,https://www.setlist.fm/setlist/thunderchief/20...
1,b4f51e6,09-12-2025,2025-11-19T14:31:20.162+0000,John Craigie,89b381cf-89f4-43d7-b85b-f53efb2bd1a8,,Animas City Theatre,63d2ca6f,Durango,Colorado,US,37.27528,-107.880067,"{'set': array([], dtype=object)}",,https://www.setlist.fm/setlist/john-craigie/20...
2,b4f1156,09-12-2025,2025-12-07T02:06:21.479+0000,BIG ASS TRUCK I.E.,b0caf649-6cbb-4279-a511-d5b5a0a31a93,,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,"{'set': array([], dtype=object)}",,https://www.setlist.fm/setlist/big-ass-truck-i...
3,b4f114a,09-12-2025,2025-11-14T06:54:02.205+0000,MAZENKO,b1ac5a5e-273b-480c-8425-cbbbedc84ba5,,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,"{'set': array([], dtype=object)}",,https://www.setlist.fm/setlist/mazenko/2025/vi...
4,b4f0996,09-12-2025,2025-11-14T23:34:49.987+0000,Mark Tremonti,bbb1e25d-5ae6-4cdc-82ee-6c47736e61ae,Mark Tremonti Sings Frank Sinatra,Des Plaines Theatre,5bd02b3c,Des Plaines,Illinois,US,42.033362,-87.883399,"{'set': array([], dtype=object)}",,https://www.setlist.fm/setlist/mark-tremonti/2...



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   event_id            20 non-null     object 
 1   event_date          20 non-null     object 
 2   event_last_updated  20 non-null     object 
 3   artist_name         20 non-null     object 
 4   artist_mbid         20 non-null     object 
 5   tour_name           11 non-null     object 
 6   venue_name          20 non-null     object 
 7   venue_id            20 non-null     object 
 8   venue_city          20 non-null     object 
 9   venue_state         20 non-null     object 
 10  venue_country       20 non-null     object 
 11  lat                 20 non-null     float64
 12  lng                 20 non-null     float64
 13  sets                20 non-null     object 
 14  info                0 non-null      float64
 15  url  

Unnamed: 0,lat,lng,info
count,20.0,20.0,0.0
mean,36.700308,-97.705338,
std,4.498151,16.998374,
min,29.651634,-122.676207,
25%,33.892692,-113.492982,
50%,37.245303,-92.793919,
75%,40.11524,-82.227405,
max,45.523452,-75.163789,



DATASET: phq_events_full_20251207_065623.csv

[A] Head (First 5 Rows):


Unnamed: 0,id,title,start,end,category,labels,rank,local_rank,phq_attendance,location,updated
0,3Ad6pUgSsDZ9YyCuGu,Deer Tick,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,32,64,120.0,[-70.7040084 42.0919348],2025-11-18T20:12:23Z
1,3NKX7YnztkZp6zvndF,Watchtower,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,39,62,287.0,[-97.3951022 27.7977195],2025-09-22T00:14:57Z
2,3NjBcgX3Hewhk4SgRe,Max Stalling and Heather Stalling,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,36,59,207.0,[-90.0249 35.0611],2025-09-22T00:09:59Z
3,3UDqEwrBU5ZZ6MPmhD,Sunday Contra at the Monday Club- September,2025-09-08T00:00:00Z,2025-09-08T02:30:00Z,concerts,,39,65,289.0,[-90.355103 38.591047],2025-09-22T00:09:59Z
4,3dYkukhpwpErsSgxE5,Lulada Club,2025-09-08T00:00:00Z,2025-09-08T00:00:00Z,concerts,,40,43,302.0,[-73.9573947 40.7218813],2025-12-05T21:57:38Z



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              30000 non-null  object 
 1   title           30000 non-null  object 
 2   start           30000 non-null  object 
 3   end             30000 non-null  object 
 4   category        30000 non-null  object 
 5   labels          0 non-null      float64
 6   rank            30000 non-null  int64  
 7   local_rank      30000 non-null  int64  
 8   phq_attendance  29714 non-null  float64
 9   location        30000 non-null  object 
 10  updated         30000 non-null  object 
dtypes: float64(2), int64(2), object(7)
memory usage: 2.5+ MB

[C] Descriptive Statistics (Numeric):


Unnamed: 0,labels,rank,local_rank,phq_attendance
count,0.0,30000.0,30000.0,29714.0
mean,,40.162667,63.576267,1663.526
std,,11.847428,13.701072,15465.9
min,,0.0,0.0,7.0
25%,,31.0,56.0,120.25
50%,,38.0,62.0,253.0
75%,,44.0,71.0,534.75
max,,100.0,100.0,1538461.0



DATASET: setlistfm_us_concerts_20251207_080539.csv

[A] Head (First 5 Rows):


Unnamed: 0,event_id,event_date,event_last_updated,artist_name,artist_mbid,tour_name,setlist_length,venue_name,venue_id,venue_city,venue_state,venue_country,lat,lng,url
0,b4fe126,09-12-2025,2025-11-07T14:44:06.526+0000,Thunderchief,43cd3ca4-441b-41c4-afa8-e570f110f3d8,,0,West Side Bowl,53d2c799,Youngstown,Ohio,US,41.09978,-80.649519,https://www.setlist.fm/setlist/thunderchief/20...
1,b4f51e6,09-12-2025,2025-11-19T14:31:20.162+0000,John Craigie,89b381cf-89f4-43d7-b85b-f53efb2bd1a8,,0,Animas City Theatre,63d2ca6f,Durango,Colorado,US,37.27528,-107.880067,https://www.setlist.fm/setlist/john-craigie/20...
2,b4f1156,09-12-2025,2025-12-07T02:06:21.479+0000,BIG ASS TRUCK I.E.,b0caf649-6cbb-4279-a511-d5b5a0a31a93,,0,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,https://www.setlist.fm/setlist/big-ass-truck-i...
3,b4f114a,09-12-2025,2025-11-14T06:54:02.205+0000,MAZENKO,b1ac5a5e-273b-480c-8425-cbbbedc84ba5,,0,Vino's,1bd629ec,Little Rock,Arkansas,US,34.746481,-92.289595,https://www.setlist.fm/setlist/mazenko/2025/vi...
4,b4f0996,09-12-2025,2025-11-14T23:34:49.987+0000,Mark Tremonti,bbb1e25d-5ae6-4cdc-82ee-6c47736e61ae,Mark Tremonti Sings Frank Sinatra,0,Des Plaines Theatre,5bd02b3c,Des Plaines,Illinois,US,42.033362,-87.883399,https://www.setlist.fm/setlist/mark-tremonti/2...



[B] Info (Data Types and Missing Counts per Column):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   event_id            4000 non-null   object 
 1   event_date          4000 non-null   object 
 2   event_last_updated  4000 non-null   object 
 3   artist_name         4000 non-null   object 
 4   artist_mbid         4000 non-null   object 
 5   tour_name           1146 non-null   object 
 6   setlist_length      4000 non-null   int64  
 7   venue_name          3999 non-null   object 
 8   venue_id            4000 non-null   object 
 9   venue_city          4000 non-null   object 
 10  venue_state         4000 non-null   object 
 11  venue_country       4000 non-null   object 
 12  lat                 4000 non-null   float64
 13  lng                 4000 non-null   float64
 14  url                 4000 non-null   object 
dtypes

Unnamed: 0,setlist_length,lat,lng
count,4000.0,4000.0,4000.0
mean,4.1415,37.770741,-91.210832
std,7.420293,4.981224,18.344074
min,0.0,20.89111,-157.858333
25%,0.0,34.052,-98.493628
50%,0.0,39.099727,-86.784443
75%,7.0,41.499495,-78.638615
max,47.0,61.32139,120.0
