In [None]:
# import libraries and dependencies
import os
import requests
import mysql.connector
import datetime
import pandas as pd
import matplotlib.pyplot as plt

from dotenv import load_dotenv
from mysql.connector import Error
from datetime import datetime, timedelta

In [None]:
# Load variables from a local .env file into the process environment so the
# environment lookups in the configuration cells below (API keys, MySQL
# credentials) can find them.
load_dotenv()

In [None]:
# Credentials for the two external APIs, read from the process environment.
# Each is None when the corresponding variable is not set.
TICKETMASTER_API_KEY = os.environ.get('TICKETMASTER_API_KEY')
TOMORROW_API_KEY = os.environ.get('TOMORROW_API_KEY')

# Ticketmaster Discovery v2 event-search endpoint.
TICKETMASTER_Events_API_Endpoint = "https://app.ticketmaster.com/discovery/v2/events.json"
# The weather API endpoint is not defined here; it is built inside
# get_weather_forecast_data().

In [None]:
# MySQL connection settings, each read from the environment variable of the
# same name (None when the variable is not set).
MYSQL_HOST, MYSQL_USER, MYSQL_PASSWORD, MYSQL_DATABASE = (
    os.getenv(variable_name)
    for variable_name in ('MYSQL_HOST', 'MYSQL_USER', 'MYSQL_PASSWORD', 'MYSQL_DATABASE')
)

In [None]:
# function to handle the database connection
def get_db_connection():
    """
    Open a MySQL connection using the module-level MYSQL_* settings.

    Returns:
        The connection object when it reports itself as connected; None when
        the connection attempt raises a mysql.connector Error (the error is
        printed) or when the connection does not report as connected.
    """
    settings = {
        "host": MYSQL_HOST,
        "user": MYSQL_USER,
        "password": MYSQL_PASSWORD,
        "database": MYSQL_DATABASE,
    }
    established = None
    try:
        candidate = mysql.connector.connect(**settings)
        if candidate.is_connected():
            print("Connected to MySQL database")
            established = candidate
    except Error as db_error:
        print(f"Database connection error: {db_error}")
    return established

In [None]:
# Cynthia's events and venues code, wrapped in a function
def _first_venue(embedded):
    """Return the first venue dict of an event's '_embedded' payload, or {} if there is none."""
    if not isinstance(embedded, dict):
        return {}
    venues = embedded.get('venues')
    if isinstance(venues, list) and venues and isinstance(venues[0], dict):
        return venues[0]
    return {}


def _nested_value(mapping, *keys):
    """Follow keys through nested dicts; return None as soon as a level is missing."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def get_events_and_venues_data():
    """
    Pull Ticketmaster events for San Diego for the next 7 days,
    then parse and return two DataFrames:
      - events_df: event_id, event_name, start_date, venue_id, event_category
      - venues_df: venue_id, venue_name, venue_city, venue_state, venue_country, venue_location

    Missing fields in the API payload (no venue, no city, no classification,
    no coordinates, ...) become None instead of raising. venues_df holds one
    row per distinct venue_id; events without a venue contribute no venue row.
    Both frames are empty (but carry their columns) when no events are found.

    Raises:
        Exception: if the API answers with a non-200 status code.
        requests exceptions: on network failure or timeout.
    """
    event_columns = ['event_id', 'event_name', 'start_date', 'venue_id', 'event_category']
    venue_columns = ['venue_id', 'venue_name', 'venue_city', 'venue_state', 'venue_country', 'venue_location']

    # Define date range: today to 6 days later (7 days total).
    # NOTE(review): the dates come from the local clock but are labelled "Z"
    # (UTC) in the request -- confirm whether the window should be UTC-based.
    today = datetime.today()
    six_days_later = today + timedelta(days=6)
    start_date = today.strftime('%Y-%m-%dT00:00:00Z')
    end_date = six_days_later.strftime('%Y-%m-%dT23:59:59Z')

    params = {
        "apikey": TICKETMASTER_API_KEY,
        "city": "San Diego",
        "size": 200,
        "startDateTime": start_date,
        "endDateTime": end_date
    }

    print("Requesting Ticketmaster events...")
    # A timeout keeps a stalled connection from hanging the whole pipeline.
    response = requests.get(TICKETMASTER_Events_API_Endpoint, params=params, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Ticketmaster API Error: {response.status_code}, {response.text}")

    data = response.json()
    # NOTE(review): only the first page (up to "size" events) is read; further
    # pages, if the API reports any, are not fetched.
    events = data.get("_embedded", {}).get("events", [])
    if not events:
        print("No events found.")
        return pd.DataFrame(columns=event_columns), pd.DataFrame(columns=venue_columns)

    event_rows = []
    venue_rows = []
    for event in events:
        venue = _first_venue(event.get('_embedded'))

        classifications = event.get('classifications')
        first_classification = (
            classifications[0]
            if isinstance(classifications, list) and classifications
            else None
        )

        event_rows.append({
            'event_id': event.get('id'),
            'event_name': event.get('name'),
            # 'dates.start.localDate' is used as the start date (date only, no time).
            'start_date': _nested_value(event, 'dates', 'start', 'localDate'),
            'venue_id': venue.get('id'),
            'event_category': _nested_value(first_classification, 'segment', 'name'),
        })

        # An event without a usable venue id gets no venue row.
        if venue.get('id') is None:
            continue

        latitude = _nested_value(venue, 'location', 'latitude')
        longitude = _nested_value(venue, 'location', 'longitude')
        venue_rows.append({
            'venue_id': venue.get('id'),
            'venue_name': venue.get('name'),
            'venue_city': _nested_value(venue, 'city', 'name'),
            'venue_state': _nested_value(venue, 'state', 'stateCode'),
            'venue_country': _nested_value(venue, 'country', 'countryCode'),
            # Latitude and longitude combined into one "lat,lon" string.
            'venue_location': (
                f"{latitude},{longitude}"
                if latitude is not None and longitude is not None
                else None
            ),
        })

    # Explicit column lists fix the column order the insert step relies on.
    events_df = pd.DataFrame(event_rows, columns=event_columns)
    venues_df = (
        pd.DataFrame(venue_rows, columns=venue_columns)
        .drop_duplicates(subset='venue_id')
        .reset_index(drop=True)
    )

    print(f"Pulled {len(events_df)} events from API.")
    return events_df, venues_df

In [None]:
# Insert Cynthia's events and venues data into the database
def insert_events_and_venues(events_df, venues_df, connection):
    """
    Insert venues and events data into MySQL.
    Before insertion, delete any past events.

    Args:
        events_df: DataFrame whose columns are, in order, event_id, event_name,
            start_date, venue_id, event_category (values are bound positionally).
        venues_df: DataFrame whose columns are, in order, venue_id, venue_name,
            venue_city, venue_state, venue_country, venue_location.
        connection: open MySQL connection; it is left open for the caller.

    Returns:
        True when all three steps (delete, venues, events) were committed,
        False when a database error occurred. On error the message is printed,
        the uncommitted step is rolled back, and steps committed earlier stay.
    """
    def _sql_params(row):
        # Missing values (None/NaN/NaT) are bound as SQL NULL.
        return tuple(None if pd.isna(value) else value for value in row)

    insert_venue_query = """
        INSERT INTO venues (venue_id, name, city, state, country, location)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE 
            name = VALUES(name),
            city = VALUES(city),
            state = VALUES(state),
            country = VALUES(country),
            location = VALUES(location);
        """
    # Note: our events table expects columns: event_id, name, start_date, venue_id, and a category column.
    insert_event_query = """
        INSERT INTO events (event_id, name, start_date, venue_id, category)
        VALUES (%s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE 
            name = VALUES(name),
            start_date = VALUES(start_date),
            venue_id = VALUES(venue_id),
            category = VALUES(category);
        """

    # A venue row without an id cannot be keyed on venue_id, so it is skipped.
    venue_rows = venues_df
    if 'venue_id' in venues_df.columns:
        venue_rows = venues_df[venues_df['venue_id'].notna()]
        skipped = len(venues_df) - len(venue_rows)
        if skipped:
            print(f"Skipped {skipped} venue rows without a venue_id.")

    cursor = None
    try:
        cursor = connection.cursor()

        # Delete past events (using 'start_date' column)
        cursor.execute("DELETE FROM events WHERE start_date < CURDATE();")
        print(f"Deleted {cursor.rowcount} past events.")
        connection.commit()

        # Insert/update venues
        for _, row in venue_rows.iterrows():
            cursor.execute(insert_venue_query, _sql_params(row))
        connection.commit()
        print("Venues inserted/updated successfully.")

        # Insert/update events
        for _, row in events_df.iterrows():
            cursor.execute(insert_event_query, _sql_params(row))
        connection.commit()
        print("Events inserted/updated successfully.")
        return True
    except Error as e:
        print(f"Error inserting events/venues: {e}")
        # Discard the half-finished step so it is not committed by a later commit().
        try:
            connection.rollback()
        except Error as rollback_error:
            print(f"Rollback failed: {rollback_error}")
        return False
    finally:
        # Close the cursor on both the success and the error path.
        if cursor is not None:
            cursor.close()

In [None]:
# Austin's weather code, wrapped in a function
def get_weather_forecast_data():
    """
    Pull hourly weather forecasts for San Diego from Tomorrow.io.
    Process the data into a DataFrame that matches the new weather_forecasts schema.
    Schema includes:
        forecast_time (DATETIME),
        forecast_temperature (DECIMAL),
        forecast_temp_apparent (DECIMAL),
        forecast_humidity (DECIMAL),
        rain_intensity (DECIMAL),
        forecast_winds (DECIMAL),
        weather_code (INT),
        weather_icon (TEXT)

    The forecast endpoint is queried by "location"; the time span covered is
    whatever the endpoint returns in its hourly timeline.

    Returns:
        DataFrame with one row per hourly interval and the columns listed
        above. An empty DataFrame is returned when the API answers with a
        non-200 status or when the response has no 'timelines' -> 'hourly'
        list. Intervals without a parseable 'time' are skipped.

    Raises:
        requests exceptions: on network failure or timeout.
    """
    columns = [
        'forecast_time', 'forecast_temperature', 'forecast_temp_apparent',
        'forecast_humidity', 'rain_intensity', 'forecast_winds',
        'weather_code', 'weather_icon',
    ]

    print("Requesting Tomorrow.io weather forecast data...")
    # The forecast endpoint identifies the place through "location"
    # (a place name or "lat,lon"), not "city".
    params = {
        "apikey": TOMORROW_API_KEY,
        "location": "San Diego",
    }
    endpoint = "https://api.tomorrow.io/v4/weather/forecast"
    # A timeout keeps a stalled connection from hanging the whole pipeline.
    response = requests.get(endpoint, params=params, timeout=30)

    if response.status_code != 200:
        print(f"Weather API call failed: {response.status_code}, {response.text}")
        return pd.DataFrame()

    data = response.json()

    # Mapping from weather code to icon filename (as provided).
    weather_code_icons = {
        1100: "mostly_clear_day.svg",
        1101: "partly_cloudy.svg",
        1102: "mostly_cloudy.svg",
        1001: "cloudy.svg",
        2100: "fog_light.svg",
        2000: "fog.svg",
        4000: "drizzle.svg",
        4200: "rain_light.svg",
        4001: "rain.svg",
        4201: "rain_heavy.svg",
        6000: "freezing_drizzle.svg",
        6200: "freezing_rain_light.svg",
        6001: "freezing_rain.svg",
        6201: "freezing_rain_heavy.svg",
        8000: "tstorm.svg",
    }
    base_url = "https://raw.githubusercontent.com/tomorrow-io-api/tomorrow-weather-codes/main/icons/"

    # The hourly timeline is expected under data['timelines']['hourly'].
    try:
        intervals = data['timelines']['hourly']
    except (KeyError, TypeError):
        intervals = None
    if not isinstance(intervals, list):
        print("Unexpected weather API response structure.")
        return pd.DataFrame()

    weather_data = []
    skipped = 0
    for interval in intervals:
        if not isinstance(interval, dict):
            skipped += 1
            continue

        # Convert the ISO time string to MySQL DATETIME format; a missing or
        # unparseable time yields None/NaT and the interval is skipped.
        timestamp = pd.to_datetime(interval.get('time'), errors='coerce')
        if pd.isna(timestamp):
            skipped += 1
            continue
        forecast_time = timestamp.strftime('%Y-%m-%d %H:%M:%S')

        values = interval.get('values') or {}
        weather_code = values.get('weatherCode')
        # Codes not in the mapping fall back to "default.png".
        icon_filename = weather_code_icons.get(weather_code, "default.png")

        weather_data.append({
            'forecast_time': forecast_time,
            'forecast_temperature': values.get('temperature'),
            'forecast_temp_apparent': values.get('temperatureApparent'),
            'forecast_humidity': values.get('humidity'),
            'rain_intensity': values.get('rainIntensity'),
            'forecast_winds': values.get('windSpeed'),
            'weather_code': weather_code,
            'weather_icon': f"{base_url}{icon_filename}",
        })

    if skipped:
        print(f"Skipped {skipped} weather intervals without a usable time.")

    # Explicit columns keep the schema even when no interval was usable.
    weather_df = pd.DataFrame(weather_data, columns=columns)
    print(f"Retrieved {len(weather_df)} hourly weather records.")
    return weather_df

In [None]:
# Insert Austin's weather data into the database
def insert_weather_forecast_data(weather_df, connection):
    """
    Insert weather forecast data into the weather_forecasts table.

    Args:
        weather_df: DataFrame with the columns forecast_time,
            forecast_temperature, forecast_temp_apparent, forecast_humidity,
            rain_intensity, forecast_winds, weather_code, weather_icon.
        connection: open MySQL connection; it is left open for the caller.

    Returns:
        True when all rows were inserted and committed, False when a database
        error occurred (the error is printed and the batch is rolled back).
    """
    columns = [
        'forecast_time', 'forecast_temperature', 'forecast_temp_apparent',
        'forecast_humidity', 'rain_intensity', 'forecast_winds',
        'weather_code', 'weather_icon',
    ]
    insert_query = """
            INSERT INTO weather_forecasts (
                forecast_time, forecast_temperature, forecast_temp_apparent, 
                forecast_humidity, rain_intensity, forecast_winds, weather_code, weather_icon
            )
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """

    cursor = None
    inserted = 0
    try:
        cursor = connection.cursor()
        for row in weather_df[columns].itertuples(index=False, name=None):
            # Missing values (None/NaN) are bound as SQL NULL.
            cursor.execute(insert_query, tuple(None if pd.isna(value) else value for value in row))
            inserted += 1
        connection.commit()
        # cursor.rowcount only reflects the last single-row execute, so the
        # rows are counted here instead.
        print(f"Inserted {inserted} weather forecast records.")
        return True
    except Error as e:
        print(f"Error during weather data insertion: {e}")
        # Discard the partial batch so a later commit() cannot persist it.
        try:
            connection.rollback()
        except Error as rollback_error:
            print(f"Rollback failed: {rollback_error}")
        return False
    finally:
        # Close the cursor on both the success and the error path.
        if cursor is not None:
            cursor.close()

In [None]:
# TODO: decide whether to keep this. It is the visualization step split out of Cynthia's code.
def visualize_data(events_df):
    """
    (Optional) Show a bar chart of how many rows of events_df share each event name.

    Args:
        events_df: DataFrame with an 'event_name' column.

    The bars are ordered from the most to the least frequent event name and
    the figure is displayed with plt.show(); nothing is returned.
    """
    # One row per event name with its number of occurrences, largest first
    name_frequencies = (
        events_df
        .groupby('event_name')
        .size()
        .reset_index(name='count')
        .sort_values(by='count', ascending=False)
    )
    bar_labels = name_frequencies['event_name']
    bar_heights = name_frequencies['count']

    plt.figure(figsize=(10, 6))
    plt.bar(bar_labels, bar_heights, color='skyblue')
    plt.title('Events Count')
    plt.xlabel('Event')
    plt.ylabel('Count')
    # Slanted, right-aligned tick labels keep long event names readable
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

In [None]:
# the main function tying it all together
def main():
    """
    Run the ETL pipeline: events/venues first, then weather forecasts.

    One database connection is opened for the whole run and is always closed
    at the end. The two stages are independent: a failure in the events/venues
    stage is reported and the weather stage still runs. A stage counts as
    failed when it raises, or when its insert helper explicitly returns False.
    The final message says whether any stage failed.
    """
    # Get a single database connection for the run
    connection = get_db_connection()
    if not connection:
        print("Exiting due to database connection error.")
        return

    had_errors = False
    try:
        # --- Process Events and Venues ---
        try:
            events_df, venues_df = get_events_and_venues_data()
            if not events_df.empty and not venues_df.empty:
                if insert_events_and_venues(events_df, venues_df, connection) is False:
                    had_errors = True
            else:
                print("No events/venues data to insert.")
        except Exception as e:
            had_errors = True
            print(f"Error during events/venues processing: {e}")

        # --- Process Weather Forecasts ---
        try:
            weather_df = get_weather_forecast_data()
            if not weather_df.empty:
                if insert_weather_forecast_data(weather_df, connection) is False:
                    had_errors = True
            else:
                print("No weather forecast data to insert.")
        except Exception as e:
            had_errors = True
            print(f"Error during weather forecast processing: {e}")

        # Optionally, visualize events data
        # visualize_data(events_df)
    finally:
        # Release the connection even if the run is interrupted.
        connection.close()

    # Only claim success when no stage reported a failure.
    if had_errors:
        print("ETL pipeline finished with errors; see messages above.")
    else:
        print("ETL pipeline executed successfully.")


if __name__ == "__main__":
    main()