Group Project

In [36]:
pip install mysql-connector-python

Note: you may need to restart the kernel to use updated packages.


In [29]:
import os
from dotenv import load_dotenv
import mysql.connector 
from mysql.connector import Error
import requests
import pandas as pd
from datetime import datetime, timedelta


In [30]:
load_dotenv()  # Load environment variables from .env file

True

In [31]:
TICKETMASTER_API_KEY = os.getenv('TICKETMASTER_API_KEY')
TOMORROW_API_KEY = os.getenv('TOMORROW_API_KEY')   

TICKETMASTER_Events_API_Endpoint = "https://app.ticketmaster.com/discovery/v2/events.json"
TOMORROW_API_Endpoint = "https://api.tomorrow.io/v4/weather/forecast"


In [32]:
MYSQL_HOST = os.getenv('MYSQL_HOST')
MYSQL_USER = os.getenv('MYSQL_USER')
MYSQL_PASSWORD = os.getenv('MYSQL_PASSWORD')
MYSQL_DATABASE = os.getenv('MYSQL_DATABASE')

conn = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    database=MYSQL_DATABASE
)

In [33]:
# In order to pull the next 7 days of events
# Get today's date
today = datetime.today()

# Get the date 6 days from today
six_days_later = today + timedelta(days=6)

# Format the dates in the correct format (e.g., 'YYYY-MM-DD')
start_date = today.strftime('%Y-%m-%dT00:00:00Z')
end_date = six_days_later.strftime('%Y-%m-%dT23:59:59Z')

In [34]:
# EVENTS API
# API pull with parameters
params = {
    "apikey": TICKETMASTER_API_KEY,       
    "city": "San Diego", 
    "size": 200,
    "startDateTime": start_date,  # Start date (today)
    "endDateTime": end_date      # End date (7 days later)                 
}

# Send the request to the API
response = requests.get(TICKETMASTER_Events_API_Endpoint, params=params)

In [None]:
cursor = conn.cursor()

if conn.is_connected():
    print("Connection is active")
else:
    print("Connection is not active")
    # Reconnect logic here


if response.status_code == 200:
    data = response.json()
    events = data.get("_embedded", {}).get("events", [])

    # Convert to DataFrame
    df = pd.DataFrame(events)

    # Extract event details
    df['event_id'] = df['id']
    df['event_name'] = df['name']
    df['event_start_date'] = df['dates'].apply(lambda x: x['start']['localDate'] if isinstance(x, dict) and 'start' in x else None)
    df['event_category'] = df['classifications'].apply(lambda x: x[0]['segment']['name'] if isinstance(x, list) and len(x) > 0 and 'segment' in x[0] else None
)


    # Extract venue details
    df['venue_id'] = df['_embedded'].apply(lambda x: x['venues'][0]['id'] if isinstance(x, dict) and 'venues' in x else None)
    df['venue_name'] = df['_embedded'].apply(lambda x: x['venues'][0]['name'] if isinstance(x, dict) and 'venues' in x else None)
    df['venue_city'] = df['_embedded'].apply(lambda x: x['venues'][0]['city']['name'] if isinstance(x, dict) and 'venues' in x else None)
    df['venue_state'] = df['_embedded'].apply(lambda x: x['venues'][0]['state']['stateCode'] if isinstance(x, dict) and 'venues' in x and 'state' in x['venues'][0] else None)
    df['venue_zip'] = df['_embedded'].apply(lambda x: x['venues'][0].get('postalCode') if isinstance(x, dict) and 'venues' in x else None)
    df['venue_country'] = df['_embedded'].apply(lambda x: x['venues'][0]['country']['countryCode'] if isinstance(x, dict) and 'venues' in x else None)
    df['venue_location'] = df['_embedded'].apply(lambda x: f"{x['venues'][0]['location']['latitude']},{x['venues'][0]['location']['longitude']}" if isinstance(x, dict) and 'venues' in x and 'location' in x['venues'][0] else None
)





    # Select final columns for events and venues
    events_df = df[['event_id', 'event_name', 'event_start_date', 'venue_id', 'event_category']]
    venues_df = df[['venue_id', 'venue_name', 'venue_city', 'venue_state', 'venue_country', 'venue_location']].drop_duplicates()

    # Assuming `conn` is already connected
    try:

        # Insert venues (ignore duplicates)
        insert_venue_query = """
        INSERT INTO venues (venue_id, name, city, state, country, location)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE 
            name=VALUES(name), 
            city=VALUES(city), 
            state=VALUES(state),
            country=VALUES(country),
            location=VALUES(location);
        """

        for _, row in venues_df.iterrows():
            cursor.execute(insert_venue_query, tuple(row))
        conn.commit()  # Commit after inserting venues

        # Insert events (ignore duplicates)
        insert_event_query = """
        INSERT INTO events (event_id, name, start_date, venue_id, category)
        VALUES (%s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE 
            name=VALUES(name),
            start_date=VALUES(start_date),
            venue_id=VALUES(venue_id),
            category=VALUES(category);
        """

        for _, row in events_df.iterrows():
            cursor.execute(insert_event_query, tuple(row))
        conn.commit()  # Commit after inserting events

        
        print(f"Inserted {cursor.rowcount} records into MySQL!")

    except Error as e:
        print(f"Error: {e}")

    finally:
        if 'conn' in locals() and conn.is_connected():
            cursor.close()
            print("MySQL connection all set!")

else:
    print(f"API call failed with status {response.status_code}")
    print("Response details:", response.text)

Connection is active
Inserted 1 records into MySQL!
MySQL connection all set YAY!
