In [1]:
# Switch read by connect(): when True it opens data/musiccatalogue.db under the
# parent of the folder returned by get_project_root_folder(); when False it opens
# the path held in the MUSIC_CATALOGUE_DB environment variable.
USE_DEVELOPMENT_DATABASE = True

In [2]:
%run pathutils.ipynb

In [3]:
# Labels for the values of the "Vocals" column; load_artists() applies this map
# to build the "VocalPresence" column.
VOCAL_MAP = {0: "Unknown", 1: "Instrumental", 2: "Mixed", 3: "Vocal Led"}

# Labels for the values of the "Ensemble" column; load_artists() applies this map
# to build the "EnsembleType" column.
ENSEMBLE_MAP = {
    0: "Unknown", 1: "Band", 2: "Big Band", 3: "Choir", 4: "Orchestra",
    5: "Quartet", 6: "Small Combo", 7: "Solo", 8: "Studio", 9: "Trio",
}

In [4]:
import sqlite3
import os
from pathlib import Path

def connect():
    """Open a SQLite connection to the music catalogue database.

    The database location depends on USE_DEVELOPMENT_DATABASE:

    * truthy  - data/musiccatalogue.db under the parent of the folder
      returned by get_project_root_folder()
    * falsy   - the path stored in the MUSIC_CATALOGUE_DB environment variable

    Returns:
        The sqlite3 connection object; the caller is responsible for closing it.

    Raises:
        KeyError: if the environment variable is needed but not set.
    """
    if not USE_DEVELOPMENT_DATABASE:
        return sqlite3.connect(os.environ["MUSIC_CATALOGUE_DB"])

    # The development database sits beside the folder reported as project root
    catalogue_root = Path(get_project_root_folder()).parent
    return sqlite3.connect(catalogue_root / "data" / "musiccatalogue.db")

In [5]:
from pathlib import Path
import sqlparse

def load_sql(sql_file, placeholder_values = None, show_query = False):
    """Load a query from the "sql" folder and fill in its placeholders.

    Args:
        sql_file: Name of the file inside the "sql" folder (resolved relative
            to the current working directory).
        placeholder_values: Optional mapping of placeholder name to value.
            Every occurrence of "$<name>" in the query text is replaced by
            str(value), so non-string values such as integers are accepted.
        show_query: When true, print a pretty-printed form of the final query.

    Returns:
        The query as a single-line string with placeholders substituted.

    Raises:
        OSError: if the query file cannot be opened.
    """
    # Read the query file, collapsing it onto one line.
    # NOTE: a "--" line comment in the file would comment out everything after
    # it once the newlines are gone, so query files must use /* ... */ comments.
    query_file_path = Path("sql") / sql_file
    with open(query_file_path.absolute(), "r") as f:
        query = f.read().replace("\n", " ")

    # Replace placeholders with real values
    if placeholder_values:
        # SECURITY: this is plain text substitution with no quoting or escaping.
        # Only pass trusted values; use bound parameters for user input.
        # Longest names go first so "$id" cannot clobber the start of "$id_max".
        ordered_keys = sorted(placeholder_values, key=lambda k: len(str(k)), reverse=True)
        for key in ordered_keys:
            query = query.replace(f"${key}", str(placeholder_values[key]))

    # Show a pretty-printed form of the query
    if show_query:
        print(sqlparse.format(query, reindent=True, keyword_case="upper"))
    return query


In [6]:
import pandas as pd

def load_genre_statistics(connection):
    """Run sql/genre-statistics.sql on the connection and return the rows as a DataFrame."""
    return pd.read_sql_query(load_sql("genre-statistics.sql"), connection)

In [7]:
import pandas as pd

def load_artist_statistics(connection):
    """Run sql/artist-statistics.sql on the connection and return the rows as a DataFrame."""
    return pd.read_sql_query(load_sql("artist-statistics.sql"), connection)

In [8]:
import pandas as pd

def load_monthly_spend(connection):
    """Run sql/monthly-spend.sql on the connection and return the rows as a DataFrame."""
    return pd.read_sql_query(load_sql("monthly-spend.sql"), connection)

In [9]:
import pandas as pd

def load_retailer_statistics(connection):
    """Run sql/retailer-statistics.sql and return the rows without the "Id" column.

    Raises:
        KeyError: if the query result has no "Id" column to drop.
    """
    retailers = pd.read_sql_query(load_sql("retailer-statistics.sql"), connection)
    return retailers.drop(columns=["Id"])

In [10]:
def load_artists(connection):
    """Run sql/artists.sql and add readable label columns to the result.

    Two columns are appended to the query result:

    * "VocalPresence" - the "Vocals" column translated through VOCAL_MAP
    * "EnsembleType"  - the "Ensemble" column translated through ENSEMBLE_MAP

    Values with no entry in the relevant map become "Unknown".
    """
    artists = pd.read_sql_query(load_sql("artists.sql"), connection)
    return artists.assign(
        VocalPresence=artists["Vocals"].map(VOCAL_MAP).fillna("Unknown"),
        EnsembleType=artists["Ensemble"].map(ENSEMBLE_MAP).fillna("Unknown"),
    )

In [11]:
def load_artist_moods(connection):
    """Run sql/artist-moods.sql on the connection and return the rows as a DataFrame."""
    return pd.read_sql_query(load_sql("artist-moods.sql"), connection)

In [12]:
def load_mood_aggregates(connection):
    """Run sql/mood-aggregates.sql on the connection and return the rows as a DataFrame."""
    return pd.read_sql_query(load_sql("mood-aggregates.sql"), connection)