## Połączenie z bazą SQL

In [3]:
try:

    def connect():
        """Open a new psycopg2 connection to the local `musicbrainz` database.

        The password can be overridden with the MUSICBRAINZ_DB_PASSWORD
        environment variable; when it is unset the previous hard-coded value
        is used, so existing setups keep working.
        """
        import os
        import psycopg2
        return psycopg2.connect(
            host="localhost",
            database="musicbrainz",
            user="postgres",
            password=os.environ.get("MUSICBRAINZ_DB_PASSWORD", "123")
        )
    
    # Single module-level connection shared by SQL() and SQL_silent().
    CONNECTION = connect()

    print("Połączono z bazą danych")

    def _rollback_quietly():
        """Roll back the shared connection, ignoring errors raised by the rollback itself."""
        try:
            CONNECTION.rollback()
        except Exception:
            # The original error is the one worth reporting; a failed rollback
            # (e.g. connection already closed) must not mask it.
            pass

    def SQL(q, params=None):
        """Run query `q` on the shared connection and return the result as a DataFrame.

        `params` is forwarded to `pandas.read_sql_query`. If the query fails,
        the open transaction is rolled back before the exception is re-raised,
        so the shared connection stays usable for the following cells.
        """
        from pandas import read_sql_query
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=UserWarning)
            
            # UserWarning: pandas only supports SQLAlchemy connectable 
            # (engine/connection) or database string URI or sqlite3 DBAPI2 connection. 
            # Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.
            
            try:
                return read_sql_query(q, CONNECTION, params=params)
            except Exception:
                _rollback_quietly()
                raise
        
    def SQL_silent(q, params=None):
        """Execute statement `q` and commit; return the cursor's rowcount.

        On any error the message is printed, the transaction is rolled back
        and None is returned (errors are deliberately not raised).
        """
        try:
            # The context manager closes the cursor even when execute() fails.
            with CONNECTION.cursor() as cur:
                cur.execute(q, params)
                affected = cur.rowcount
            CONNECTION.commit()
            return affected
        except Exception as e:
            print("Błąd:", e)
            # Without a rollback the connection stays in an aborted
            # transaction and every later statement would fail too.
            _rollback_quietly()
            return None
    
except Exception as e:
    print("Nie udało się połączyć z bazą danych. Błąd:", e)

Połączono z bazą danych


## Baza relacji

In [4]:
# Build the features_pl relation table (track id -> artist id) for artists
# whose area is Poland, then display the table's column definitions.
# The table is emptied before the insert so that re-running this cell
# rebuilds it instead of appending a duplicate copy of every row.
SQL("""
    
    BEGIN;

        --DROP TABLE IF EXISTS features_pl;
        CREATE TABLE IF NOT EXISTS features_pl (
            track INT NOT NULL,
            artist INT NOT NULL
        );

        -- Make the cell idempotent: start from an empty table on every run.
        TRUNCATE features_pl;
        
        WITH pl AS (SELECT id FROM musicbrainz.area WHERE UPPER(name)='POLAND')
        INSERT INTO features_pl (track, artist)
        SELECT
            tra.id AS track, art.id AS artist
        FROM musicbrainz.track AS tra
        LEFT JOIN musicbrainz.artist_credit AS cre 
            ON cre.id = tra.artist_credit
        LEFT JOIN musicbrainz.artist_credit_name AS cre_many2many 
            ON cre.id = cre_many2many.artist_credit
        LEFT JOIN musicbrainz.artist AS art
            ON art.id = cre_many2many.artist
        WHERE art.area IN (SELECT id FROM pl);
    
    COMMIT;

    SELECT
        column_name, data_type, is_nullable
    FROM
        information_schema.columns
    WHERE
        table_name = 'features_pl';
    
""").set_index('column_name')

Unnamed: 0_level_0,data_type,is_nullable
column_name,Unnamed: 1_level_1,Unnamed: 2_level_1
track,integer,NO
artist,integer,NO


## Spotify

In [5]:
import spotipy, requests
from spotipy.oauth2 import SpotifyClientCredentials

def load_creds():
    """Load variables from a .env file into the process environment via python-dotenv."""
    import dotenv
    dotenv.load_dotenv()


# Must run before the credentials manager is created: it is constructed
# without explicit arguments below.
load_creds()

client_credentials_manager = SpotifyClientCredentials()
# Shared Spotify client used by the popularity lookup later in the notebook.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Baza spotify

In [6]:
# Create the `spotify` cache table if it does not exist yet, then display its
# column definitions. Each row links a Spotify track id (primary key) to a
# MusicBrainz track id and stores the fetched popularity; fetch_date defaults
# to the current date at insert time.
# CREATE TABLE IF NOT EXISTS keeps this cell safe to re-run.
SQL("""
    
    BEGIN;

        --DROP TABLE IF EXISTS spotify;
        CREATE TABLE IF NOT EXISTS spotify (
            id VARCHAR PRIMARY KEY,
            track INT REFERENCES musicbrainz.track(id) NOT NULL,
            popularity FLOAT,
            fetch_date DATE DEFAULT CURRENT_DATE
        );
    
    COMMIT;

    SELECT
        column_name, data_type, is_nullable
    FROM
        information_schema.columns
    WHERE
        table_name = 'spotify';

""")

Unnamed: 0,column_name,data_type,is_nullable
0,id,character varying,NO
1,track,integer,NO
2,popularity,double precision,YES
3,fetch_date,date,YES


## Pobieranie relacji

In [7]:
def get_artist_tracks_relations(artist:str):
    """Fetch the collaboration neighbourhood of `artist` from the database.

    The artist is matched by name, case-insensitively. "Collaborators" are all
    artists in `features_pl` that share at least one track with the matched
    artist (the matched artist included).

    Returns a tuple of three DataFrames:
      * relations_tracks_artists - every `features_pl` row (track, artist)
        belonging to a collaborator,
      * artists_labels - collaborator names, indexed by artist id,
      * tracks_labels - one row per track of the collaborators, indexed by
        track id, with columns name, release, year, month, day.
    """

    # CTE chain shared by the three queries below (previously copy-pasted).
    # Both sides of the name comparison are upper-cased by PostgreSQL so the
    # same case-folding rules apply to the stored name and to the parameter.
    collaborators_cte = """

        WITH

            target AS (
                SELECT id 
                FROM musicbrainz.artist 
                WHERE UPPER(name) = UPPER(%s)
            ),
        
            output AS (
                SELECT track
                FROM features_pl
                WHERE artist IN (SELECT id FROM target)
            ),

            colab AS (
                SELECT DISTINCT artist FROM features_pl 
                WHERE track IN (SELECT track FROM output)
            )
    """
    params = (artist,)

    relations_tracks_artists = SQL(collaborators_cte + """
        SELECT * FROM features_pl 
            WHERE artist IN (SELECT artist FROM colab);

    """, params)

    artists_labels = SQL(collaborators_cte + """
        SELECT id, name FROM musicbrainz.artist
            WHERE id IN (SELECT artist FROM colab);

    """, params).set_index('id')

    # A release can have several release_country rows with different dates.
    # DISTINCT ON keeps exactly one row per track (the earliest known date,
    # NULL dates last) so the returned index is unique and downstream joins
    # on the track id do not multiply rows.
    tracks_labels = SQL(collaborators_cte + """
        SELECT DISTINCT ON (features_pl.track)
            features_pl.track AS id, 
            track.name, release.name as release,
            date.date_year as year,
            date.date_month as month,
            date.date_day as day
            
            FROM features_pl
        LEFT JOIN musicbrainz.track AS track
            ON track.id = features_pl.track

        LEFT JOIN musicbrainz.medium AS medium
            ON medium.id = track.medium

        LEFT JOIN musicbrainz.release AS release
            ON release.id = medium.release
                   
        LEFT JOIN musicbrainz.release_country AS date
            ON release.id = date.release

        WHERE features_pl.artist IN (SELECT artist FROM colab)

        ORDER BY
            features_pl.track,
            date.date_year NULLS LAST,
            date.date_month NULLS LAST,
            date.date_day NULLS LAST;

    """, params).set_index('id')

    return relations_tracks_artists, artists_labels, tracks_labels


## Ułożenie grafu

In [8]:
def get_graph_layout(df, source, target, seed=42):
    """Compute a 2D spring layout for the graph described by an edge list.

    Parameters:
        df: DataFrame holding one edge per row.
        source, target: names of the columns of `df` with the edge endpoints.
        seed: random seed passed to `networkx.spring_layout` so the layout is
            reproducible between runs; pass None for a random layout.

    Returns a pair of DataFrames:
        nodes - indexed by node id, with columns x and y,
        edges - columns x and y listing, for every edge, the two endpoint
            coordinates followed by None (a line break for line plots).

    NOTE(review): values from the `source` and `target` columns share one node
    namespace in the graph. If the two columns hold ids from different tables
    that can overlap, equal values become a single node - confirm with callers.
    """

    import networkx as nx
    import pandas as pd

    graph = nx.from_pandas_edgelist(df, source, target)
    pos = nx.spring_layout(graph, seed=seed)

    # Explicit columns keep set_index('id') working for an empty graph.
    nodes = pd.DataFrame(
        [{ 'id': node, 'x': pos[node][0], 'y': pos[node][1] } for node in graph.nodes],
        columns=['id', 'x', 'y']
    ).set_index('id')

    edge_x, edge_y = [], []
    for start, end in graph.edges:
        edge_x.extend((pos[start][0], pos[end][0], None))
        edge_y.extend((pos[start][1], pos[end][1], None))

    return nodes, pd.DataFrame({ 'x': edge_x, 'y': edge_y })

In [9]:
def get_artist_track_graph(artist:str):
    """Build the artist/track collaboration graph for `artist`.

    Returns (relations, artist_nodes, track_nodes, edges): the raw
    track-artist relations, the artist and track label frames right-joined
    with their layout coordinates, and the edge coordinate frame.
    """

    relations, artists_labels, tracks_labels = get_artist_tracks_relations(artist)
    nodes, edges = get_graph_layout(relations, 'track', 'artist')

    # Right joins keep every labelled artist/track, even one without a node.
    artist_nodes = nodes.join(artists_labels, how='right')
    track_nodes = nodes.join(tracks_labels, how='right')

    return relations, artist_nodes, track_nodes, edges

## Pobranie popularności

In [69]:
def spotify_tracks_popularity(relations, artist_nodes, track_nodes):
    """Look up Spotify id and popularity for every track in `track_nodes`.

    Tracks already present in the `spotify` table are served from there.
    The others are searched on Spotify by title and release name; a hit is
    accepted only when the expected artist name appears among the Spotify
    track's artists, and accepted hits are stored in the `spotify` table.

    Parameters:
        relations: track-artist relations with `track` and `artist` columns.
        artist_nodes: artist frame indexed by artist id with a `name` column.
        track_nodes: track frame indexed by track id with `name` and `release`.

    Returns a DataFrame with columns `id` and `popularity` (None when nothing
    usable was found), indexed like the de-duplicated search frame.
    """

    import pandas as pd
    from tqdm.notebook import tqdm

    searches = (track_nodes
        .rename(columns={'name': 'title'})
        .merge(relations.groupby('track').first(), right_on='track', left_index=True)
        .merge(artist_nodes, how='left', right_index=True, left_on='artist')
    )[['title', 'release', 'name']]

    # The index of `searches` is used as the track id below. Keep one row per
    # id: duplicates would repeat API calls and produce a duplicate-indexed
    # result that multiplies rows when the caller joins on it.
    searches = searches[~searches.index.duplicated(keep='first')]

    if searches.empty:
        # `IN ()` with an empty tuple is a SQL syntax error - nothing to do.
        return pd.DataFrame({ 'id': [], 'popularity': [] }, index=searches.index)

    existing = SQL("SELECT * FROM spotify WHERE track IN %s", (tuple(searches.index),))

    # One dict lookup per track instead of scanning `existing` on every row.
    # setdefault keeps the first stored row per track, like the old `.values[0]`.
    cached = {}
    for track_id, spotify_id, stored_popularity in zip(
        existing['track'], existing['id'], existing['popularity']
    ):
        cached.setdefault(track_id, (spotify_id, stored_popularity))

    ids, popularity = ([], [])
    for i, row in tqdm(searches.iterrows(), total=searches.shape[0]):
        
        if i in cached:
            spotify_id, stored_popularity = cached[i]
            ids.append(spotify_id)
            popularity.append(stored_popularity)
            continue
        
        q = f"track:{row['title']} album:{row['release']}"
        results = sp.search(q=q, type='track')
        items = results['tracks']['items']
        if not items:
            ids.append(None)
            popularity.append(None)
            continue

        # Only the top search hit is considered.
        track = items[0]
        track_info = sp.track(track['id'])

        expected_artist = row['name']
        found_artists = [found['name'].upper() for found in track_info['artists']]
        # The left merge can leave the artist name missing (NaN); treat that
        # as a mismatch instead of crashing on `.upper()`.
        if not isinstance(expected_artist, str) or expected_artist.upper() not in found_artists:
            print("Ostrzeżenie: znaleziono utwór", track['name'], "ale artysta to", track_info['artists'][0]['name'])
            ids.append(None)
            popularity.append(None)
            continue

        ids.append(track['id'])
        popularity.append(track_info['popularity'])

        SQL_silent("""
            BEGIN;
                INSERT INTO spotify (id, track, popularity)
                VALUES (%s, %s, %s)
                ON CONFLICT (id) DO UPDATE SET popularity = EXCLUDED.popularity;
            COMMIT;
        """, (track['id'], i, track_info['popularity']))
    
    return pd.DataFrame({ 'id': ids, 'popularity': popularity }, index=searches.index)

In [16]:
def get_artist_track_popularity_graph(artist:str):
    """Build the collaboration graph for `artist`, enriched with Spotify popularity.

    Returns (relations, artists, tracks, edges). `tracks` carries layout
    coordinates, labels and the Spotify `id`/`popularity` columns; `artists`
    carries layout coordinates, names and `year`, the smallest `year` among
    the tracks related to each artist.
    """

    relations, artists_labels, tracks_labels = get_artist_tracks_relations(artist)
    nodes, edges = get_graph_layout(relations, 'track', 'artist')
    popularity = spotify_tracks_popularity(relations, artists_labels, tracks_labels)

    # Track nodes: coordinates + labels, then popularity where it is known.
    tracks = nodes.join(tracks_labels, how='right').join(popularity, how='left')

    # Attach the artist ids to every track row, then take the minimum year per artist.
    tracks_with_artist = tracks.merge(relations, how='left', left_index=True, right_on='track')
    first_year = tracks_with_artist.groupby('artist')['year'].min()

    artists = nodes.join(artists_labels, how='right')
    # Assignment aligns on the artist id index.
    artists['year'] = first_year

    return relations, artists, tracks, edges

## Graf

In [57]:
def find_z_for_edges(edges, frames):
    """Fill `edges['z']` with the `year` of the node located at each edge point.

    Every row of `edges` is an (x, y) point. The point is looked up, by exact
    coordinate equality, in `frames` (DataFrames with x, y and year columns);
    the first frame in list order - and the first row within it - that has the
    same coordinates supplies the year. Rows without a match (including the
    missing-value separator rows) are left untouched.

    `edges` is modified in place and also returned.
    """

    def _missing(value):
        # None or NaN: such coordinates never compare equal to anything.
        return value is None or value != value

    # Build the coordinate -> year lookup once instead of rescanning every
    # frame for every edge row. setdefault preserves the "first frame, first
    # row wins" priority.
    year_at = {}
    for frame in frames:
        for px, py, year in zip(frame['x'].values, frame['y'].values, frame['year'].values):
            if _missing(px) or _missing(py):
                continue
            year_at.setdefault((px, py), year)

    for i, ex, ey in zip(edges.index, edges['x'].values, edges['y'].values):
        if _missing(ex) or _missing(ey):
            continue
        point = (ex, ey)
        if point in year_at:
            edges.at[i, 'z'] = year_at[point]

    return edges

def plot_graph(edges, artists, tracks_pop, tracks_not_pop):
    """Draw the collaboration graph as a 3D plotly figure (x/y layout, z = year).

    Parameters:
        edges: frame of edge points with x and y columns.
        artists: artist nodes with x, y, year and name columns.
        tracks_pop: track nodes with known popularity (x, y, year, name,
            release, popularity columns).
        tracks_not_pop: track nodes without popularity (x, y, year, name).

    Returns the plotly Figure with four traces: edges, artists, tracks with
    popularity (marker size scaled by popularity) and tracks without it.
    """

    import plotly.graph_objects as go

    # Work on a copy so the caller's frame is not modified by the added
    # `z` column; re-running the cell then always starts from the same input.
    edges = edges.copy()
    edges['z'] = None
    find_z_for_edges(edges, [artists, tracks_pop, tracks_not_pop])

    fig = go.Figure().update_layout(
        title="Kolaboracje",
        legend=dict(x=0, y=0, bordercolor="Black", borderwidth=1),
        margin=dict(l=0, r=0, b=0, t=0, pad=0),scene=dict(
            xaxis=dict(title='', showticklabels=False),
            yaxis=dict(title='', showticklabels=False),
            zaxis=dict(title='Rok')
        )
    )

    # Edges: one line trace; missing-value rows break the line between edges.
    fig.add_trace(go.Scatter3d(
        name='krawędzie', hoverinfo='none', opacity=0.5,
        x=edges['x'], y=edges['y'],z=edges['z'],
        mode='lines', line=dict(width=0.5)
    ))

    fig.add_trace(go.Scatter3d(
        name='autorzy', hoverinfo='text', text=artists['name'],
        x=artists['x'], y=artists['y'], z=artists['year'],
        mode='markers', marker=dict(symbol='diamond', color='red', size=3)
    ))
    
    # Marker size is proportional to popularity (popularity / 5).
    fig.add_trace(go.Scatter3d(
        name='tytuły', hoverinfo='text', text=tracks_pop['name'] + ' (' + tracks_pop['release'] + ')<br>' + 'popularność: ' + tracks_pop['popularity'].astype(str),
        x=tracks_pop['x'], y=tracks_pop['y'], z=tracks_pop['year'],
        mode='markers', marker=dict(symbol='circle', color='green', size=tracks_pop['popularity']/5, line=None)
    ))

    fig.add_trace(go.Scatter3d(
        name='tytuły o nieznanej popularności', hoverinfo='text', text=tracks_not_pop['name'],
        x=tracks_not_pop['x'], y=tracks_not_pop['y'], z=tracks_not_pop['year'],
        mode='markers', marker=dict(symbol='circle', color='black', size=2, opacity=0.5, line=None)
    ))

    return fig

## Case

In [74]:
# Case study: collaboration graph around the artist 'Myslovitz'.
relations, artists, tracks, edges = get_artist_track_popularity_graph('Myslovitz')
# Split the tracks into those with a known Spotify popularity and the rest.
tracks_pop = tracks[tracks['popularity'].notna()]
tracks_not_pop = tracks.loc[~tracks.index.isin(tracks_pop.index)]

  0%|          | 0/2197 [00:00<?, ?it/s]

Ostrzeżenie: znaleziono utwór Krakowski spleen ale artysta to Maanam
Ostrzeżenie: znaleziono utwór Mozart: Piano Concerto No. 21 in C Major, K. 467: II. Andante ale artysta to Wolfgang Amadeus Mozart
Ostrzeżenie: znaleziono utwór Blue Velvet ale artysta to Bobby Vinton
Ostrzeżenie: znaleziono utwór Stand Down Margaret - Dub ale artysta to The English Beat
Ostrzeżenie: znaleziono utwór Historia jednej znajomosci - The story of one meeting ale artysta to Ryszard Szwec Band
Ostrzeżenie: znaleziono utwór Nie dla ciebie - Mix by DeepDarek ale artysta to Spike
Ostrzeżenie: znaleziono utwór Be My Baby ale artysta to The Ronettes
Ostrzeżenie: znaleziono utwór Polowanie na wielbłąda ale artysta to Kamil Kowalski
Ostrzeżenie: znaleziono utwór KRAINA MIŁOŚCI ale artysta to Kaszubska Influencerka
Ostrzeżenie: znaleziono utwór Behind Closed Eyes ale artysta to Little Symphony
Ostrzeżenie: znaleziono utwór Wielki Gniew - From "Mały Zgon" ale artysta to Hotel Torino
Ostrzeżenie: znaleziono utwór Krót

In [75]:
# Render the 3D collaboration graph for the case prepared in the previous cell.
plot_graph(edges, artists, tracks_pop, tracks_not_pop)