## Technikalia

In [1]:
def connect():
    """Open a psycopg2 connection to the MusicBrainz PostgreSQL database.

    Each setting can be overridden with the conventional libpq environment
    variable (PGHOST, PGDATABASE, PGUSER, PGPASSWORD); when a variable is
    unset the notebook's original local-development value is used.

    Returns:
        A new psycopg2 connection object.

    Raises:
        ImportError: psycopg2 is not installed.
        psycopg2.Error: the server refused or could not be reached.
    """
    import os
    import psycopg2
    return psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "musicbrainz"),
        user=os.environ.get("PGUSER", "postgres"),
        # Fallback kept only so the existing local setup keeps working;
        # prefer exporting PGPASSWORD over editing this default.
        password=os.environ.get("PGPASSWORD", "123"),
    )


def SQL(q, params=None):
    """Run SQL text on the shared CONNECTION and return a pandas DataFrame.

    Args:
        q: SQL text to execute.
        params: Optional sequence or mapping forwarded to
            ``pandas.read_sql_query`` for driver-side placeholder binding
            (psycopg2 placeholders are ``%s`` / ``%(name)s``). When params
            are given, literal ``%`` characters in ``q`` must be doubled.

    Returns:
        The DataFrame produced by ``pandas.read_sql_query``.

    Raises:
        RuntimeError: no database connection is available.
    """
    if CONNECTION is None:
        raise RuntimeError(
            "Brak połączenia z bazą danych. Uruchom ponownie komórkę łączącą z bazą."
        )

    from pandas import read_sql_query
    import warnings
    with warnings.catch_warnings():
        # pandas warns that raw DBAPI2 connections other than sqlite3 are
        # untested and recommends SQLAlchemy. Silence only that message so
        # unrelated UserWarnings still surface.
        warnings.filterwarnings(
            'ignore',
            message=r'pandas only supports? SQLAlchemy',
            category=UserWarning,
        )
        return read_sql_query(q, CONNECTION, params=params)


# Connecting stays best-effort (the failure is reported, not raised), but the
# helpers above are defined either way, so later cells fail with a clear
# RuntimeError instead of a NameError on SQL.
try:
    CONNECTION = connect()
    print("Połączono z bazą danych")
except Exception as e:
    CONNECTION = None
    print("Nie udało się połączyć z bazą danych. Błąd:", e)

Połączono z bazą danych


## Relacje

Graf polskich artystów i ich utworów

In [78]:
# (Re)build features_pl: one (track, artist) row for every track credited to
# an artist whose area is named "Poland". Tracks are resolved to artists via
# track -> artist_credit -> artist_credit_name -> artist.
# The final statement returns the new table's column metadata; the lookup is
# limited to the current schema (where the unqualified CREATE TABLE lands) so
# a same-named table in another schema cannot add duplicate rows.
features_pl_columns = SQL("""

    DROP TABLE IF EXISTS features_pl;

    CREATE TABLE features_pl AS
    WITH pl AS (SELECT id FROM musicbrainz.area WHERE UPPER(name)='POLAND')
    SELECT
        tra.id AS track, art.id AS artist
    
    FROM musicbrainz.track AS tra
    
    LEFT JOIN musicbrainz.artist_credit AS cre 
        ON cre.id = tra.artist_credit
    LEFT JOIN musicbrainz.artist_credit_name AS cre_many2many 
        ON cre.id = cre_many2many.artist_credit
    LEFT JOIN musicbrainz.artist AS art
        ON art.id = cre_many2many.artist
    
    WHERE art.area IN (SELECT id FROM pl);         

    SELECT 
        column_name, data_type, is_nullable
    FROM 
        information_schema.columns
    WHERE 
        table_name = 'features_pl'
        AND table_schema = current_schema();
    
""").set_index('column_name')

# psycopg2 runs statements inside an implicitly opened transaction. Without a
# commit the DROP/CREATE above stays pending, and any later query that fails
# and triggers a rollback on this connection would also undo the new table.
CONNECTION.commit()

features_pl_columns

Unnamed: 0_level_0,data_type,is_nullable
column_name,Unnamed: 1_level_1,Unnamed: 2_level_1
track,integer,YES
artist,integer,YES


## Case study

In [3]:
# Case-study subject: the queries below upper-case this value and match it as
# a substring (LIKE '%...%') against UPPER(musicbrainz.artist.name).
artist = "Maryla Rodowicz"

In [79]:
# The artist name is spliced into a single-quoted SQL literal, so embedded
# apostrophes are doubled (SQL string-literal escaping); otherwise a name such
# as "O'Connor" would terminate the literal early and break the query.
artist_pattern = artist.upper().replace("'", "''")

# Edge list for the collaboration graph:
#   target - artist ids whose upper-cased name contains the pattern
#   output - features_pl tracks credited to any target artist
#   colab  - every artist credited on those tracks
# The final SELECT returns all features_pl (track, artist) rows of the colab
# artists, i.e. every track of theirs, not only the shared ones.
tracks = SQL(f"""

    WITH

        target AS (
            SELECT id 
            FROM musicbrainz.artist 
            WHERE UPPER(name) 
            LIKE '%{artist_pattern}%'
        ),
    
        output AS (
            SELECT track
            FROM features_pl
            WHERE artist IN (SELECT id FROM target)
        ),

        colab AS (
            SELECT DISTINCT artist FROM features_pl 
            WHERE track IN (SELECT track FROM output)
        )
    
    SELECT * FROM features_pl 
        WHERE artist IN (SELECT artist FROM colab);

""")

tracks

Unnamed: 0,track,artist
0,590139,52119
1,590144,52119
2,951373,84953
3,951377,84953
4,951378,84953
...,...,...
5748,17802621,937404
5749,17802622,937404
5750,17802623,937404
5751,17802624,937404


In [7]:
# The artist name is spliced into a single-quoted SQL literal, so embedded
# apostrophes are doubled (SQL string-literal escaping); otherwise a name such
# as "O'Connor" would terminate the literal early and break the query.
artist_pattern = artist.upper().replace("'", "''")

# Display names for the artist nodes, indexed by artist id:
#   target - artist ids whose upper-cased name contains the pattern
#   output - features_pl tracks credited to any target artist
#   colab  - every artist credited on those tracks
# The final SELECT returns (id, name) from musicbrainz.artist for the colab
# artists.
artist_labels = SQL(f"""

    WITH

        target AS (
            SELECT id 
            FROM musicbrainz.artist 
            WHERE UPPER(name) 
            LIKE '%{artist_pattern}%'
        ),
    
        output AS (
            SELECT track
            FROM features_pl
            WHERE artist IN (SELECT id FROM target)
        ),

        colab AS (
            SELECT DISTINCT artist FROM features_pl 
            WHERE track IN (SELECT track FROM output)
        )
    
    SELECT id, name FROM musicbrainz.artist
        WHERE id IN (SELECT artist FROM colab);

""").set_index('id')

artist_labels

Unnamed: 0_level_0,name
id,Unnamed: 1_level_1
450057,Seweryn Krajewski
554378,Patrycja Kosiarkiewicz
807100,Sławek Uniatowski
52119,Czesław Niemen
628471,Renata Dąbkowska
573264,Maciej Miecznikowski
573263,Andrzej Lampert
103209,Edyta Geppert
370987,Kasia Cerekwicka
160812,Ryszard Rynkowski


In [81]:
# The artist name is spliced into a single-quoted SQL literal, so embedded
# apostrophes are doubled (SQL string-literal escaping); otherwise a name such
# as "O'Connor" would terminate the literal early and break the query.
artist_pattern = artist.upper().replace("'", "''")

# Display names for the track nodes, indexed by track id:
#   target - artist ids whose upper-cased name contains the pattern
#   output - features_pl tracks credited to any target artist
#   colab  - every artist credited on those tracks
# The final SELECT lists the distinct (track id, track name, release name) of
# every features_pl row of the colab artists; the release name is reached via
# track -> medium -> release.
track_labels = SQL(f"""

    WITH

        target AS (
            SELECT id 
            FROM musicbrainz.artist 
            WHERE UPPER(name) 
            LIKE '%{artist_pattern}%'
        ),
    
        output AS (
            SELECT track
            FROM features_pl
            WHERE artist IN (SELECT id FROM target)
        ),

        colab AS (
            SELECT DISTINCT artist FROM features_pl 
            WHERE track IN (SELECT track FROM output)
        )
    
    SELECT 
        DISTINCT features_pl.track AS id, 
        track.name, release.name as release
        
        FROM features_pl
    LEFT JOIN musicbrainz.track AS track
        ON track.id = features_pl.track

    LEFT JOIN musicbrainz.medium AS med
        ON med.id = track.medium

    LEFT JOIN musicbrainz.release AS release
        ON release.id = med.release

    WHERE artist IN (SELECT artist FROM colab);

""").set_index('id')

track_labels

Unnamed: 0_level_0,name,release
id,Unnamed: 1_level_1,Unnamed: 2_level_1
590133,Pod Papugami,Pod Papugami - Największe Przeboje
590134,Dziwny jest ten świat,Pod Papugami - Największe Przeboje
590135,Sen o Warszawie,Pod Papugami - Największe Przeboje
590136,Czy mnie jeszcze pamiętasz,Pod Papugami - Największe Przeboje
590137,Sukces,Pod Papugami - Największe Przeboje
...,...,...
46205395,Czy Mnie Jeszcze Pamiętasz,Pamiętam Ten Dzień
46205396,"Hej Dziewczyno, Hej",Pamiętam Ten Dzień
46205397,Sen O Warszawie,Pamiętam Ten Dzień
46205398,Wspomnienie,Pamiętam Ten Dzień


### Więcej danych

In [None]:
import pandas as pd

### Popularność

In [26]:
import spotipy, requests
from spotipy.oauth2 import SpotifyClientCredentials

def load_creds():
    """Call python-dotenv's ``load_dotenv()`` with its default arguments."""
    import dotenv
    dotenv.load_dotenv()


load_creds()

# The credentials manager is built without explicit arguments; presumably it
# picks the client id/secret up from the environment populated by
# load_creds() above -- confirm against the spotipy documentation.
client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [111]:
# One search row per (track, title): attach the artist name and the track
# title / release name to every edge, then keep the first row per track so
# each track is searched once even when several artists are credited on it.
spotify_searches = (
    tracks
    .merge(artist_labels.reset_index(), left_on='artist', right_on='id', how='left', suffixes=('', '_artist'))
    .merge(track_labels.reset_index(), left_on='track', right_on='id', how='left', suffixes=('', '_track'))
    .rename(columns={'name_track': 'title'})
    .drop_duplicates(subset=['track', 'title'])
    .drop(columns=['id', 'id_track', 'artist'])
)

spotify_ids, popularity, release_name = ([], [], [])
for i, row in spotify_searches.iterrows():

    q = f"artist:{row['name']} track:{row['title']} album:{row['release']}"
    results = sp.search(q=q, type='track')

    items = results['tracks']['items']

    print(f"{i} {q}: {len(items)}")

    if items:
        # Take the top hit. Search results are full track objects that
        # already carry 'popularity' and 'album', so no follow-up
        # sp.track() request is needed for each match.
        track = items[0]
        spotify_ids.append(track['id'])
        popularity.append(track['popularity'])
        release_name.append(track['album']['name'])
    else:
        # Keep the three lists aligned with the rows of spotify_searches.
        spotify_ids.append(None)
        popularity.append(None)
        release_name.append(None)

# Lists are assigned positionally, one entry per row iterated above.
spotify_searches['spotify_id'] = spotify_ids
spotify_searches['popularity'] = popularity
spotify_searches['release_name'] = release_name

0 artist:Czesław Niemen track:Stoję w oknie album:Pod Papugami - Największe Przeboje: 0
1 artist:Czesław Niemen track:Jeszcze swój egzamin zdasz album:Pod Papugami - Największe Przeboje: 0
2 artist:Edyta Bartosiewicz track:Skłamałam album:Dziecko: 0
3 artist:Edyta Bartosiewicz track:Boogie czyli zemsta słodka jest album:Dziecko: 0
4 artist:Edyta Bartosiewicz track:Coś zmieniło się? album:Dziecko: 0
5 artist:Edyta Bartosiewicz track:Nie jak przyjaciel album:Dziecko: 0
6 artist:Edyta Bartosiewicz track:Dobrze nam album:Dziecko: 0
7 artist:Edyta Geppert track:Nie wierzę piosence album:Wierzę piosence: 0
8 artist:Edyta Geppert track:Niewysłowiona album:Wierzę piosence: 0
9 artist:Edyta Geppert track:Zdradzonym i bezbronnym album:Wierzę piosence: 0
10 artist:Edyta Geppert track:Jaka róża taki cierń album:Wierzę piosence: 0
11 artist:Edyta Geppert track:Nie jest źle album:Wierzę piosence: 0
12 artist:Piotr Szczepanik track:Żółte kalendarze album:Złote przeboje: 0
13 artist:Piotr Szczepanik t

### Graf

In [112]:
import networkx as nx, plotly.graph_objects as go

In [113]:
# Fixed seed so the force-directed layout (and therefore the figure) is
# reproducible across kernel restarts.
LAYOUT_SEED = 42

# Graph with one node per track id and per artist id, one edge per credit.
# NOTE(review): track ids and artist ids share a single integer node
# namespace here; a track id equal to an artist id would collapse into one
# node -- confirm the id ranges cannot overlap or namespace the nodes.
G = nx.from_pandas_edgelist(tracks, 'track', 'artist')
pos = nx.spring_layout(G, seed=LAYOUT_SEED)

# Index the search results by track id once (first row per track) instead of
# scanning the whole frame with a boolean mask for every node.
spotify_by_track = spotify_searches.drop_duplicates(subset='track').set_index('track')

artist_nodes, track_nodes, track_nodes_unknown = ([], [], [])
for node in G.nodes:

    is_artist = node in artist_labels.index

    # Hover text: artist name, else track name, else the raw node id.
    if is_artist:
        text = artist_labels.loc[node].iloc[0]
    elif node in track_labels.index:
        text = track_labels.loc[node].iloc[0]
    else:
        text = node

    node_data = {'x': pos[node][0], 'y': pos[node][1], 'text': text}

    if is_artist:
        artist_nodes.append(node_data)
        continue

    # A track counts as "known" only when the search produced a Spotify id.
    # Nodes absent from the lookup are treated as unknown instead of raising.
    if node in spotify_by_track.index and pd.notna(spotify_by_track.at[node, 'spotify_id']):
        node_data['popularity'] = spotify_by_track.at[node, 'popularity']
        track_nodes.append(node_data)
        continue

    track_nodes_unknown.append(node_data)

# Explicit columns keep the frames usable by the plotting code even when a
# category is empty (e.g. no search returned a match).
artist_nodes = pd.DataFrame(artist_nodes, columns=['x', 'y', 'text'])
track_nodes = pd.DataFrame(track_nodes, columns=['x', 'y', 'text', 'popularity'])
track_nodes_unknown = pd.DataFrame(track_nodes_unknown, columns=['x', 'y', 'text'])

# Line-segment coordinates for a single scatter trace: each edge contributes
# (start, end, None), the None breaking the line between consecutive edges.
edge_x, edge_y = [], []
for source, target in G.edges:
    edge_x += [pos[source][0], pos[target][0], None]
    edge_y += [pos[source][1], pos[target][1], None]

edges = pd.DataFrame({'x': edge_x, 'y': edge_y})

In [142]:
# Empty canvas: white background, fixed size, grid / zero lines / tick labels
# hidden on both axes so only the graph itself is visible.
fig = go.Figure()
fig.update_layout(
    title=f"{artist} i kolaboracje",
    autosize=False,
    margin={'l': 0, 'r': 50, 'b': 0, 't': 50, 'pad': 0},
    paper_bgcolor="White",
    plot_bgcolor="White",
    xaxis={'showgrid': False, 'zeroline': False, 'showticklabels': False},
    yaxis={'showgrid': False, 'zeroline': False, 'showticklabels': False},
)

# Traces are added in drawing order: edges first, then artists, tracks with a
# known popularity, and finally tracks without one.
fig.add_traces([
    # Edges: thin grey lines without hover information.
    go.Scatter(
        name='krawędzie',
        x=edges['x'],
        y=edges['y'],
        mode='lines',
        line={'width': 0.5, 'color': '#888'},
        hoverinfo='none',
        opacity=0.5,
    ),
    # Artists: red stars labelled with the artist name.
    go.Scatter(
        name='autorzy',
        x=artist_nodes['x'],
        y=artist_nodes['y'],
        text=artist_nodes['text'],
        hoverinfo='text',
        mode='markers',
        marker={'symbol': 'star', 'color': 'Red', 'size': 10},
    ),
    # Tracks with a popularity value: green circles sized by popularity / 5.
    go.Scatter(
        name='tytuły',
        x=track_nodes['x'],
        y=track_nodes['y'],
        text=track_nodes['text'],
        hoverinfo='text',
        mode='markers',
        marker={
            'symbol': 'circle',
            'color': 'Green',
            'size': track_nodes['popularity'] / 5,
        },
    ),
    # Tracks without a popularity value: small translucent black dots.
    go.Scatter(
        name='tytuły o nieznanej popularności',
        x=track_nodes_unknown['x'],
        y=track_nodes_unknown['y'],
        text=track_nodes_unknown['text'],
        hoverinfo='text',
        mode='markers',
        marker={'symbol': 'circle', 'color': 'Black', 'size': 1, 'opacity': 0.5},
    ),
])

fig