In [1]:
from neo4j import GraphDatabase
import pandas as pd

In [2]:
# Neo4j connection info
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "testpassword"

# Connect
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [3]:
def run_query(query, params=None):
    with driver.session() as session:
        result = session.run(query, parameters=params or {})
        return pd.DataFrame([r.data() for r in result])

## Total number of songs

In [4]:
query = "MATCH (s:Song) RETURN COUNT(s) AS total_songs"
run_query(query)

Unnamed: 0,total_songs
0,11530


## Top sampled songs

In [10]:
query = """
MATCH (:Song)-[r:SAMPLES]->(target:Song)
WITH target, COUNT(DISTINCT r) AS times_sampled
OPTIONAL MATCH (target)-[:HAS_ARTIST]->(artist:Artist)
WITH target.id AS song_id,
     target.title AS song_title,
     COLLECT(DISTINCT artist.name) AS artist_names,
     times_sampled
RETURN song_id, song_title, artist_names, times_sampled
ORDER BY times_sampled DESC
LIMIT 10
"""
run_query(query)

Unnamed: 0,song_id,song_title,artist_names,times_sampled
0,Lyn-Collins/Think-(About-It),Think (About It),[Lyn Collins],38
1,Beside/Change-the-Beat-(Female-Version),Change the Beat (Female Version),[Beside],31
2,"The-Winstons/Amen,-Brother","Amen, Brother",[The Winstons],27
3,Doug-E.-Fresh/La-Di-Da-Di,La Di Da Di,"[Slick Rick, Doug E. Fresh]",24
4,James-Brown/Funky-Drummer,Funky Drummer,[],24
5,Run-DMC/Here-We-Go-(Live-at-the-Funhouse),Here We Go (Live at the Funhouse),[],23
6,The-Honey-Drippers/Impeach-the-President,Impeach the President,[],21
7,Lil-Jon/Who-U-Wit%3F,Who U Wit?,"[The East Side Boyz, Lil Jon]",21
8,Mountain/Long-Red,Long Red,[Mountain],20
9,YG/I%27m-Good,I'm Good,[YG],17


## Genre distribution

In [14]:
query = """
MATCH (:Song)-[:BELONGS_TO_GENRE]->(g:Genre)
RETURN g.name AS genre, COUNT(*) AS count
ORDER BY count DESC
"""
run_query(query)

Unnamed: 0,genre,count
0,hip hop,2424
1,east coast hip hop,745
2,gangsta rap,544
3,electronic,539
4,rap,521
...,...,...
1400,rap/alternative and punk,1
1401,?,1
1402,jermaine,1
1403,rap hip-hop,1


## Community

In [15]:
query = """
MATCH (s:Song)
RETURN 
  s.sampling_community AS sampling_community,
  avg(s.danceability_danceable) AS danceability_danceable,
  avg(s.genre_dortmund_alternative) AS genre_dortmund_alternative,
  avg(s.genre_dortmund_blues) AS genre_dortmund_blues,
  avg(s.genre_dortmund_electronic) AS genre_dortmund_electronic,
  avg(s.genre_dortmund_folkcountry) AS genre_dortmund_folkcountry,
  avg(s.genre_dortmund_funksoulrnb) AS genre_dortmund_funksoulrnb,
  avg(s.genre_dortmund_jazz) AS genre_dortmund_jazz,
  avg(s.genre_dortmund_pop) AS genre_dortmund_pop,
  avg(s.genre_dortmund_raphiphop) AS genre_dortmund_raphiphop,
  avg(s.genre_dortmund_rock) AS genre_dortmund_rock,
  avg(s.genre_electronic_ambient) AS genre_electronic_ambient,
  avg(s.genre_electronic_dnb) AS genre_electronic_dnb,
  avg(s.genre_electronic_house) AS genre_electronic_house,
  avg(s.genre_electronic_techno) AS genre_electronic_techno,
  avg(s.genre_electronic_trance) AS genre_electronic_trance,
  avg(s.genre_rosamerica_cla) AS genre_rosamerica_cla,
  avg(s.genre_rosamerica_dan) AS genre_rosamerica_dan,
  avg(s.genre_rosamerica_hip) AS genre_rosamerica_hip,
  avg(s.genre_rosamerica_jaz) AS genre_rosamerica_jaz,
  avg(s.genre_rosamerica_pop) AS genre_rosamerica_pop,
  avg(s.genre_rosamerica_rhy) AS genre_rosamerica_rhy,
  avg(s.genre_rosamerica_roc) AS genre_rosamerica_roc,
  avg(s.genre_rosamerica_spe) AS genre_rosamerica_spe,
  avg(s.genre_tzanetakis_blu) AS genre_tzanetakis_blu,
  avg(s.genre_tzanetakis_cla) AS genre_tzanetakis_cla,
  avg(s.genre_tzanetakis_cou) AS genre_tzanetakis_cou,
  avg(s.genre_tzanetakis_dis) AS genre_tzanetakis_dis,
  avg(s.genre_tzanetakis_hip) AS genre_tzanetakis_hip,
  avg(s.genre_tzanetakis_jaz) AS genre_tzanetakis_jaz,
  avg(s.genre_tzanetakis_met) AS genre_tzanetakis_met,
  avg(s.genre_tzanetakis_pop) AS genre_tzanetakis_pop,
  avg(s.genre_tzanetakis_reg) AS genre_tzanetakis_reg,
  avg(s.genre_tzanetakis_roc) AS genre_tzanetakis_roc,
  avg(s.ismir04_rhythm_ChaChaCha) AS ismir04_rhythm_ChaChaCha,
  avg(s.ismir04_rhythm_Jive) AS ismir04_rhythm_Jive,
  avg(s.ismir04_rhythm_Quickstep) AS ismir04_rhythm_Quickstep,
  avg(s.ismir04_rhythm_Rumba_American) AS ismir04_rhythm_Rumba_American,
  avg(s.ismir04_rhythm_Rumba_International) AS ismir04_rhythm_Rumba_International,
  avg(s.ismir04_rhythm_Rumba_Misc) AS ismir04_rhythm_Rumba_Misc,
  avg(s.ismir04_rhythm_Samba) AS ismir04_rhythm_Samba,
  avg(s.ismir04_rhythm_Tango) AS ismir04_rhythm_Tango,
  avg(s.ismir04_rhythm_VienneseWaltz) AS ismir04_rhythm_VienneseWaltz,
  avg(s.ismir04_rhythm_Waltz) AS ismir04_rhythm_Waltz,
  avg(s.mood_acoustic_acoustic) AS mood_acoustic_acoustic,
  avg(s.mood_aggressive_aggressive) AS mood_aggressive_aggressive,
  avg(s.mood_electronic_electronic) AS mood_electronic_electronic,
  avg(s.mood_happy_happy) AS mood_happy,
  avg(s.mood_party) AS mood_party,
  avg(s.mood_relaxed) AS mood_relaxed,
  avg(s.mood_sad) AS mood_sad,
  avg(s.timbre_bright) AS timbre_bright,
  avg(s.tonal_atonal_atonal) AS tonal_atonal_atonal,
  avg(s.voice_instrumental_voice) AS voice_instrumental_voice
"""

community_features = run_query(query)
community_features

Unnamed: 0,sampling_community,danceability_danceable,genre_dortmund_alternative,genre_dortmund_blues,genre_dortmund_electronic,genre_dortmund_folkcountry,genre_dortmund_funksoulrnb,genre_dortmund_jazz,genre_dortmund_pop,genre_dortmund_raphiphop,...,mood_acoustic_acoustic,mood_aggressive_aggressive,mood_electronic_electronic,mood_happy,mood_party,mood_relaxed,mood_sad,timbre_bright,tonal_atonal_atonal,voice_instrumental_voice
0,115,0.653694,0.019310,0.027513,0.836702,0.025528,0.003441,0.022812,0.004444,0.045586,...,0.168722,0.145159,0.765468,0.289882,0.439518,0.544220,0.315805,0.368309,0.280809,0.519009
1,139,0.720428,0.022757,0.019207,0.839445,0.064314,0.002576,0.011565,0.004639,0.012073,...,0.323377,0.188391,0.603914,0.324836,0.424724,0.623896,0.398414,0.369367,0.368813,0.689851
2,72,0.685935,0.017469,0.026961,0.824337,0.023274,0.003995,0.040201,0.004326,0.043529,...,0.242417,0.160094,0.766100,0.231227,0.420188,0.574436,0.335657,0.409198,0.277211,0.546431
3,59,,,,,,,,,,...,,,,,,,,,,
4,46,0.839255,0.011483,0.015677,0.917328,0.016169,0.002342,0.016548,0.002478,0.007831,...,0.097365,0.309988,0.838636,0.290733,0.676710,0.430722,0.221542,0.539480,0.266332,0.485006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,123,0.622672,0.056932,0.066231,0.605073,0.165901,0.008319,0.026408,0.016542,0.005730,...,0.542926,0.017304,0.587858,0.279626,0.120256,0.833613,0.512825,0.603499,0.962744,0.498739
155,63,0.998816,0.007260,0.000861,0.988166,0.001818,0.000103,0.000396,0.000477,0.000110,...,0.119497,0.018436,0.596662,0.161163,0.554444,0.815655,0.574401,0.373946,0.096894,0.911424
156,144,0.973150,0.004699,0.000458,0.993046,0.000818,0.000032,0.000306,0.000076,0.000038,...,0.012957,0.489698,0.846298,0.626100,0.869026,0.436118,0.215393,0.687663,0.065921,0.855692
157,145,,,,,,,,,,...,,,,,,,,,,


In [6]:
query = """
    MATCH (s:Song)
    RETURN s.sampling_community AS community, count(*) AS size
    ORDER BY size DESC
    """

community_list = run_query(query)
community_list

Unnamed: 0,community,size
0,84,340
1,105,280
2,114,243
3,146,239
4,101,231
...,...,...
154,16,2
155,111,2
156,123,2
157,91,1


In [16]:
community_merged = community_list.merge(community_features, left_on='community', right_on='sampling_community', how='left')
community_merged

Unnamed: 0,community,size,sampling_community,danceability_danceable,genre_dortmund_alternative,genre_dortmund_blues,genre_dortmund_electronic,genre_dortmund_folkcountry,genre_dortmund_funksoulrnb,genre_dortmund_jazz,...,mood_acoustic_acoustic,mood_aggressive_aggressive,mood_electronic_electronic,mood_happy,mood_party,mood_relaxed,mood_sad,timbre_bright,tonal_atonal_atonal,voice_instrumental_voice
0,84,340,84,0.801745,0.028569,0.014730,0.887896,0.018321,0.002988,0.010007,...,0.140278,0.189904,0.775638,0.312586,0.518569,0.570692,0.283295,0.437342,0.254680,0.625457
1,105,280,105,0.769220,0.014564,0.025619,0.897409,0.012114,0.002283,0.016348,...,0.138704,0.300995,0.789378,0.308110,0.579694,0.451862,0.251377,0.427915,0.269805,0.535800
2,114,243,114,0.807297,0.018090,0.019175,0.894768,0.013928,0.002595,0.020524,...,0.187390,0.236226,0.790992,0.305313,0.583782,0.505667,0.284474,0.515318,0.335058,0.572200
3,146,239,146,0.741634,0.024000,0.030099,0.861238,0.020276,0.004178,0.016301,...,0.154922,0.294612,0.785960,0.289338,0.579725,0.475368,0.249166,0.466112,0.252064,0.536918
4,101,231,101,0.638726,0.030195,0.032164,0.828850,0.029660,0.004093,0.026326,...,0.266718,0.170085,0.759361,0.254937,0.384962,0.619455,0.384829,0.486619,0.282815,0.524445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,16,2,16,,,,,,,,...,,,,,,,,,,
155,111,2,111,,,,,,,,...,,,,,,,,,,
156,123,2,123,0.622672,0.056932,0.066231,0.605073,0.165901,0.008319,0.026408,...,0.542926,0.017304,0.587858,0.279626,0.120256,0.833613,0.512825,0.603499,0.962744,0.498739
157,91,1,91,,,,,,,,...,,,,,,,,,,


In [21]:
row = community_merged.iloc[2]  # or any specific row you select
for col, val in row.items():
    print(f"{col}: {val}")

community: 114.0
size: 243.0
sampling_community: 114.0
danceability_danceable: 0.8072970843344938
genre_dortmund_alternative: 0.018090282645379367
genre_dortmund_blues: 0.019174677740735672
genre_dortmund_electronic: 0.8947683998899905
genre_dortmund_folkcountry: 0.01392785759535819
genre_dortmund_funksoulrnb: 0.0025951757780074817
genre_dortmund_jazz: 0.020523680805529316
genre_dortmund_pop: 0.002834042433127244
genre_dortmund_raphiphop: 0.01356444132325048
genre_dortmund_rock: 0.014521445568996035
genre_electronic_ambient: 0.32828196757572903
genre_electronic_dnb: 0.04480582603466964
genre_electronic_house: 0.3398155131035512
genre_electronic_techno: 0.04582852738147875
genre_electronic_trance: 0.24126816676972443
genre_rosamerica_cla: 0.023106088916965983
genre_rosamerica_dan: 0.0860389561820335
genre_rosamerica_hip: 0.519722288568863
genre_rosamerica_jaz: 0.04985024021619113
genre_rosamerica_pop: 0.07584471180321245
genre_rosamerica_rhy: 0.1857305011595289
genre_rosamerica_roc: 0.0