In [1]:
import json
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import seaborn as sns

plt.rc('font', family='Malgun Gothic')
sns.set(font="Malgun Gothic",#"NanumGothicCoding", 
        rc={"axes.unicode_minus":False}, # 마이너스 부호 깨짐 현상 해결
        style='darkgrid')

# 태그 데이터

In [2]:
train = pd.read_json('train.json', typ = 'frame')

In [5]:
train

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date
0,[락],61281,여행같은 음악,"[525514, 129701, 383374, 562083, 297861, 13954...",71,2013-12-19 18:36:19.000
1,"[추억, 회상]",10532,요즘 너 말야,"[432406, 675945, 497066, 120377, 389529, 24427...",1,2014-12-02 16:19:42.000
2,"[까페, 잔잔한]",76951,"편하게, 잔잔하게 들을 수 있는 곡.-","[83116, 276692, 166267, 186301, 354465, 256598...",17,2017-08-28 07:09:34.000
3,"[연말, 눈오는날, 캐럴, 분위기, 따듯한, 크리스마스캐럴, 겨울노래, 크리스마스,...",147456,크리스마스 분위기에 흠뻑 취하고 싶을때,"[394031, 195524, 540149, 287984, 440773, 10033...",33,2019-12-05 15:15:18.000
4,[댄스],27616,추억의 노래 ㅋ,"[159327, 553610, 5130, 645103, 294435, 100657,...",9,2011-10-25 13:54:56.000
...,...,...,...,...,...,...
115066,"[록메탈, 밴드사운드, 록, 락메탈, 메탈, 락, extreme]",120325,METAL E'SM #2,"[429629, 441511, 612106, 516359, 691768, 38714...",3,2020-04-17 04:31:11.000
115067,[일렉],106976,빠른 리스너를 위한 따끈따끈한 최신 인기 EDM 모음!,"[321330, 216057, 534472, 240306, 331098, 23288...",13,2015-12-24 17:23:19.000
115068,"[담시, 가족, 눈물, 그리움, 주인공, 나의_이야기, 사랑, 친구]",11343,#1. 눈물이 앞을 가리는 나의_이야기,"[50512, 249024, 250608, 371171, 229942, 694943...",4,2019-08-16 20:59:22.000
115069,"[잔잔한, 버스, 퇴근버스, Pop, 풍경, 퇴근길]",131982,퇴근 버스에서 편히 들으면서 하루를 마무리하기에 좋은 POP,"[533534, 608114, 343608, 417140, 609009, 30217...",4,2019-10-25 23:40:42.000


In [3]:
# 플레이리스트 아이디(id)와 매핑된 태그(tags) 추출
plylst_tag_map = train[['id', 'tags']]

# unnest tags
plylst_tag_map_unnest = np.dstack(
    (
        np.repeat(plylst_tag_map.id.values, list(map(len, plylst_tag_map.tags))), 
        np.concatenate(plylst_tag_map.tags.values)
    )
)

# unnested 데이터프레임 생성 : plylst_tag_map
plylst_tag_map = pd.DataFrame(data = plylst_tag_map_unnest[0], columns = plylst_tag_map.columns)
plylst_tag_map['id'] = plylst_tag_map['id'].astype(str)

# unnest 객체 제거
del plylst_tag_map_unnest

In [4]:
plylst_tag_map

Unnamed: 0,id,tags
0,61281,락
1,10532,추억
2,10532,회상
3,76951,까페
4,76951,잔잔한
...,...,...
476326,131982,퇴근길
476327,100389,노래추천
476328,100389,팝송추천
476329,100389,팝송


In [5]:
plylst_tag_map['tags'].nunique()

29160

In [6]:
# 1. 플레이리스트 별 매핑 태그 수 count 테이블 생성 : plylst_tag_cnt
plylst_tag_cnt = pd.DataFrame(plylst_tag_map.groupby('id').tags.nunique())

In [7]:
# 태그 별 매핑 빈도 수 저장 
tag_cnt = plylst_tag_map.groupby('tags').tags.count().reset_index(name = 'mapping_cnt')
tag_cnt['tags'] = tag_cnt['tags'].astype(str)
tag_cnt['mapping_cnt'] = tag_cnt['mapping_cnt'].astype(int)

# 빈도 수가 1000회 이상인 태그만 저장
tag_cnt = tag_cnt[tag_cnt['mapping_cnt'] >= 100]


In [8]:
tag_cnt['mapping_cnt'].sort_values()

27364      100
18741      100
19033      101
19056      101
1702       101
         ...  
22262    10218
12894    10796
28675    11215
5234     11417
7065     16465
Name: mapping_cnt, Length: 447, dtype: int32

In [9]:
tag_cnt.sort_values('mapping_cnt',ascending=False)

Unnamed: 0,tags,mapping_cnt
7065,기분전환,16465
5234,감성,11417
28675,휴식,11215
12894,발라드,10796
22262,잔잔한,10218
...,...,...
1702,Electronic,101
19033,여자,101
19056,여자보컬,101
27364,하늘,100


In [10]:
print(tag_cnt['tags'].unique())

['12월' '1980' '1990' '1990년대' '2000' '2000년대' '2010' '2010년대' '2017'
 '2019' '7080' '90년대' 'ASMR' 'Billboard' 'CCM' 'Chill' 'DJ' 'EDM'
 'EDMFloor' 'Electronic' 'HIPHOPLE' 'HipHop' 'JPOP' 'Jazz' 'Lofi' 'M에센셜'
 'OST' 'Pop' 'RNBSOUL' 'Rap' 'RnB' 'Rock' 'bgm' 'classic' 'dance'
 'electronica' 'etc' 'groove' 'indie' 'kpop' 'ralalra' 'soul' '가사' '가요'
 '가을' '가을감성' '가을밤' '가을비' '가족' '가창력' '감각적' '감각적인' '감동' '감미로운' '감성' '감성발라드'
 '감성음악' '감성자극' '감성적인' '감성팝' '감성힙합' '감수성' '걸그룹' '겨울' '겨울감성' '겨울노래' '겨울밤'
 '경쾌한' '고독' '고막남친' '고막여친' '고백' '고속도로' '공감' '공부' '공부할때' '공연' '국내' '국내힙합'
 '국힙' '귀르가즘' '그루브' '그루비' '그리움' '금요일' '기분' '기분업' '기분전환' '기분좋은' '기억' '기타'
 '까페' '꿀잠' '꿈' '나들이' '나른한' '나만알고싶은' '나만의Best3' '낙엽' '날씨' '남자' '남자아이돌' '낭만'
 '낮잠' '내적댄스' '내한' '노동요' '노래' '노래방' '노래추천' '노을' '눈' '눈물' '눈오는날' '뉴에이지' '느낌'
 '느낌있는' '다이어트' '달달' '달달한' '달콤' '달콤한' '댄스' '댄스곡' '더위' '데이트' '독서' '동요'
 '두근두근' '듀엣' '드라마' '드라마ost' '드라이브' '듣기좋은' '딥하우스' '따듯한' '따뜻한' '따스한' '띵곡'
 '띵곡들' '라디오' '라운지' '라틴' '락' '랩' '레전드' '레트로' '로맨틱' '록' '록메탈' '리드미컬' '리듬'
 

In [11]:
# 기쁠 때 = 신남, 청량한, 에너지, 즐거운
# 화날 때 = 명상
# 슬플 때 - 잔잔 = 새벽,잔잔
#         - 신남 = 우울할때, 힘내, 화이팅

In [12]:
#plylst_tag_map=plylst_tag_map.query("tags in ['신남','청량한','에너지','즐거운','명상','슬픔','슬픈','힘내','화이팅']")

In [13]:
plylst_tag_map['id']=plylst_tag_map['id'].astype('int64')

In [14]:
plylst_tag_map

Unnamed: 0,id,tags
0,61281,락
1,10532,추억
2,10532,회상
3,76951,까페
4,76951,잔잔한
...,...,...
476326,131982,퇴근길
476327,100389,노래추천
476328,100389,팝송추천
476329,100389,팝송


# 곡 데이터 

In [15]:
song_meta = pd.read_json('song_meta.json', typ = 'frame')

In [16]:
song_meta

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id
0,[GN0901],20140512,불후의 명곡 - 7080 추억의 얄개시대 팝송베스트,2255639,[2727],Feelings,[GN0900],[Various Artists],0
1,"[GN1601, GN1606]",20080421,"Bach : Partitas Nos. 2, 3 & 4",376431,[29966],"Bach : Partita No. 4 In D Major, BWV 828 - II....",[GN1600],[Murray Perahia],1
2,[GN0901],20180518,Hit,4698747,[3361],Solsbury Hill (Remastered 2002),[GN0900],[Peter Gabriel],2
3,"[GN1102, GN1101]",20151016,Feeling Right (Everything Is Nice) (Feat. Popc...,2644882,[838543],Feeling Right (Everything Is Nice) (Feat. Popc...,[GN1100],[Matoma],3
4,"[GN1802, GN1801]",20110824,그남자 그여자,2008470,[560160],그남자 그여자,[GN1800],[Jude Law],4
...,...,...,...,...,...,...,...,...,...
707984,[GN2001],19991219,The Best Best Of The Black President,65254,[166499],Coffin For Head Of State,[GN2000],[Fela Kuti],707984
707985,[GN0901],19860000,True Colors,44141,[11837],Change Of Heart,[GN0900],[Cyndi Lauper],707985
707986,"[GN0105, GN0101]",20160120,행보 2015 윤종신 / 작사가 윤종신 Live Part.1,2662866,[437],스치듯 안녕,[GN0100],[윤종신],707986
707987,"[GN1807, GN1801]",20131217,명상의 시간을 위한 뉴에이지 음악,2221722,[729868],숲의 빛,[GN1800],[Nature Piano],707987


In [17]:
song_tag=pd.merge(plylst_tag_map,song_meta,how='inner',on='id')

In [203]:
song_happy=song_tag.query("tags in ['신남','즐거운']")
song_angry=song_tag.query("tags in ['명상','차분한']")
song_sad_down=song_tag.query("tags in ['슬픔']")
song_sad_up=song_tag.query("tags in ['기분업']")

In [204]:
song_netural=train.sort_values('like_cnt',ascending=False)

In [205]:
song_netural

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date
46524,"[노동요, 취향저격, 드라이브, Pop, 기분전환, 트렌디, 스트레스, 신나는]",74220,HOT TRENDY POP: 놓쳐선 안될 'POP' (매주 업데이트),"[159317, 368264, 600999, 274905, 643023, 24070...",53211,2020-04-17 14:32:23.000
85071,"[감성, 새벽, 때껄룩, 잔잔한, 휴식, 팝송, 떼껄룩, 힐링, 팝, 신나는]",122388,❤️때껄룩님 ᴛᴀᴋᴇ ᴀ ʟᴏᴏᴋ 플레이리스트❤️,"[532771, 383775, 432753, 176384, 152694, 60126...",41844,2020-04-23 20:58:45.000
102651,"[매장음악, 카페, 러블리쏘, 분위기, 드라이브, 여행, 매력적인, 기분전환, 팝]",26174,듣다보면 '우와!' 하고 제목을 보게되는 팝,"[40160, 20039, 561488, 20051, 16242, 256568, 7...",27268,2020-04-19 21:22:11.000
101464,"[흥겨운, 드라이브, Pop, 기분전환, 경쾌한, 외출, 내적댄스, 기분업, 충전,...",72658,"약속 있어? 외출 전, 기분 UP 하고 싶을 때 들어봐! [매주 목요일]","[111779, 406878, 417725, 129878, 357770, 34879...",23965,2020-04-22 16:05:39.000
59894,"[힐링, 휴식, 기분전환]",113501,감성이 터지는 팝음악들,"[342524, 1233, 417666, 394523, 107455, 17206, ...",23732,2015-07-29 08:25:35.000
...,...,...,...,...,...,...
104613,[어쿠스틱기타],132666,어쿠스틱 좋은 노래,"[110540, 688028, 624009, 593300, 494305, 65325...",0,2018-04-24 19:10:01.000
71156,[락],9179,라디오 797,"[27673, 112731, 550192, 647261, 509152, 314370...",0,2010-11-09 13:28:23.000
16539,"[잔잔한, 힐링, 명상, 피아노, 차분함]",42577,따스한 감성을 전하는 피아노 음악,"[274395, 95894, 201745, 191637, 602182, 200150...",0,2019-05-03 10:14:33.000
71151,"[댄스, 댄스곡, 신나는]",61003,내적댄스 대박! 쉰나는 댄스곡,"[690285, 580860, 455945, 114387, 498452, 34758...",0,2019-12-21 00:09:24.000


In [206]:
song_netural1=song_netural['songs'].iloc[0]

In [207]:
song_netural1=song_netural1[0:100]

In [208]:
song_netural2=song_netural['songs'].iloc[1]

In [209]:
song_netural2=song_netural2[0:100]

In [210]:
song_netural1.extend(song_netural2)

In [211]:
len(song_netural1)

200

In [212]:
song_netural=pd.DataFrame({'id':song_netural1})

In [213]:
song_netural

Unnamed: 0,id
0,159317
1,368264
2,600999
3,274905
4,643023
...,...
195,566420
196,152462
197,700348
198,545260


In [214]:
song_netural=pd.merge(song_netural,song_meta,how='inner',on='id')

In [215]:
song_netural['tags']='netural'

In [216]:
song_netural.head()

Unnamed: 0,id,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,tags
0,159317,[GN0901],20200228,Confidant,10384661,[2731448],Confidant,[GN0900],[BabyJake],netural
1,368264,"[GN0908, GN0901]",20200218,How To Love (feat. GRAY),10388900,[2854781],How To Love (feat. GRAY),[GN0900],[ALLY],netural
2,600999,"[GN0908, GN0901]",20200220,Say It Again,10390827,[2742725],Say It Again,[GN0900],[Jimmy Smash],netural
3,274905,"[GN0908, GN0901]",20200220,Another Life,10390880,[1134287],Another Life (Feat. Destiny Rogers),[GN0900],[Alex Aiono],netural
4,643023,"[GN0908, GN0901]",20200221,6 Months,10391183,[2640909],6 months,[GN0900],[John K],netural


In [226]:
song_happy=song_happy.sample(200)
song_angry=song_angry.sample(200)
song_sad_down=song_sad_down.sample(200)
song_sad_up=song_sad_up.sample(200)

In [227]:
song_happy['tags']='happy'
song_angry['tags']='angry'
song_sad_down['tags']='saddown'
song_sad_up['tags']='sadup'

In [228]:
song=pd.concat([song_happy,song_angry,song_sad_down,song_sad_up,song_netural],axis=0)

In [229]:
song

Unnamed: 0,id,tags,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket
82824,120791,happy,"[GN2104, GN2101]",20110801,2011 워십투어 라이브 IN 서울 `God With Us`,2006864,[101268],예수님 연약한 날 위해,[GN2100],[Disciples]
274053,17907,happy,"[GN1013, GN1001]",20050726,Mr. A-Z,306565,[50775],Life Is Wonderful,[GN1000],[Jason Mraz]
56216,85206,happy,"[GN1601, GN1602]",20141223,Vivaldi: 6 Cello Concertos,191242,"[32004, 100378, 32094]",Cello Concerto in G minor RV416 Allegro,[GN1600],"[Christophe Coin, The Academy Of Ancient Music..."
413683,107860,happy,"[GN1807, GN1801]",20150703,커피 향기를 닮은 감미로운 뉴에이지 피아노,2327375,[749191],나를 위한 휴식 공간,[GN1800],[홍유란]
48034,77251,happy,[GN0701],20040101,가요백년사 다시 듣고싶은 그 시절 그 노래...,30446,[953],님계신 전선,[GN0700],[금사향]
...,...,...,...,...,...,...,...,...,...,...
195,566420,netural,"[GN0901, GN0902]",20170616,Melodrama,10043187,[725692],Hard Feelings/Loveless,[GN0900],[Lorde]
196,152462,netural,[GN0901],20180105,This Is Not An Album,10075613,[1133767],`93,[GN0900],[Youngr]
197,700348,netural,"[GN0401, GN0501, GN0509]",20170719,Walking in the Moonlight,10080593,[995101],"Walking in the Moonlight (Feat. 다원, Lazier)","[GN0500, GN0400]",[서교동의 밤]
198,545260,netural,[GN0901],20170721,Mermaid,10081101,[1274805],Mermaid,[GN0900],[Skott]


In [230]:
song=song[['tags','issue_date','album_name','artist_name_basket','song_name']]

In [231]:
song=song.reset_index(drop=True)

In [232]:
song.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   tags                1000 non-null   object
 1   issue_date          1000 non-null   int64 
 2   album_name          1000 non-null   object
 3   artist_name_basket  1000 non-null   object
 4   song_name           1000 non-null   object
dtypes: int64(1), object(4)
memory usage: 39.2+ KB


In [233]:
song

Unnamed: 0,tags,issue_date,album_name,artist_name_basket,song_name
0,happy,20110801,2011 워십투어 라이브 IN 서울 `God With Us`,[Disciples],예수님 연약한 날 위해
1,happy,20050726,Mr. A-Z,[Jason Mraz],Life Is Wonderful
2,happy,20141223,Vivaldi: 6 Cello Concertos,"[Christophe Coin, The Academy Of Ancient Music...",Cello Concerto in G minor RV416 Allegro
3,happy,20150703,커피 향기를 닮은 감미로운 뉴에이지 피아노,[홍유란],나를 위한 휴식 공간
4,happy,20040101,가요백년사 다시 듣고싶은 그 시절 그 노래...,[금사향],님계신 전선
...,...,...,...,...,...
995,netural,20170616,Melodrama,[Lorde],Hard Feelings/Loveless
996,netural,20180105,This Is Not An Album,[Youngr],`93
997,netural,20170719,Walking in the Moonlight,[서교동의 밤],"Walking in the Moonlight (Feat. 다원, Lazier)"
998,netural,20170721,Mermaid,[Skott],Mermaid


In [238]:
song_art=song['song_name']
song_album=song['album_name']

In [239]:
song2="https://www.melon.com/search/total/index.htm?q="+song_album+'+'+song_art

In [240]:
song2[1]

'https://www.melon.com/search/total/index.htm?q=Mr. A-Z+Life Is Wonderful'

In [241]:
song2=pd.DataFrame({'song_url':song2})

In [242]:
song=pd.concat([song,song2],axis=1)

In [243]:
song['song_url'].iloc[0]

'https://www.melon.com/search/total/index.htm?q=2011 워십투어 라이브 IN 서울 `God With Us`+예수님 연약한 날 위해'

In [245]:
song.to_excel('song_url.xlsx')

In [2]:
song=pd.read_csv('song.csv',index_col=0)

In [197]:
song_hap=song.query("tags in ['happy']")

In [198]:
song_hap

Unnamed: 0,tags,issue_date,album_name,artist_name_basket,song_name,song_url
0,happy,20120803,The Great American Songbook Collection,[Beegie Adair],Skylark,https://www.melon.com/search/total/index.htm?q...
1,happy,20170609,Cigarettes After Sex,[Cigarettes After Sex],Each Time You Fall in Love,https://www.melon.com/search/total/index.htm?q...
2,happy,20180824,The Dream Thief,[Shai Maestro],The Dream Thief,https://www.melon.com/search/total/index.htm?q...
3,happy,20180912,어린이 인기 동요 베스트 100,[동요싱싱 (singsing)],싹싹닦아라,https://www.melon.com/search/total/index.htm?q...
4,happy,20150710,X-Men: Days Of Future Past - Rogue Cut (Origin...,[John Ottman],All Those Voices,https://www.melon.com/search/total/index.htm?q...
...,...,...,...,...,...,...
195,happy,19930525,Together,[Julian Bream],"L`encouragement, Op. 34 Waltz",https://www.melon.com/search/total/index.htm?q...
196,happy,20150528,피아노로 쓰는 편지,[Hansol],꼭 행복하셔야 해요,https://www.melon.com/search/total/index.htm?q...
197,happy,20131101,Did You Hear The Rain?,[George Ezra],Did You Hear The Rain?,https://www.melon.com/search/total/index.htm?q...
198,happy,19940500,The Return Of N.EX.T Part 1 The Being,[넥스트],이중 인격자,https://www.melon.com/search/total/index.htm?q...


In [201]:
song_hap.to_csv('song_happy.csv')

In [199]:
song_hap1=song_hap['song_url']

In [3]:
song

Unnamed: 0,tags,issue_date,album_name,artist_name_basket,song_name
0,happy,20110801,2011 워십투어 라이브 IN 서울 `God With Us`,['Disciples'],예수님 연약한 날 위해
1,happy,20050726,Mr. A-Z,['Jason Mraz'],Life Is Wonderful
2,happy,20141223,Vivaldi: 6 Cello Concertos,"['Christophe Coin', 'The Academy Of Ancient Mu...",Cello Concerto in G minor RV416 Allegro
3,happy,20150703,커피 향기를 닮은 감미로운 뉴에이지 피아노,['홍유란'],나를 위한 휴식 공간
4,happy,20040101,가요백년사 다시 듣고싶은 그 시절 그 노래...,['금사향'],님계신 전선
...,...,...,...,...,...
995,netural,20170616,Melodrama,['Lorde'],Hard Feelings/Loveless
996,netural,20180105,This Is Not An Album,['Youngr'],`93
997,netural,20170719,Walking in the Moonlight,['서교동의 밤'],"Walking in the Moonlight (Feat. 다원, Lazier)"
998,netural,20170721,Mermaid,['Skott'],Mermaid


In [6]:
song=pd.read_excel('song_url.xlsx',index_col=0)

In [7]:
song

Unnamed: 0,tags,issue_date,album_name,artist_name_basket,song_name,song_url
0,happy,20110801,2011 워십투어 라이브 IN 서울 `God With Us`,['Disciples'],예수님 연약한 날 위해,https://www.melon.com/search/total/index.htm?q...
1,happy,20050726,Mr. A-Z,['Jason Mraz'],Life Is Wonderful,https://www.melon.com/search/total/index.htm?q...
2,happy,20141223,Vivaldi: 6 Cello Concertos,"['Christophe Coin', 'The Academy Of Ancient Mu...",Cello Concerto in G minor RV416 Allegro,https://www.melon.com/search/total/index.htm?q...
3,happy,20150703,커피 향기를 닮은 감미로운 뉴에이지 피아노,['홍유란'],나를 위한 휴식 공간,https://www.melon.com/search/total/index.htm?q...
4,happy,20040101,가요백년사 다시 듣고싶은 그 시절 그 노래...,['금사향'],님계신 전선,https://www.melon.com/search/total/index.htm?q...
...,...,...,...,...,...,...
995,netural,20170616,Melodrama,['Lorde'],Hard Feelings/Loveless,https://www.melon.com/search/total/index.htm?q...
996,netural,20180105,This Is Not An Album,['Youngr'],`93,https://www.melon.com/search/total/index.htm?q...
997,netural,20170719,Walking in the Moonlight,['서교동의 밤'],"Walking in the Moonlight (Feat. 다원, Lazier)",https://www.melon.com/search/total/index.htm?q...
998,netural,20170721,Mermaid,['Skott'],Mermaid,https://www.melon.com/search/total/index.htm?q...
