In [1]:
import os
import re
import json
import urllib
import requests
import numpy as np
import urllib.parse
import pandas as pd
from bs4 import BeautifulSoup

In [18]:
def import_csv(filepath):
    """Load a comma-separated file into a DataFrame.

    Parameters
    ----------
    filepath : str or file-like
        Source of the CSV data; passed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(filepath, sep=',')

In [57]:
# # 取得歌手對應的歌曲清單，收集歌單內的歌曲ID
# 取得歌曲頁面url，存成list(song_url)
# https://segmentfault.com/a/1190000014948845
def get_songs(artist_id, artists=None):
    """Scrape an artist page on music.163.com and collect its song links.

    Parameters
    ----------
    artist_id : str or int
        Artist id; inserted into the artist page URL and used to look the
        artist up in the artists table.
    artists : pandas.DataFrame, optional
        Table with ``artist_id`` and ``artist_name`` columns. When omitted the
        notebook-level ``df_artists`` frame is used (the original behaviour).

    Returns
    -------
    list
        ``[song_ids, song_titles, song_url, artist_name, artist_url]`` where
        the first three entries are lists of strings extracted from the page,
        ``artist_name`` is the first matching name in the artists table and
        ``artist_url`` is the artist page URL.

    Raises
    ------
    IndexError
        If no row of the artists table matches ``artist_id``.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'}
    artist_url = 'http://music.163.com/artist?id=' + str(artist_id)  # artist page link
    # A timeout keeps one stalled connection from hanging the whole notebook.
    r = requests.get(artist_url, headers=headers, timeout=30)
    song_ids = re.findall(r'song\?id=(\d+?)".+?</a>', r.text)  # list of song ids
    song_titles = re.findall(r'song\?id=\d+?">(.+?)</a>', r.text)  # list of song titles
    song_url = ['https://music.163.com/#/song?id={}'.format(song_id) for song_id in song_ids]

    # Look up the artist name for this id.
    if artists is None:
        artists = df_artists
    id_column = artists['artist_id']
    # read_csv may parse ids as integers while callers pass strings (or the
    # other way round); accept a match on either the raw or the string form.
    matches = (id_column == artist_id) | (id_column.astype(str) == str(artist_id))
    names = artists.loc[matches, 'artist_name']
    if names.empty:
        raise IndexError('artist_id {!r} not found in the artists table'.format(artist_id))
    artist_name = names.values[0]

    return [song_ids, song_titles, song_url, artist_name, artist_url]

In [34]:
# 歌單的歌轉成dataframe並存成csv
# 亂碼使用encoding='utf_8_sig'解決
def get_songinfo(song_ids, song_titles, song_url, artist_name):
    """Assemble the scraped song lists into a DataFrame and save it as CSV.

    Parameters
    ----------
    song_ids, song_titles, song_url : list
        Parallel lists describing each song.
    artist_name : str
        Artist name; repeated in the ``Artist`` column and used in the output
        file name.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``Artist``, ``SongID``, ``SongTitle``, ``SongURL``.

    Side effects
    ------------
    Writes ``CloudMusic_csv/artists/df_songs_<artist_name>.csv`` (creating the
    directory if needed), encoded as ``utf_8_sig`` which the notebook author
    notes fixes garbled characters.
    """
    df_songs = pd.DataFrame()
    # One artist entry per song id, so the column lines up with SongID.
    df_songs['Artist'] = pd.Series([artist_name] * len(song_ids))
    df_songs['SongID'] = pd.Series(song_ids)
    df_songs['SongTitle'] = pd.Series(song_titles)
    df_songs['SongURL'] = pd.Series(song_url)
    filepath = 'CloudMusic_csv/artists/df_songs_' + artist_name + '.csv'
    # to_csv does not create missing directories; do it here so a fresh
    # checkout can run the notebook top to bottom.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    df_songs.to_csv(filepath, encoding='utf_8_sig')
    return df_songs

In [35]:
# 檔案名稱所有非法字元都轉義：https://www.itread01.com/content/1549125182.html
def validateTitle(title):
    r"""Replace characters that are illegal in file names with spaces.

    Every occurrence of one of ``/ \ : * ? " < > |`` in ``title`` is swapped
    for a single space; all other characters are kept as they are.

    Parameters
    ----------
    title : str
        Text intended for use in a file name.

    Returns
    -------
    str
        ``title`` with each illegal character replaced by a space.
    """
    illegal_chars = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
    return re.sub(illegal_chars, " ", title)

In [40]:
# 取得歌名和歌手
def get_name_artist(df_songs, count):
    """Read one song's artist, title and id, cleaned for use in file names.

    Parameters
    ----------
    df_songs : pandas.DataFrame
        Frame with ``Artist``, ``SongTitle`` and ``SongID`` columns.
    count : int
        Index label of the row to read.

    Returns
    -------
    tuple
        ``(artistname, songname, songid)``, each passed through
        ``validateTitle``. The three values are also printed.
    """
    artistname, songname, songid = [
        validateTitle(df_songs[column][count])
        for column in ('Artist', 'SongTitle', 'SongID')
    ]
    print(''.join([artistname, '_', songname, '\n', songid]))
    return artistname, songname, songid

In [37]:
# 取得動態歌詞
def get_lyrics(songid):
    """Fetch the lyric payload for one song from the music.163.com lyric API.

    Parameters
    ----------
    songid : str or int
        Song id, inserted into the request URL.

    Returns
    -------
    object
        The decoded JSON body of the response. Elsewhere in this notebook the
        lyric text is read from ``data['lrc']['lyric']``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the server does not answer in time.
    ValueError
        If the response body is not valid JSON.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'}
    url = 'http://music.163.com/api/song/lyric?id={}&lv=-1&kv=-1&tv=-1'.format(songid)
    # Without a timeout a single stalled request would block the download loop forever.
    response = requests.get(url, headers=headers, timeout=30)
    return response.json()

In [38]:
# 下載動態歌詞
# 解決UnicodeDecodeError: 'cp950'問題，加入encoding="utf-8"
def save_lrc_file(data, artistname, songname):
    """Write a song's lyric text to ``CloudMusic_lrc/artists/<artist>_<song>.lrc``.

    Parameters
    ----------
    data : dict
        Lyric payload; the text is taken from ``data['lrc']['lyric']``.
    artistname : str
        Artist name used in the file name.
    songname : str
        Song title used in the file name.

    Raises
    ------
    KeyError
        If ``data`` has no ``['lrc']['lyric']`` entry.

    Side effects
    ------------
    Creates the output directory if needed, writes the file as UTF-8 and
    prints the path that was written.
    """
    content = data['lrc']['lyric']
    # Build the path from the `artistname` argument, not the notebook-level
    # `artist_name` global, so the function works with whatever the caller passes.
    filepath = 'CloudMusic_lrc/artists/'+ artistname + '_' + songname +'.lrc'
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    # Explicit UTF-8 avoids the platform default codec (the author hit cp950 errors).
    with open(filepath, 'w', encoding="utf-8") as file:
        file.write(content)
    print(filepath)

In [60]:
# 儲存音樂
def save_mp3_file(filename, content):
    """Write raw audio bytes to ``CloudMusic_mp3/<filename>``.

    Parameters
    ----------
    filename : str
        File name (including extension) inside the ``CloudMusic_mp3`` folder.
    content : bytes
        Data to write.

    Side effects
    ------------
    Creates the target directory if needed, writes the file in binary mode
    and prints the path that was written.
    """
    filepath = 'CloudMusic_mp3/'+ filename
    # open() does not create missing directories; make sure the folder exists.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, mode = "wb") as f:
        f.write(content)
    print(filepath)

In [None]:
# 下載音樂.mp3
def download_music(songname, songid):
    """Download one song from music.163.com and store it via ``save_mp3_file``.

    Parameters
    ----------
    songname : str
        Song title, used as the first part of the file name.
    songid : str or int
        Song id, used in the request URL and as the second part of the file
        name.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or an HTTP error status.
    """
    # Normalise once so both the URL and the file name accept integer ids.
    song_id = str(songid)
    # Build the request URL.
    url = 'http://music.163.com/song/media/outer/url?id=' + song_id + '.mp3'
    response = requests.get(url, timeout=60)
    # Do not store an HTTP error body on disk as if it were audio.
    response.raise_for_status()
    # File naming rule: <songname>_<songid>.mp3
    save_mp3_file(songname + '_' + song_id + '.mp3', response.content)

In [58]:
# 主程式抓歌曲
# Driver: list one artist's songs, save the list as CSV, then download the
# lyrics of every song.
artist_id = '45236'
df_artists = import_csv('CloudMusic_csv/df_artists.csv')
song_ids, song_titles, song_url, artist_name, artist_url = get_songs(artist_id)
df_songs = get_songinfo(song_ids, song_titles, song_url, artist_name)
for i in range(len(df_songs)):
    artistname, songname, songid = get_name_artist(df_songs, i)
    try:
        data = get_lyrics(songid)
        save_lrc_file(data, artistname, songname)
    except Exception as exc:
        # Stay best-effort (one bad song must not stop the rest) but report
        # the failure instead of hiding it; KeyboardInterrupt still propagates.
        print('lyrics not saved for song id {}: {!r}'.format(songid, exc))
    print('-'*70)

Avicii_Waiting For Love
31356499
CloudMusic_lrc/artists/Avicii_Waiting For Love.lrc
----------------------------------------------------------------------
Avicii_The Nights
29771146
CloudMusic_lrc/artists/Avicii_The Nights.lrc
----------------------------------------------------------------------
Avicii_Wake Me Up
27713920
CloudMusic_lrc/artists/Avicii_Wake Me Up.lrc
----------------------------------------------------------------------
Avicii_Without You
496869523
CloudMusic_lrc/artists/Avicii_Without You.lrc
----------------------------------------------------------------------
Avicii_Waiting for Love
32196550
CloudMusic_lrc/artists/Avicii_Waiting for Love.lrc
----------------------------------------------------------------------
Avicii_Lonely Together
496869520
CloudMusic_lrc/artists/Avicii_Lonely Together.lrc
----------------------------------------------------------------------
Avicii_SOS
1357848241
CloudMusic_lrc/artists/Avicii_SOS.lrc
--------------------------------------------

In [62]:
url = 'https://m801.music.126.net/20200113025447/8cd60d138777a13dd24f33d64be06b59/jdyyaac/520f/0158/0308/eefd2984f862ac2408b94bb0011ebcfa.m4a'
response = requests.get(url)
content = response.content
save_mp3_file('Alan Walker_Faded.mp3', content)

CloudMusic_mp3/Alan Walker_Faded.mp3
