In [1]:
import math
import random
import sqlite3
import time
import urllib
import urllib.parse

import pandas as pd
import requests

In [2]:
# Request headers imitating a desktop Chrome browser talking to music.163.com.
# Long values are split across adjacent literals; Python joins them at compile time.
my_headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'music.163.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/64.0.3282.140 Safari/537.36'
    ),
}

In [3]:
def getJSON(url, headers, timeout=10):
    """
    Send a GET request and return the response body parsed as JSON.

    @ param url: address to request
    @ param headers: dict of HTTP headers sent with the request
    @ param timeout: seconds to wait for the server before giving up
                     (default 10); without it a stalled connection would
                     block the notebook indefinitely
    @ return json: the decoded JSON payload
    @ raise requests.HTTPError: when the server answers with a 4xx/5xx status
    @ raise requests.Timeout: when the server does not answer within `timeout`
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    # Force UTF-8 so the body is decoded the same way regardless of the
    # charset (if any) announced in the response headers.
    res.encoding = 'utf-8'
    return res.json()

In [4]:
def countPages(total, limit):
    """
    Work out how many pages are needed to hold `total` items.

    @ param total: total number of items
    @ param limit: maximum number of items on one page
    @ return page: total / limit, rounded up to the next integer
    """
    return math.ceil(total / limit)

In [5]:
def getSongInfo(song_list):
    """
    Flatten raw song records into rows of selected fields.

    @ param song_list: iterable of song dicts; each must provide 'id', 'name',
                       'artists' (list of dicts with a 'name'), 'album'
                       (dict with 'name' and 'id') and 'duration'.
                       None or an empty list yields an empty result.
    @ return song_info_list: list of rows
                       [id, name, artists, album name, album id, duration],
                       where artists is the artist names joined by ','
    """
    song_info_list = []

    for song in song_list or []:
        # join() puts the separator only *between* names; the previous manual
        # concatenation left a dangling ',' after the last artist.
        artists_name = ','.join(artist['name'] for artist in song['artists'])
        album = song['album']

        song_info_list.append([
            song['id'],
            song['name'],
            artists_name,
            album['name'],
            album['id'],
            song['duration'],
        ])

    return song_info_list

In [6]:
def getSongList(key, limit=30):
    """
    Search music.163.com for songs matching a keyword and collect all pages.

    @ param key: search keyword (URL-quoted before being sent)
    @ param limit: number of songs requested per page (default 30)
    @ return result: pandas DataFrame with one row per song, in the column
                     order produced by getSongInfo
    """
    quoted_key = urllib.parse.quote(key)

    def page_url(offset):
        # `offset` is the index of the first song wanted, counted in songs,
        # not in pages.
        return ('http://music.163.com/api/search/get/web?csrf_token=&hlpretag=&hlposttag=&s='
                + quoted_key + '&type=1&offset=' + str(offset)
                + '&total=true&limit=' + str(limit))

    # The first page also reports the total hit count, so it is fetched once
    # and reused as page 1 instead of being requested a second time.
    page = getJSON(page_url(0), my_headers)
    song_count = page['result']['songCount']
    page_num = countPages(song_count, limit)

    total_list = []
    for n in range(page_num):
        if n > 0:
            # Random pause between requests; nothing to wait for after the
            # last page, so the delay sits in front of each follow-up request.
            time.sleep(random.randint(2, 8))
            # Advance by whole pages: page n starts at song n * limit.
            page = getJSON(page_url(n * limit), my_headers)

        # Tolerate a page that carries no 'songs' list instead of raising KeyError.
        total_list += getSongInfo(page['result'].get('songs') or [])

        print('第 {0}/{1} 页爬取完成'.format(n+1, page_num))

    df = pd.DataFrame(data = total_list)
    return df

In [7]:
def getComment(comments):
    """
    Reduce raw comment records to rows of selected fields.

    @ param comments: iterable of comment dicts, each with 'commentId',
                      'content', 'likedCount' and a nested 'user' dict
                      holding 'userId', 'nickname' and 'avatarUrl'
    @ return comments_list: list of rows
                      [commentId, userId, nickname, avatarUrl, content, likedCount]
    """
    return [
        [
            entry['commentId'],
            entry['user']['userId'],
            entry['user']['nickname'],
            entry['user']['avatarUrl'],
            entry['content'],
            entry['likedCount'],
        ]
        for entry in comments
    ]

In [19]:
def getSongComment(id, limit=20):
    """
    Download all comments of one song from music.163.com.

    @ param id: song id, as str or int (the name shadows the builtin `id`
                inside this function; kept so existing callers keep working)
    @ param limit: number of comments requested per page (default 20)
    @ return result: pandas DataFrame with one row per comment, in the column
                     order produced by getComment
    """
    # Accept numeric ids too; plain concatenation would raise TypeError on int.
    song_id = str(id)

    def page_url(offset):
        # `offset` is the index of the first comment wanted, counted in
        # comments, not in pages.
        return ('http://music.163.com/api/v1/resource/comments/R_SO_4_' + song_id
                + '?limit=' + str(limit) + '&offset=' + str(offset))

    # The first page also reports the total comment count, so it is fetched
    # once (with the caller's page size) and reused as page 1.
    page = getJSON(page_url(0), my_headers)
    total = page['total']
    page_num = countPages(total, limit)

    total_comment = []
    for n in range(page_num):
        if n > 0:
            # Random pause between requests; no delay is needed after the
            # last page, so it sits in front of each follow-up request.
            time.sleep(random.randint(2, 8))
            # Advance by whole pages: page n starts at comment n * limit.
            page = getJSON(page_url(n * limit), my_headers)

        total_comment += getComment(page['comments'])

        print('第 {0}/{1} 页爬取完成'.format(n+1, page_num))

    df = pd.DataFrame(data = total_comment)
    return df

In [20]:
# Fetch the comments of song 27804065 with the default page size, then display
# the resulting DataFrame as the cell output.
df = getSongComment('27804065')
df

第 1/4 页爬取完成
第 2/4 页爬取完成
第 3/4 页爬取完成
第 4/4 页爬取完成


Unnamed: 0,0,1,2,3,4,5
0,1170047340,444889972,是悦悦同学阿,http://p1.music.126.net/qgXUk7eX95nXQiqVSM8oSA...,他姓陆,0
1,1164643942,270264247,銀与,http://p1.music.126.net/LLs-smpUS-LWlvKvBY2cWA...,失眠。聽會竇唯。,1
2,1061041503,255981536,安隆汶的死神,http://p1.music.126.net/DJYCLZRcvFNOFapjxVb0lg...,美好的日子里你是锦上的花，\n昏沉的日子里你是雾中的光。\n❤,4
3,719938994,280778741,Vlance_,http://p1.music.126.net/NecGAJRABwd1KNsXKOhIPg...,只是她姓陆,3
4,718437926,271253793,祭红颜一曲明月诉流殇,http://p1.music.126.net/YqyU4K9h04YnZiS6U47fYw...,嗯哼,0
5,652738564,492033609,花开半夏似水流,http://p1.music.126.net/1Rhdd3yKmIx40xo4Tbnbjg...,君の名は、[爱心],3
6,645898005,471904220,陆伯渝,http://p1.music.126.net/loXX03FxDS5ftOJIbLMh5g...,鄙人姓陆,2
7,639673871,533315628,帐号已注销,http://p1.music.126.net/RLeBJe4D1ZzUtltxfoKDMg...,仿佛医院深夜中只身未眠的病患 听仪器嘶鸣 生生死死恍恍惚惚 又好像从未如此清醒过,4
8,634714922,121332848,荔枝味不好养,http://p1.music.126.net/3InrmpgnsTV78wF175Mt8g...,吃了止疼药趴在桌子上 没有勇气再抬头了,3
9,608063736,271253793,祭红颜一曲明月诉流殇,http://p1.music.126.net/YqyU4K9h04YnZiS6U47fYw...,棒,0
