In [1]:
import json
import time
import random
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape comments on a NetEase Cloud Music song into a CSV file.

    For every comment the author's public profile is fetched as well, so each
    CSV row combines profile fields with the comment itself.  All requests go
    to ``music.163.com`` using the headers built in ``__init__``.

    The pauses between requests are class attributes so they can be tuned on a
    subclass or an instance without touching the scraping logic.
    """

    REQUEST_TIMEOUT = 10       # seconds before an HTTP request is abandoned
    RETRY_DELAY = 3            # seconds to wait after a failed request
    COMMENT_DELAY = 0.5        # seconds to wait after each saved comment
    PAGE_DELAY_RANGE = (1, 3)  # bounds of the random pause between pages

    def __init__(self):
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Convert a millisecond birthday timestamp into an approximate age.

        Args:
            birthday_ms: Birthday as milliseconds since 1970; anything that is
                not a positive number is treated as "not set".
            current_year: Year to measure against; defaults to the current
                local year (the previous hard-coded 2024 went stale).

        Returns:
            The age in years (never negative), or ``None`` when unset.
        """
        if not isinstance(birthday_ms, (int, float)) or birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years since 1970 approximate the birth year.
        birth_year = 1970 + int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch a user's public profile from the user-detail endpoint.

        Args:
            user_id: User identifier inserted into the request URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``.  A field
            the API did not supply keeps the placeholder ``'无'``.

        Only request/JSON-decoding failures are retried (up to three
        attempts).  A decoded response is parsed exactly once, because
        retrying cannot repair missing or malformed profile fields.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(self.RETRY_DELAY)
                continue

            if isinstance(result, dict) and result.get('code') == 200:
                profile = result.get('profile')
                if isinstance(profile, dict):
                    data['gender'] = profile.get('gender', '无')

                    age = self._age_from_birthday(profile.get('birthday', 0))
                    if age is not None:
                        data['age'] = age

                    data['city'] = profile.get('city', '无')

                    # The signature may be null in the response; fall back to
                    # the placeholder instead of failing on ``None.strip()``.
                    signature = profile.get('signature', '无')
                    if signature is None:
                        signature = '无'
                    data['sign'] = (str(signature).strip()
                                    .replace('\n', ' ').replace(',', '，'))
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier inserted into the request URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding failed (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data1.csv'):
        """Append one row to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.

        The ``csv`` module quotes fields containing commas, quotes or line
        breaks, which the previous plain ``','.join`` silently corrupted.
        """
        import csv  # local import: the surrounding file does not import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(v) for v in data])

    def create_csv_header(self, filename='comments_data1.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: the surrounding file does not import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def _process_comment(self, comment, label):
        """Look up the comment's author, save one CSV row and pause briefly.

        Args:
            comment: One comment object from the API response.
            label: Wording used in the progress/error messages
                (``'hot comment'`` or ``'comment'``).

        Returns:
            ``True`` when the row was saved, ``False`` when the comment was
            skipped because of an error (which is printed).
        """
        try:
            user = comment.get('user') or {}
            user_id = user.get('userId')
            nickname = user.get('nickname', '')

            user_info = self.get_user_info(user_id)
            # The API timestamp is in milliseconds.
            comment_time = time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(int(comment['time']) // 1000))

            data = [
                nickname,
                user_id,
                user_info['age'],
                user_info['gender'],
                user_info['city'],
                user_info['sign'],
                (comment.get('content') or '').replace(',', '，'),
                comment.get('commentId', ''),
                comment.get('likedCount', 0),
                comment_time
            ]

            self.save_comment(data)
            print(f"Saved {label} from user: {nickname}")
            time.sleep(self.COMMENT_DELAY)
            return True
        except Exception as e:
            # Best effort: one malformed comment must not abort the scrape.
            print(f"Error processing {label}: {str(e)}")
            return False

    def scrape_comments(self, song_id, max_comments=5000):
        """Save the song's hot comments, then page through regular comments.

        Args:
            song_id: Song identifier passed to :meth:`get_comments`.
            max_comments: Stop after the page on which the number of saved
                comments reaches this value.

        Paging stops when a page is empty, when the cap is reached, or when a
        page cannot be fetched after three attempts.
        """
        offset = 0
        limit = 100
        total_comments = 0
        max_retries = 3

        # Hot comments come with the first page of results.
        hot_comments = self.get_comments(song_id, 0, 100)
        if isinstance(hot_comments, dict) and 'hotComments' in hot_comments:
            for comment in hot_comments['hotComments'] or []:
                if self._process_comment(comment, 'hot comment'):
                    total_comments += 1

        # Regular comments, one page at a time.
        while True:
            # get_comments reports failure by returning None (it never
            # raises), so every unusable response counts as a failed attempt.
            result = None
            for _ in range(max_retries):
                candidate = self.get_comments(song_id, offset, limit)
                if isinstance(candidate, dict) and 'comments' in candidate:
                    result = candidate
                    break
                time.sleep(self.RETRY_DELAY)

            if result is None:
                print(f"Failed to get comments after {max_retries} retries")
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                if self._process_comment(comment, 'comment'):
                    total_comments += 1

            offset += limit
            print(f"Processed {total_comments} comments")

            if total_comments >= max_comments:
                print("Reached maximum comment limit")
                break

            # Random pause between pages.
            time.sleep(random.uniform(*self.PAGE_DELAY_RANGE))

def main():
    """Write a fresh CSV header, then scrape comments for the configured song."""
    target_song = '2600227363'  # Replace with your target song ID
    client = NeteaseMusicScraper()
    client.create_csv_header()
    client.scrape_comments(target_song)


if __name__ == "__main__":
    main()

Saved hot comment from user: 王小旋同学
Saved hot comment from user: 隼眠
Saved hot comment from user: Glockiebae_813
Saved hot comment from user: Kriszhu_
Saved hot comment from user: 冷君Coldreams
Saved hot comment from user: MC小饼
Saved hot comment from user: 宫城城城城良田
Saved hot comment from user: 滑水冠軍zz
Saved hot comment from user: Jiaeul
Saved hot comment from user: 这位作者很脆弱
Saved hot comment from user: rea1_dap
Saved hot comment from user: 以致遠
Saved hot comment from user: rea1_dap
Saved hot comment from user: CRTD40
Saved hot comment from user: YoungStylish-D首饰被点赞看歌单
Saved comment from user: Bery1-
Saved comment from user: 诗崽心
Saved comment from user: mirror-o_O
Saved comment from user: 李木子_329
Saved comment from user: 粒粒鸿宇
Saved comment from user: 粒粒鸿宇
Saved comment from user: EGGN9
Saved comment from user: SZOPHRENIC
Saved comment from user: 谪守6
Saved comment from user: 没有焦灼的午后
Saved comment from user: 没有焦灼的午后
Saved comment from user: ShiEr1337
Saved comment from user: 凌寒潇潇楺花蓉
Saved comment

In [2]:
import json
import time
import random
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape comments on a NetEase Cloud Music song into a CSV file.

    For every comment the author's public profile is fetched as well, so each
    CSV row combines profile fields with the comment itself.  All requests go
    to ``music.163.com`` using the headers built in ``__init__``.

    The pauses between requests are class attributes so they can be tuned on a
    subclass or an instance without touching the scraping logic.
    """

    REQUEST_TIMEOUT = 10       # seconds before an HTTP request is abandoned
    RETRY_DELAY = 3            # seconds to wait after a failed request
    COMMENT_DELAY = 0.5        # seconds to wait after each saved comment
    PAGE_DELAY_RANGE = (1, 3)  # bounds of the random pause between pages

    def __init__(self):
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Convert a millisecond birthday timestamp into an approximate age.

        Args:
            birthday_ms: Birthday as milliseconds since 1970; anything that is
                not a positive number is treated as "not set".
            current_year: Year to measure against; defaults to the current
                local year (the previous hard-coded 2024 went stale).

        Returns:
            The age in years (never negative), or ``None`` when unset.
        """
        if not isinstance(birthday_ms, (int, float)) or birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years since 1970 approximate the birth year.
        birth_year = 1970 + int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch a user's public profile from the user-detail endpoint.

        Args:
            user_id: User identifier inserted into the request URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``.  A field
            the API did not supply keeps the placeholder ``'无'``.

        Only request/JSON-decoding failures are retried (up to three
        attempts).  A decoded response is parsed exactly once, because
        retrying cannot repair missing or malformed profile fields.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(self.RETRY_DELAY)
                continue

            if isinstance(result, dict) and result.get('code') == 200:
                profile = result.get('profile')
                if isinstance(profile, dict):
                    data['gender'] = profile.get('gender', '无')

                    age = self._age_from_birthday(profile.get('birthday', 0))
                    if age is not None:
                        data['age'] = age

                    data['city'] = profile.get('city', '无')

                    # The signature may be null in the response; fall back to
                    # the placeholder instead of failing on ``None.strip()``.
                    signature = profile.get('signature', '无')
                    if signature is None:
                        signature = '无'
                    data['sign'] = (str(signature).strip()
                                    .replace('\n', ' ').replace(',', '，'))
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier inserted into the request URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding failed (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data2.csv'):
        """Append one row to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.

        The ``csv`` module quotes fields containing commas, quotes or line
        breaks, which the previous plain ``','.join`` silently corrupted.
        """
        import csv  # local import: the surrounding file does not import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(v) for v in data])

    def create_csv_header(self, filename='comments_data2.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: the surrounding file does not import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def _process_comment(self, comment, label):
        """Look up the comment's author, save one CSV row and pause briefly.

        Args:
            comment: One comment object from the API response.
            label: Wording used in the progress/error messages
                (``'hot comment'`` or ``'comment'``).

        Returns:
            ``True`` when the row was saved, ``False`` when the comment was
            skipped because of an error (which is printed).
        """
        try:
            user = comment.get('user') or {}
            user_id = user.get('userId')
            nickname = user.get('nickname', '')

            user_info = self.get_user_info(user_id)
            # The API timestamp is in milliseconds.
            comment_time = time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(int(comment['time']) // 1000))

            data = [
                nickname,
                user_id,
                user_info['age'],
                user_info['gender'],
                user_info['city'],
                user_info['sign'],
                (comment.get('content') or '').replace(',', '，'),
                comment.get('commentId', ''),
                comment.get('likedCount', 0),
                comment_time
            ]

            self.save_comment(data)
            print(f"Saved {label} from user: {nickname}")
            time.sleep(self.COMMENT_DELAY)
            return True
        except Exception as e:
            # Best effort: one malformed comment must not abort the scrape.
            print(f"Error processing {label}: {str(e)}")
            return False

    def scrape_comments(self, song_id, max_comments=5000):
        """Save the song's hot comments, then page through regular comments.

        Args:
            song_id: Song identifier passed to :meth:`get_comments`.
            max_comments: Stop after the page on which the number of saved
                comments reaches this value.

        Paging stops when a page is empty, when the cap is reached, or when a
        page cannot be fetched after three attempts.
        """
        offset = 0
        limit = 100
        total_comments = 0
        max_retries = 3

        # Hot comments come with the first page of results.
        hot_comments = self.get_comments(song_id, 0, 100)
        if isinstance(hot_comments, dict) and 'hotComments' in hot_comments:
            for comment in hot_comments['hotComments'] or []:
                if self._process_comment(comment, 'hot comment'):
                    total_comments += 1

        # Regular comments, one page at a time.
        while True:
            # get_comments reports failure by returning None (it never
            # raises), so every unusable response counts as a failed attempt.
            result = None
            for _ in range(max_retries):
                candidate = self.get_comments(song_id, offset, limit)
                if isinstance(candidate, dict) and 'comments' in candidate:
                    result = candidate
                    break
                time.sleep(self.RETRY_DELAY)

            if result is None:
                print(f"Failed to get comments after {max_retries} retries")
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                if self._process_comment(comment, 'comment'):
                    total_comments += 1

            offset += limit
            print(f"Processed {total_comments} comments")

            if total_comments >= max_comments:
                print("Reached maximum comment limit")
                break

            # Random pause between pages.
            time.sleep(random.uniform(*self.PAGE_DELAY_RANGE))

def main():
    """Write a fresh CSV header, then scrape comments for the configured song."""
    target_song = '2643813746'  # Replace with your target song ID
    client = NeteaseMusicScraper()
    client.create_csv_header()
    client.scrape_comments(target_song)


if __name__ == "__main__":
    main()

Saved hot comment from user: XMASwu
Saved hot comment from user: XMASwu
Saved hot comment from user: 霏常想喝芝芝莓莓
Saved hot comment from user: _Kemmm_
Saved hot comment from user: iuiui_77
Saved hot comment from user: W薯片不吃番茄味
Saved hot comment from user: 会念起小阳嘛
Saved hot comment from user: Hxx_an
Saved hot comment from user: _晚安娜娜
Saved hot comment from user: 霏常想喝芝芝莓莓
Saved hot comment from user: 咩咩摘星星-
Saved comment from user: realrichfa
Saved comment from user: Miraitowa__sun
Saved comment from user: 一起去那场日落
Saved comment from user: AsYouL-
Saved comment from user: 亲爱的碎
Saved comment from user: yvonneyuer-
Saved comment from user: 冰菓koily
Saved comment from user: 前奏_他来看我的演唱会
Saved comment from user: 前奏_他来看我的演唱会
Saved comment from user: 前奏_他来看我的演唱会
Saved comment from user: AAW愛妳卟變MAA
Saved comment from user: JINN-hz
Saved comment from user: _Kemmm_
Saved comment from user: Bluned
Saved comment from user: Kwayeei
Saved comment from user: 平静的疯感
Saved comment from user: 请不要骂人-_-
Saved comme

KeyboardInterrupt: 

In [3]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape comments on a NetEase Cloud Music song into a CSV file.

    For every comment the author's public profile is fetched as well, so each
    CSV row combines profile fields with the comment itself.  All requests go
    to ``music.163.com`` using the headers built in ``__init__``.

    The pauses between requests are class attributes so they can be tuned on a
    subclass or an instance without touching the scraping logic.
    """

    REQUEST_TIMEOUT = 10  # seconds before an HTTP request is abandoned
    RETRY_DELAY = 3       # seconds to wait after a failed profile request
    COMMENT_DELAY = 0.5   # seconds to wait after each saved comment
    PAGE_DELAY = 1        # seconds to wait between comment pages

    def __init__(self):
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Convert a millisecond birthday timestamp into an approximate age.

        Args:
            birthday_ms: Birthday as milliseconds since 1970; anything that is
                not a positive number is treated as "not set".
            current_year: Year to measure against; defaults to the current
                local year (the previous hard-coded 2024 went stale).

        Returns:
            The age in years (never negative), or ``None`` when unset.
        """
        if not isinstance(birthday_ms, (int, float)) or birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years since 1970 approximate the birth year.
        birth_year = 1970 + int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch a user's public profile from the user-detail endpoint.

        Args:
            user_id: User identifier inserted into the request URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``.  A field
            the API did not supply keeps the placeholder ``'无'``.

        Only request/JSON-decoding failures are retried (up to three
        attempts).  A decoded response is parsed exactly once, because
        retrying cannot repair missing or malformed profile fields.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(self.RETRY_DELAY)
                continue

            if isinstance(result, dict) and result.get('code') == 200:
                profile = result.get('profile')
                if isinstance(profile, dict):
                    data['gender'] = profile.get('gender', '无')

                    age = self._age_from_birthday(profile.get('birthday', 0))
                    if age is not None:
                        data['age'] = age

                    data['city'] = profile.get('city', '无')

                    # The signature may be null in the response; fall back to
                    # the placeholder instead of failing on ``None.strip()``.
                    signature = profile.get('signature', '无')
                    if signature is None:
                        signature = '无'
                    data['sign'] = (str(signature).strip()
                                    .replace('\n', ' ').replace(',', '，'))
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier inserted into the request URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding failed (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data3.csv'):
        """Append one row to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.

        The ``csv`` module quotes fields containing commas, quotes or line
        breaks, which the previous plain ``','.join`` silently corrupted.
        """
        import csv  # local import: the surrounding file does not import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(v) for v in data])

    def create_csv_header(self, filename='comments_data3.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: the surrounding file does not import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def _process_comment(self, comment):
        """Look up the comment's author, save one CSV row and pause briefly.

        Args:
            comment: One comment object from the API response.

        Returns:
            ``True`` when the row was saved, ``False`` when the comment was
            skipped because of an error (which is printed).
        """
        try:
            user = comment.get('user') or {}
            user_id = user.get('userId')
            nickname = user.get('nickname', '')

            user_info = self.get_user_info(user_id)
            # The API timestamp is in milliseconds.
            comment_time = time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(int(comment['time']) // 1000))

            data = [
                nickname,
                user_id,
                user_info['age'],
                user_info['gender'],
                user_info['city'],
                user_info['sign'],
                (comment.get('content') or '').replace(',', '，'),
                comment.get('commentId', ''),
                comment.get('likedCount', 0),
                comment_time
            ]

            self.save_comment(data)
            print(f"Saved comment from user: {nickname}")
            time.sleep(self.COMMENT_DELAY)  # Avoid too frequent requests
            return True
        except Exception as e:
            # Best effort: one malformed comment must not abort the scrape.
            print(f"Error processing comment: {str(e)}")
            return False

    def scrape_comments(self, song_id):
        """Page through the song's comments and save each one to the CSV file.

        Args:
            song_id: Song identifier passed to :meth:`get_comments`.

        Paging stops when a page is empty or when a page cannot be fetched.
        """
        offset = 0
        limit = 100
        saved = 0

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not isinstance(result, dict) or 'comments' not in result:
                # Say why the scrape ended instead of stopping silently.
                print(f"Stopping: no comment data returned at offset {offset}")
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                if self._process_comment(comment):
                    saved += 1

            offset += limit
            # Report rows actually written; the offset over-counts on short
            # pages and skipped comments.
            print(f"Processed {saved} comments")
            time.sleep(self.PAGE_DELAY)  # Delay between pages

def main():
    """Write a fresh CSV header, then scrape comments for the configured song."""
    target_song = '2644210912'  # Replace with your target song ID
    client = NeteaseMusicScraper()
    client.create_csv_header()
    client.scrape_comments(target_song)


if __name__ == "__main__":
    main()

Saved comment from user: -你去挪威找鱼算账
Saved comment from user: 羽yuhcy
Saved comment from user: 南京庆庆
Saved comment from user: oi-iocc
Saved comment from user: 别又问我
Saved comment from user: 攀云点灯
Saved comment from user: 1D温缘
Saved comment from user: 云村村民164415434901649
Saved comment from user: 唯一爱的乐队hhhh
Saved comment from user: 微糊双面鱼
Saved comment from user: 冥王星mks
Saved comment from user: 我是飞天面条大王
Saved comment from user: 不肯南飞的候鸟
Saved comment from user: 树王国
Saved comment from user: 1ranus
Saved comment from user: 微醺瓶中鱼
Saved comment from user: 你微笑时好美丶炜
Saved comment from user: 晚一些些哦
Saved comment from user: 灯塔少女光尘
Saved comment from user: 灯塔少女光尘
Saved comment from user: 咸鱼即使翻身也是咸鱼
Saved comment from user: 独夜沉默的树
Saved comment from user: 满城的泪水
Saved comment from user: VIP-徐晓桐
Saved comment from user: 天气没说准被胖揍
Saved comment from user: Yoshy-ovo
Saved comment from user: Yoshy-ovo
Saved comment from user: 雨析雨曦
Saved comment from user: 攀云点灯
Saved comment from user: 代号11的Shirley
Saved comment 

KeyboardInterrupt: 

In [4]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape comments on a NetEase Cloud Music song into a CSV file.

    For every comment the author's public profile is fetched as well, so each
    CSV row combines profile fields with the comment itself.  All requests go
    to ``music.163.com`` using the headers built in ``__init__``.

    The pauses between requests are class attributes so they can be tuned on a
    subclass or an instance without touching the scraping logic.
    """

    REQUEST_TIMEOUT = 10  # seconds before an HTTP request is abandoned
    RETRY_DELAY = 3       # seconds to wait after a failed profile request
    COMMENT_DELAY = 0.5   # seconds to wait after each saved comment
    PAGE_DELAY = 1        # seconds to wait between comment pages

    def __init__(self):
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Convert a millisecond birthday timestamp into an approximate age.

        Args:
            birthday_ms: Birthday as milliseconds since 1970; anything that is
                not a positive number is treated as "not set".
            current_year: Year to measure against; defaults to the current
                local year (the previous hard-coded 2024 went stale).

        Returns:
            The age in years (never negative), or ``None`` when unset.
        """
        if not isinstance(birthday_ms, (int, float)) or birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years since 1970 approximate the birth year.
        birth_year = 1970 + int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch a user's public profile from the user-detail endpoint.

        Args:
            user_id: User identifier inserted into the request URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``.  A field
            the API did not supply keeps the placeholder ``'无'``.

        Only request/JSON-decoding failures are retried (up to three
        attempts).  A decoded response is parsed exactly once, because
        retrying cannot repair missing or malformed profile fields.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(self.RETRY_DELAY)
                continue

            if isinstance(result, dict) and result.get('code') == 200:
                profile = result.get('profile')
                if isinstance(profile, dict):
                    data['gender'] = profile.get('gender', '无')

                    age = self._age_from_birthday(profile.get('birthday', 0))
                    if age is not None:
                        data['age'] = age

                    data['city'] = profile.get('city', '无')

                    # The signature may be null in the response; fall back to
                    # the placeholder instead of failing on ``None.strip()``.
                    signature = profile.get('signature', '无')
                    if signature is None:
                        signature = '无'
                    data['sign'] = (str(signature).strip()
                                    .replace('\n', ' ').replace(',', '，'))
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier inserted into the request URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding failed (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data4.csv'):
        """Append one row to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.

        The ``csv`` module quotes fields containing commas, quotes or line
        breaks, which the previous plain ``','.join`` silently corrupted.
        """
        import csv  # local import: the surrounding file does not import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(v) for v in data])

    def create_csv_header(self, filename='comments_data4.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: the surrounding file does not import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def _process_comment(self, comment):
        """Look up the comment's author, save one CSV row and pause briefly.

        Args:
            comment: One comment object from the API response.

        Returns:
            ``True`` when the row was saved, ``False`` when the comment was
            skipped because of an error (which is printed).
        """
        try:
            user = comment.get('user') or {}
            user_id = user.get('userId')
            nickname = user.get('nickname', '')

            user_info = self.get_user_info(user_id)
            # The API timestamp is in milliseconds.
            comment_time = time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(int(comment['time']) // 1000))

            data = [
                nickname,
                user_id,
                user_info['age'],
                user_info['gender'],
                user_info['city'],
                user_info['sign'],
                (comment.get('content') or '').replace(',', '，'),
                comment.get('commentId', ''),
                comment.get('likedCount', 0),
                comment_time
            ]

            self.save_comment(data)
            print(f"Saved comment from user: {nickname}")
            time.sleep(self.COMMENT_DELAY)  # Avoid too frequent requests
            return True
        except Exception as e:
            # Best effort: one malformed comment must not abort the scrape.
            print(f"Error processing comment: {str(e)}")
            return False

    def scrape_comments(self, song_id):
        """Page through the song's comments and save each one to the CSV file.

        Args:
            song_id: Song identifier passed to :meth:`get_comments`.

        Paging stops when a page is empty, when a page holds fewer comments
        than requested, or when a page cannot be fetched.
        """
        offset = 0
        limit = 100
        saved = 0

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not isinstance(result, dict) or 'comments' not in result:
                # Say why the scrape ended instead of stopping silently.
                print(f"Stopping: no comment data returned at offset {offset}")
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                if self._process_comment(comment):
                    saved += 1

            offset += limit
            # Report rows actually written; the offset over-counts on short
            # pages and skipped comments.
            print(f"Processed {saved} comments")

            if len(comments) < limit:
                break  # A short page is treated as the last page

            time.sleep(self.PAGE_DELAY)  # Delay between pages

def main():
    """Write a fresh CSV header, then scrape comments for the configured song."""
    target_song = '2644174830'  # Replace with your target song ID
    client = NeteaseMusicScraper()
    client.create_csv_header()
    client.scrape_comments(target_song)


if __name__ == "__main__":
    main()

Saved comment from user: Qnjinn
Saved comment from user: 最爱是康娜
Saved comment from user: Qnjinn
Saved comment from user: 兰灯难挑
Saved comment from user: 智慧之神雅典娜
Saved comment from user: NFWborntodo
Saved comment from user: VIP-徐晓桐
Saved comment from user: 想念__n7fW
Saved comment from user: -爱要及时A
Saved comment from user: 昨晚你不太行
Saved comment from user: 鲸鱼王1710
Saved comment from user: 龙骨星兰丶
Saved comment from user: W香菜ww
Saved comment from user: Eternity-L18
Saved comment from user: ONFOREVERLY
Saved comment from user: 人生全剧终946
Saved comment from user: -谢堡王
Saved comment from user: 爱芒果炒酸奶
Saved comment from user: 0-XF-0
Saved comment from user: 不许凶兔子
Saved comment from user: 被风吹跑的人
Saved comment from user: easyfategg
Saved comment from user: 能不能想想我_
Saved comment from user: 十月啰-_
Saved comment from user: 灯叔灯叔
Saved comment from user: 去更远的地方_QhN6
Saved comment from user: 孤雏sss
Saved comment from user: 小婕包汉堡y
Saved comment from user: iridescent_uT0n
Saved comment from user: 小紫饱饱有点饿
Saved com

In [5]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape comments on a NetEase Cloud Music song into a CSV file.

    For every comment the author's public profile is fetched as well, so each
    CSV row combines profile fields with the comment itself.  All requests go
    to ``music.163.com`` using the headers built in ``__init__``.

    The pauses between requests are class attributes so they can be tuned on a
    subclass or an instance without touching the scraping logic.
    """

    REQUEST_TIMEOUT = 10  # seconds before an HTTP request is abandoned
    RETRY_DELAY = 3       # seconds to wait after a failed profile request
    COMMENT_DELAY = 0.5   # seconds to wait after each saved comment
    PAGE_DELAY = 1        # seconds to wait between comment pages

    def __init__(self):
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Convert a millisecond birthday timestamp into an approximate age.

        Args:
            birthday_ms: Birthday as milliseconds since 1970; anything that is
                not a positive number is treated as "not set".
            current_year: Year to measure against; defaults to the current
                local year (the previous hard-coded 2024 went stale).

        Returns:
            The age in years (never negative), or ``None`` when unset.
        """
        if not isinstance(birthday_ms, (int, float)) or birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years since 1970 approximate the birth year.
        birth_year = 1970 + int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch a user's public profile from the user-detail endpoint.

        Args:
            user_id: User identifier inserted into the request URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``.  A field
            the API did not supply keeps the placeholder ``'无'``.

        Only request/JSON-decoding failures are retried (up to three
        attempts).  A decoded response is parsed exactly once, because
        retrying cannot repair missing or malformed profile fields.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(self.RETRY_DELAY)
                continue

            if isinstance(result, dict) and result.get('code') == 200:
                profile = result.get('profile')
                if isinstance(profile, dict):
                    data['gender'] = profile.get('gender', '无')

                    age = self._age_from_birthday(profile.get('birthday', 0))
                    if age is not None:
                        data['age'] = age

                    data['city'] = profile.get('city', '无')

                    # The signature may be null in the response; fall back to
                    # the placeholder instead of failing on ``None.strip()``.
                    signature = profile.get('signature', '无')
                    if signature is None:
                        signature = '无'
                    data['sign'] = (str(signature).strip()
                                    .replace('\n', ' ').replace(',', '，'))
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier inserted into the request URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding failed (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    # NOTE(review): the default file name below, 'comments_data3.csv', is also
    # the default used by an earlier cell of this notebook that scrapes a
    # different song; create_csv_header would truncate that cell's output.
    # Confirm whether a distinct file name was intended here.
    def save_comment(self, data, filename='comments_data3.csv'):
        """Append one row to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.

        The ``csv`` module quotes fields containing commas, quotes or line
        breaks, which the previous plain ``','.join`` silently corrupted.
        """
        import csv  # local import: the surrounding file does not import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(v) for v in data])

    def create_csv_header(self, filename='comments_data3.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: the surrounding file does not import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def _process_comment(self, comment):
        """Look up the comment's author, save one CSV row and pause briefly.

        Args:
            comment: One comment object from the API response.

        Returns:
            ``True`` when the row was saved, ``False`` when the comment was
            skipped because of an error (which is printed).
        """
        try:
            user = comment.get('user') or {}
            user_id = user.get('userId')
            nickname = user.get('nickname', '')

            user_info = self.get_user_info(user_id)
            # The API timestamp is in milliseconds.
            comment_time = time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(int(comment['time']) // 1000))

            data = [
                nickname,
                user_id,
                user_info['age'],
                user_info['gender'],
                user_info['city'],
                user_info['sign'],
                (comment.get('content') or '').replace(',', '，'),
                comment.get('commentId', ''),
                comment.get('likedCount', 0),
                comment_time
            ]

            self.save_comment(data)
            print(f"Saved comment from user: {nickname}")
            time.sleep(self.COMMENT_DELAY)  # Avoid too frequent requests
            return True
        except Exception as e:
            # Best effort: one malformed comment must not abort the scrape.
            print(f"Error processing comment: {str(e)}")
            return False

    def scrape_comments(self, song_id):
        """Page through the song's comments and save each one to the CSV file.

        Args:
            song_id: Song identifier passed to :meth:`get_comments`.

        Paging stops when a page is empty, when a page holds fewer comments
        than requested, or when a page cannot be fetched.
        """
        offset = 0
        limit = 100
        saved = 0

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not isinstance(result, dict) or 'comments' not in result:
                # Say why the scrape ended instead of stopping silently.
                print(f"Stopping: no comment data returned at offset {offset}")
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                if self._process_comment(comment):
                    saved += 1

            offset += limit
            # Report rows actually written; the offset over-counts on short
            # pages and skipped comments.
            print(f"Processed {saved} comments")

            if len(comments) < limit:
                break  # A short page is treated as the last page

            time.sleep(self.PAGE_DELAY)  # Delay between pages

def main(song_id='2643800682'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Write the header row first, then add one row per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: 不再有所期待_yccR
Saved comment from user: wxwo_O
Saved comment from user: 万恶的弹幕君
Saved comment from user: 培培暴富变肥仔
Saved comment from user: -人们追随星星
Saved comment from user: 来生有个幸福的家qiiz
Saved comment from user: 可能世吧
Saved comment from user: A7onE1_
Saved comment from user: 若闻风雨
Saved comment from user: 尒沫的花
Saved comment from user: Z先生的梦-
Saved comment from user: xdkxwg
Saved comment from user: 裁决之地第一艾克
Saved comment from user: Staric_cosmo
Saved comment from user: 飘一过--
Saved comment from user: 烧弍
Saved comment from user: NightFurry
Saved comment from user: lalala_svn1
Saved comment from user: hotaru92
Saved comment from user: 思念也是种病
Saved comment from user: 殇心灬殇璃
Saved comment from user: AlanJane
Saved comment from user: The丶Hydenix
Saved comment from user: RAIN屿語
Saved comment from user: The丶Hydenix
Saved comment from user: 千岛崎山
Saved comment from user: 伊莎Esha
Saved comment from user: 香草味三色杯
Saved comment from user: Medi-vh
Saved comment from user: LeuanBahia
Save

In [6]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each saved row combines the comment itself with the commenter's profile
    (gender, age, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by this class.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile, current_year=None):
        """Turn a raw ``profile`` mapping into the flat user-info dict.

        Args:
            profile: The ``profile`` object of a user-detail response, or a
                falsy value when no profile is available.
            current_year: Year used for the age calculation; defaults to the
                current local year.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Fields that are missing keep the placeholder ``'无'``.
        """
        info = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if not profile:
            return info

        if current_year is None:
            current_year = time.localtime().tm_year

        info['gender'] = profile.get('gender', '无')

        # 'birthday' is treated as milliseconds since 1970; a null or
        # non-positive value means "unknown" and leaves the placeholder.
        birthday = profile.get('birthday', 0) or 0
        if birthday > 0:
            years_since_epoch = birthday // (1000 * 365 * 24 * 3600)
            info['age'] = max(0, (current_year - 1970) - years_since_epoch)

        info['city'] = profile.get('city', '无')

        # The key may be present with a null value; calling .strip() on it
        # would raise, so fall back to the placeholder in that case too.
        signature = profile.get('signature', '无')
        if signature is None:
            signature = '无'
        info['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
        return info

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The request is attempted up to three times; an attempt counts as
        failed only when it raises (network error, invalid JSON, ...). A
        response that arrives but carries no usable profile is not retried.

        Args:
            user_id: ID of the user to look up.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``;
            every field that could not be determined is ``'无'``.
        """
        data = self._parse_profile(None)

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and result.get('profile'):
                        data = self._parse_profile(result['profile'])
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # Back off before the next attempt; no point in waiting
                # after the final one.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for a song.

        Args:
            song_id: ID of the song.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            JSON decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info so a stalled connection cannot
            # hang the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data6.csv'):
        """Append one record to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import: csv is not in this cell's import block

        # csv.writer quotes fields containing commas, quotes or line breaks,
        # so free text can no longer break a row. newline='' is required by
        # the csv module; '\n' keeps the row terminator used before.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(item) for item in data])

    def create_csv_header(self, filename='comments_data6.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: csv is not in this cell's import block

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id, comment_delay=0.5, page_delay=1):
        """Page through all comments of a song and save one row per comment.

        Comments are requested in pages of 100. Paging stops when a request
        fails, a page is empty, or a page holds fewer comments than the page
        size. A comment that cannot be processed is reported and skipped.

        Args:
            song_id: ID of the song whose comments are scraped.
            comment_delay: Seconds to sleep after each saved comment.
            page_delay: Seconds to sleep between pages.
        """
        offset = 0
        limit = 100
        # Profiles fetched during this run, keyed by user ID, so a user who
        # commented several times costs only one profile request.
        user_cache = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    if user_id not in user_cache:
                        user_cache[user_id] = self.get_user_info(user_id)
                    user_info = user_cache[user_id]

                    # 'time' is in milliseconds; its first 10 digits are the
                    # Unix timestamp in seconds.
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    # A null content must not discard the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(comment_delay)  # Avoid too frequent requests

                except Exception as e:
                    # Best effort: report the broken comment and move on.
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means this was the last one

            time.sleep(page_delay)  # Delay between pages

def main(song_id='2644581592'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Start a fresh file holding only the header row, then append one row
    # per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: 猫起什么名
Saved comment from user: 慈祥的蛋糕
Saved comment from user: 中二萌新_
Saved comment from user: 忧伤你别忧伤了我心疼
Saved comment from user: 天降之炎
Saved comment from user: Didealism
Saved comment from user: 纯情病
Saved comment from user: 单循_hi
Saved comment from user: oITvTlo
Saved comment from user: Deowj
Saved comment from user: 痛名为猫
Saved comment from user: 小雨还有爱的么
Saved comment from user: 就尖酸刻薄怎么了
Saved comment from user: 天真感觉
Saved comment from user: osjheksj4
Saved comment from user: 小欣冰茶
Saved comment from user: 当泪浸透我
Saved comment from user: 临雨期
Saved comment from user: 临雨期
Saved comment from user: 遇厌-
Saved comment from user: 当然美式啦
Saved comment from user: 小乐碎冰冰
Saved comment from user: 碎行诗
Saved comment from user: 碎行诗
Saved comment from user: 无忧小月1
Saved comment from user: 一路繁华的夏200316
Saved comment from user: 小熊天下无敌
Saved comment from user: 抖孩
Saved comment from user: -如初太难
Saved comment from user: X-kill
Saved comment from user: 小丞不睡觉
Saved comment from user: 痛的尸体

In [7]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each saved row combines the comment itself with the commenter's profile
    (gender, age, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by this class.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile, current_year=None):
        """Turn a raw ``profile`` mapping into the flat user-info dict.

        Args:
            profile: The ``profile`` object of a user-detail response, or a
                falsy value when no profile is available.
            current_year: Year used for the age calculation; defaults to the
                current local year.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Fields that are missing keep the placeholder ``'无'``.
        """
        info = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if not profile:
            return info

        if current_year is None:
            current_year = time.localtime().tm_year

        info['gender'] = profile.get('gender', '无')

        # 'birthday' is treated as milliseconds since 1970; a null or
        # non-positive value means "unknown" and leaves the placeholder.
        birthday = profile.get('birthday', 0) or 0
        if birthday > 0:
            years_since_epoch = birthday // (1000 * 365 * 24 * 3600)
            info['age'] = max(0, (current_year - 1970) - years_since_epoch)

        info['city'] = profile.get('city', '无')

        # The key may be present with a null value; calling .strip() on it
        # would raise, so fall back to the placeholder in that case too.
        signature = profile.get('signature', '无')
        if signature is None:
            signature = '无'
        info['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
        return info

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The request is attempted up to three times; an attempt counts as
        failed only when it raises (network error, invalid JSON, ...). A
        response that arrives but carries no usable profile is not retried.

        Args:
            user_id: ID of the user to look up.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``;
            every field that could not be determined is ``'无'``.
        """
        data = self._parse_profile(None)

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and result.get('profile'):
                        data = self._parse_profile(result['profile'])
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # Back off before the next attempt; no point in waiting
                # after the final one.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for a song.

        Args:
            song_id: ID of the song.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            JSON decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info so a stalled connection cannot
            # hang the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data7.csv'):
        """Append one record to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import: csv is not in this cell's import block

        # csv.writer quotes fields containing commas, quotes or line breaks,
        # so free text can no longer break a row. newline='' is required by
        # the csv module; '\n' keeps the row terminator used before.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(item) for item in data])

    def create_csv_header(self, filename='comments_data7.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: csv is not in this cell's import block

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id, comment_delay=0.5, page_delay=1):
        """Page through all comments of a song and save one row per comment.

        Comments are requested in pages of 100. Paging stops when a request
        fails, a page is empty, or a page holds fewer comments than the page
        size. A comment that cannot be processed is reported and skipped.

        Args:
            song_id: ID of the song whose comments are scraped.
            comment_delay: Seconds to sleep after each saved comment.
            page_delay: Seconds to sleep between pages.
        """
        offset = 0
        limit = 100
        # Profiles fetched during this run, keyed by user ID, so a user who
        # commented several times costs only one profile request.
        user_cache = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    if user_id not in user_cache:
                        user_cache[user_id] = self.get_user_info(user_id)
                    user_info = user_cache[user_id]

                    # 'time' is in milliseconds; its first 10 digits are the
                    # Unix timestamp in seconds.
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    # A null content must not discard the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(comment_delay)  # Avoid too frequent requests

                except Exception as e:
                    # Best effort: report the broken comment and move on.
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means this was the last one

            time.sleep(page_delay)  # Delay between pages

def main(song_id='2644736276'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Start a fresh file holding only the header row, then append one row
    # per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: ABEI2ty329
Saved comment from user: DHXCS
Saved comment from user: _敬少年热诚
Saved comment from user: 灰葉仁-
Saved comment from user: NENGBENDZ
Saved comment from user: 安登翔
Saved comment from user: Peaceloveserenity
Saved comment from user: 安德烈再努力
Saved comment from user: G_水草
Saved comment from user: 奢香富婆
Saved comment from user: 南柯一梦wwtx
Saved comment from user: 韭菜是割不完的
Saved comment from user: 在风里无晴也无雨
Saved comment from user: BRTYSL
Saved comment from user: 我和四糸乃有个约会
Saved comment from user: ___Paranoid
Saved comment from user: 云村村民164085428967224
Saved comment from user: YUBB66
Saved comment from user: 等一夏夏嘛
Saved comment from user: 辛砚之
Saved comment from user: 清杯_
Saved comment from user: 檎能补拙
Saved comment from user: SML_
Saved comment from user: 浃鸠
Saved comment from user: 一叶之遥
Saved comment from user: 瞭望星空lnc
Saved comment from user: Livedevi1
Saved comment from user: chair_chair
Saved comment from user: 流光踏月
Saved comment from user: carolir
Saved comment f

In [8]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each saved row combines the comment itself with the commenter's profile
    (gender, age, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by this class.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile, current_year=None):
        """Turn a raw ``profile`` mapping into the flat user-info dict.

        Args:
            profile: The ``profile`` object of a user-detail response, or a
                falsy value when no profile is available.
            current_year: Year used for the age calculation; defaults to the
                current local year.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Fields that are missing keep the placeholder ``'无'``.
        """
        info = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if not profile:
            return info

        if current_year is None:
            current_year = time.localtime().tm_year

        info['gender'] = profile.get('gender', '无')

        # 'birthday' is treated as milliseconds since 1970; a null or
        # non-positive value means "unknown" and leaves the placeholder.
        birthday = profile.get('birthday', 0) or 0
        if birthday > 0:
            years_since_epoch = birthday // (1000 * 365 * 24 * 3600)
            info['age'] = max(0, (current_year - 1970) - years_since_epoch)

        info['city'] = profile.get('city', '无')

        # The key may be present with a null value; calling .strip() on it
        # would raise, so fall back to the placeholder in that case too.
        signature = profile.get('signature', '无')
        if signature is None:
            signature = '无'
        info['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
        return info

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The request is attempted up to three times; an attempt counts as
        failed only when it raises (network error, invalid JSON, ...). A
        response that arrives but carries no usable profile is not retried.

        Args:
            user_id: ID of the user to look up.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``;
            every field that could not be determined is ``'无'``.
        """
        data = self._parse_profile(None)

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and result.get('profile'):
                        data = self._parse_profile(result['profile'])
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # Back off before the next attempt; no point in waiting
                # after the final one.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for a song.

        Args:
            song_id: ID of the song.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            JSON decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info so a stalled connection cannot
            # hang the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data8.csv'):
        """Append one record to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import: csv is not in this cell's import block

        # csv.writer quotes fields containing commas, quotes or line breaks,
        # so free text can no longer break a row. newline='' is required by
        # the csv module; '\n' keeps the row terminator used before.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(item) for item in data])

    def create_csv_header(self, filename='comments_data8.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: csv is not in this cell's import block

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id, comment_delay=0.5, page_delay=1):
        """Page through all comments of a song and save one row per comment.

        Comments are requested in pages of 100. Paging stops when a request
        fails, a page is empty, or a page holds fewer comments than the page
        size. A comment that cannot be processed is reported and skipped.

        Args:
            song_id: ID of the song whose comments are scraped.
            comment_delay: Seconds to sleep after each saved comment.
            page_delay: Seconds to sleep between pages.
        """
        offset = 0
        limit = 100
        # Profiles fetched during this run, keyed by user ID, so a user who
        # commented several times costs only one profile request.
        user_cache = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    if user_id not in user_cache:
                        user_cache[user_id] = self.get_user_info(user_id)
                    user_info = user_cache[user_id]

                    # 'time' is in milliseconds; its first 10 digits are the
                    # Unix timestamp in seconds.
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    # A null content must not discard the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(comment_delay)  # Avoid too frequent requests

                except Exception as e:
                    # Best effort: report the broken comment and move on.
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means this was the last one

            time.sleep(page_delay)  # Delay between pages

def main(song_id='2646173410'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Start a fresh file holding only the header row, then append one row
    # per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: koki在捕鱼
Saved comment from user: 路遇神仙
Saved comment from user: 靈風最吊
Saved comment from user: 网易云梦女幽魂zz
Saved comment from user: 悬崖杀马
Saved comment from user: 茵栀
Saved comment from user: 彭阿彭阿飘
Saved comment from user: 我先躺两分钟其他的到时候再说_
Saved comment from user: 蓝调要另类嘻哈
Saved comment from user: 是机器人呀
Saved comment from user: 古见希塔莉
Saved comment from user: 三笠艾伦利威尔
Saved comment from user: 围城忸
Saved comment from user: 死讥有人
Saved comment from user: 今天开心明天开心_
Saved comment from user: 今天开心明天开心_
Saved comment from user: 今天开心明天开心_
Saved comment from user: 今天开心明天开心_
Saved comment from user: 今天开心明天开心_
Saved comment from user: 玄关风云
Saved comment from user: 古见希塔莉
Saved comment from user: 古见希塔莉
Saved comment from user: 会儿帮
Saved comment from user: 云村村民165741890356017
Saved comment from user: 梦想去吃三星米其林
Saved comment from user: Pharaoh440
Saved comment from user: 龙玲珑-安红豆
Saved comment from user: 玄关风云
Saved comment from user: 玄关风云
Saved comment from user: 卦者那啥子眉比
Saved comment fro

In [9]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each saved row combines the comment itself with the commenter's profile
    (gender, age, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by this class.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile, current_year=None):
        """Turn a raw ``profile`` mapping into the flat user-info dict.

        Args:
            profile: The ``profile`` object of a user-detail response, or a
                falsy value when no profile is available.
            current_year: Year used for the age calculation; defaults to the
                current local year.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Fields that are missing keep the placeholder ``'无'``.
        """
        info = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if not profile:
            return info

        if current_year is None:
            current_year = time.localtime().tm_year

        info['gender'] = profile.get('gender', '无')

        # 'birthday' is treated as milliseconds since 1970; a null or
        # non-positive value means "unknown" and leaves the placeholder.
        birthday = profile.get('birthday', 0) or 0
        if birthday > 0:
            years_since_epoch = birthday // (1000 * 365 * 24 * 3600)
            info['age'] = max(0, (current_year - 1970) - years_since_epoch)

        info['city'] = profile.get('city', '无')

        # The key may be present with a null value; calling .strip() on it
        # would raise, so fall back to the placeholder in that case too.
        signature = profile.get('signature', '无')
        if signature is None:
            signature = '无'
        info['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
        return info

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The request is attempted up to three times; an attempt counts as
        failed only when it raises (network error, invalid JSON, ...). A
        response that arrives but carries no usable profile is not retried.

        Args:
            user_id: ID of the user to look up.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``;
            every field that could not be determined is ``'无'``.
        """
        data = self._parse_profile(None)

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and result.get('profile'):
                        data = self._parse_profile(result['profile'])
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # Back off before the next attempt; no point in waiting
                # after the final one.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for a song.

        Args:
            song_id: ID of the song.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            JSON decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info so a stalled connection cannot
            # hang the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data9.csv'):
        """Append one record to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import: csv is not in this cell's import block

        # csv.writer quotes fields containing commas, quotes or line breaks,
        # so free text can no longer break a row. newline='' is required by
        # the csv module; '\n' keeps the row terminator used before.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(item) for item in data])

    def create_csv_header(self, filename='comments_data9.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: csv is not in this cell's import block

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id, comment_delay=0.5, page_delay=1):
        """Page through all comments of a song and save one row per comment.

        Comments are requested in pages of 100. Paging stops when a request
        fails, a page is empty, or a page holds fewer comments than the page
        size. A comment that cannot be processed is reported and skipped.

        Args:
            song_id: ID of the song whose comments are scraped.
            comment_delay: Seconds to sleep after each saved comment.
            page_delay: Seconds to sleep between pages.
        """
        offset = 0
        limit = 100
        # Profiles fetched during this run, keyed by user ID, so a user who
        # commented several times costs only one profile request.
        user_cache = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    if user_id not in user_cache:
                        user_cache[user_id] = self.get_user_info(user_id)
                    user_info = user_cache[user_id]

                    # 'time' is in milliseconds; its first 10 digits are the
                    # Unix timestamp in seconds.
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    # A null content must not discard the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(comment_delay)  # Avoid too frequent requests

                except Exception as e:
                    # Best effort: report the broken comment and move on.
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means this was the last one

            time.sleep(page_delay)  # Delay between pages

def main(song_id='2643492354'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Start a fresh file holding only the header row, then append one row
    # per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: 你懂自由嘛
Saved comment from user: idtexist
Saved comment from user: 记住跳跳虎
Saved comment from user: 茶越绿x_L
Saved comment from user: Atlantis_baby
Saved comment from user: 给你乐蛋挞
Saved comment from user: A-Sunn-
Saved comment from user: Artemis岚煌月
Saved comment from user: 金鱼池的风
Saved comment from user: A-Sunn-
Saved comment from user: 08140526707
Saved comment from user: A-Sunn-
Saved comment from user: 听音乐的蓝光环
Saved comment from user: 北宸呐
Saved comment from user: 邢林蔚第二帅
Saved comment from user: 得闲懒觉多睡点
Saved comment from user: hust1e_1
Saved comment from user: 是想有一家唱片店
Saved comment from user: 有品且眉毛66
Saved comment from user: 靠谱的大鹏哥哥
Saved comment from user: zilin子蔺
Saved comment from user: blk03
Saved comment from user: 东东自有安排
Saved comment from user: lonely_111_
Saved comment from user: JGolden哈哈哈哈哈哈
Saved comment from user: 眼角浸出一滴泪
Saved comment from user: DivineJener
Saved comment from user: Cingeswell
Saved comment from user: 茶越绿x_L
Saved comment from user: Ari

In [10]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each saved row combines the comment itself with the commenter's profile
    (gender, age, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Browser-like headers sent with every request made by this class.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile, current_year=None):
        """Turn a raw ``profile`` mapping into the flat user-info dict.

        Args:
            profile: The ``profile`` object of a user-detail response, or a
                falsy value when no profile is available.
            current_year: Year used for the age calculation; defaults to the
                current local year.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Fields that are missing keep the placeholder ``'无'``.
        """
        info = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if not profile:
            return info

        if current_year is None:
            current_year = time.localtime().tm_year

        info['gender'] = profile.get('gender', '无')

        # 'birthday' is treated as milliseconds since 1970; a null or
        # non-positive value means "unknown" and leaves the placeholder.
        birthday = profile.get('birthday', 0) or 0
        if birthday > 0:
            years_since_epoch = birthday // (1000 * 365 * 24 * 3600)
            info['age'] = max(0, (current_year - 1970) - years_since_epoch)

        info['city'] = profile.get('city', '无')

        # The key may be present with a null value; calling .strip() on it
        # would raise, so fall back to the placeholder in that case too.
        signature = profile.get('signature', '无')
        if signature is None:
            signature = '无'
        info['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
        return info

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The request is attempted up to three times; an attempt counts as
        failed only when it raises (network error, invalid JSON, ...). A
        response that arrives but carries no usable profile is not retried.

        Args:
            user_id: ID of the user to look up.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``;
            every field that could not be determined is ``'无'``.
        """
        data = self._parse_profile(None)

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and result.get('profile'):
                        data = self._parse_profile(result['profile'])
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # Back off before the next attempt; no point in waiting
                # after the final one.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for a song.

        Args:
            song_id: ID of the song.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            JSON decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info so a stalled connection cannot
            # hang the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data10.csv'):
        """Append one record to the CSV file.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import: csv is not in this cell's import block

        # csv.writer quotes fields containing commas, quotes or line breaks,
        # so free text can no longer break a row. newline='' is required by
        # the csv module; '\n' keeps the row terminator used before.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(item) for item in data])

    def create_csv_header(self, filename='comments_data10.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import: csv is not in this cell's import block

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id, comment_delay=0.5, page_delay=1):
        """Page through all comments of a song and save one row per comment.

        Comments are requested in pages of 100. Paging stops when a request
        fails, a page is empty, or a page holds fewer comments than the page
        size. A comment that cannot be processed is reported and skipped.

        Args:
            song_id: ID of the song whose comments are scraped.
            comment_delay: Seconds to sleep after each saved comment.
            page_delay: Seconds to sleep between pages.
        """
        offset = 0
        limit = 100
        # Profiles fetched during this run, keyed by user ID, so a user who
        # commented several times costs only one profile request.
        user_cache = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    if user_id not in user_cache:
                        user_cache[user_id] = self.get_user_info(user_id)
                    user_info = user_cache[user_id]

                    # 'time' is in milliseconds; its first 10 digits are the
                    # Unix timestamp in seconds.
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    # A null content must not discard the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(comment_delay)  # Avoid too frequent requests

                except Exception as e:
                    # Best effort: report the broken comment and move on.
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means this was the last one

            time.sleep(page_delay)  # Delay between pages

def main(song_id='2643771076'):
    """Scrape all comments of one song into the scraper's default CSV file.

    Args:
        song_id: ID of the song to scrape. Defaults to the ID this cell was
            originally hard-coded with; pass another ID to target a
            different song.
    """
    scraper = NeteaseMusicScraper()
    # Start a fresh file holding only the header row, then append one row
    # per scraped comment.
    scraper.create_csv_header()
    scraper.scrape_comments(song_id)

# Entry point: run the scraper only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Saved comment from user: 耳朵已被江山下蛊
Saved comment from user: 屁桃丹丹
Saved comment from user: 晴儿的美貌你觊觎不得
Saved comment from user: JARVIS-CHENWEIWEI
Saved comment from user: 含笑九泉-809
Saved comment from user: 凝夏初开
Saved comment from user: 西多士加红豆冰-
Saved comment from user: GRAPEy
Saved comment from user: 叽里咕噜的Marie
Saved comment from user: 缪斯邂逅的艺术家
Saved comment from user: 小代代代代啊
Saved comment from user: 树_野
Saved comment from user: WillingAnn-LX
Saved comment from user: 雾中人wzx
Saved comment from user: dqxh只为山宝
Saved comment from user: Coco可达吖
Saved comment from user: yang_Ysboys
Saved comment from user: JJ20-Gain
Saved comment from user: 雪love的一瞬
Saved comment from user: Isashy
Saved comment from user: 无缘而绝望
Saved comment from user: 逍遥小本风
Saved comment from user: Coco可达吖
Saved comment from user: 恋空小荷花
Saved comment from user: 小事不计较_大事心态好
Saved comment from user: 我有叶子
Saved comment from user: W愛妳卟變W
Saved comment from user: 辰月是只老狐狸
Saved comment from user: WJjea
Saved comment from user: 一鹿沁晗
S

In [11]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is requested as well, so each
    CSV row holds the comment together with the author's age, gender, city
    and signature.
    """

    # Timeout in seconds applied to every HTTP request so a stalled
    # connection cannot block the scrape forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Return an approximate age in years, or None when it is unknown.

        Args:
            birthday_ms: Birthday as milliseconds counted from 1970. Anything
                that is not a positive number is treated as "not set".
            current_year: Calendar year to measure against; defaults to the
                current local year instead of a hard-coded one.
        """
        if isinstance(birthday_ms, bool) or not isinstance(birthday_ms, (int, float)):
            return None
        if birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between 1970 and the birthday.
        years_after_epoch = int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    def get_user_info(self, user_id):
        """Fetch profile details for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Each value stays '无' when the profile could not be obtained or
            the field is absent.

        Request and JSON-decoding failures are printed and retried, up to
        three attempts with a 3 second pause in between. A response that
        arrives but carries no usable profile is not retried.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if user_id is None:
            # Nothing to look up, so skip the request entirely.
            return data

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                # Best effort: report the failure and try again.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)
                continue

            profile = result.get('profile') if isinstance(result, dict) else None
            if isinstance(profile, dict) and result.get('code') == 200:
                data['gender'] = profile.get('gender', '无')

                age = self._age_from_birthday(profile.get('birthday', 0))
                if age is not None:
                    data['age'] = age

                data['city'] = profile.get('city', '无')

                # A null signature must not raise; fall back to the placeholder.
                signature = profile.get('signature')
                if signature is None:
                    signature = '无'
                data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data11.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        The csv module quotes fields containing commas, quotes or line
        breaks, so such text can no longer split a record.
        """
        import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data11.csv'):
        """Create or truncate ``filename`` and write the column header row."""
        import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a page is missing, empty
        or shorter than the page size. A comment that fails to process is
        reported and skipped; the scrape continues with the next one.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or`` also covers an explicit null user/content.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as epoch seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Reset the CSV file to its header row, then scrape the configured song."""
    target_song = '2644043707'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


# Run the scrape only when this code is executed as the main program.
if __name__ == "__main__":
    main()


Saved comment from user: hiokakeru
Saved comment from user: n1Ce-pLAye2
Saved comment from user: n1Ce-pLAye2
Saved comment from user: 伞兵兲兲是好人
Saved comment from user: 早就不做诗人了
Saved comment from user: 缺雀-bzz
Saved comment from user: 缺雀-bzz
Saved comment from user: 小琉璃冰糖葫芦
Saved comment from user: 断罪之箴言
Saved comment from user: 山东在逃王子
Saved comment from user: 普拉赛PLUSIGN
Saved comment from user: 星耀月舞
Saved comment from user: 战神加贺美
Saved comment from user: 以浪漫为界限啊
Saved comment from user: NUMB-THUG
Saved comment from user: 首隹
Saved comment from user: 丁药药boom
Saved comment from user: 丁药药boom
Saved comment from user: 奈良纪樱子
Saved comment from user: 谓之余悸-玖
Saved comment from user: 涩涩的屑胞人
Saved comment from user: 幻觉红中重度依赖
Saved comment from user: 白塔雀
Saved comment from user: 一鹤川川
Saved comment from user: 半挽山河不归徒
Saved comment from user: 异语书
Saved comment from user: 宇智波佐助人为乐x
Saved comment from user: 即墨无虞
Saved comment from user: 朗笙木
Saved comment from user: 小雨DOvis
Saved comment from user: DonP

In [12]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is requested as well, so each
    CSV row holds the comment together with the author's age, gender, city
    and signature.
    """

    # Timeout in seconds applied to every HTTP request so a stalled
    # connection cannot block the scrape forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Return an approximate age in years, or None when it is unknown.

        Args:
            birthday_ms: Birthday as milliseconds counted from 1970. Anything
                that is not a positive number is treated as "not set".
            current_year: Calendar year to measure against; defaults to the
                current local year instead of a hard-coded one.
        """
        if isinstance(birthday_ms, bool) or not isinstance(birthday_ms, (int, float)):
            return None
        if birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between 1970 and the birthday.
        years_after_epoch = int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    def get_user_info(self, user_id):
        """Fetch profile details for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Each value stays '无' when the profile could not be obtained or
            the field is absent.

        Request and JSON-decoding failures are printed and retried, up to
        three attempts with a 3 second pause in between. A response that
        arrives but carries no usable profile is not retried.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if user_id is None:
            # Nothing to look up, so skip the request entirely.
            return data

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                # Best effort: report the failure and try again.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)
                continue

            profile = result.get('profile') if isinstance(result, dict) else None
            if isinstance(profile, dict) and result.get('code') == 200:
                data['gender'] = profile.get('gender', '无')

                age = self._age_from_birthday(profile.get('birthday', 0))
                if age is not None:
                    data['age'] = age

                data['city'] = profile.get('city', '无')

                # A null signature must not raise; fall back to the placeholder.
                signature = profile.get('signature')
                if signature is None:
                    signature = '无'
                data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data12.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        The csv module quotes fields containing commas, quotes or line
        breaks, so such text can no longer split a record.
        """
        import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data12.csv'):
        """Create or truncate ``filename`` and write the column header row."""
        import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a page is missing, empty
        or shorter than the page size. A comment that fails to process is
        reported and skipped; the scrape continues with the next one.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or`` also covers an explicit null user/content.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as epoch seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Reset the CSV file to its header row, then scrape the configured song."""
    target_song = '2645813070'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


# Run the scrape only when this code is executed as the main program.
if __name__ == "__main__":
    main()


Saved comment from user: 久伴不离zve
Saved comment from user: 刘翔奶奶
Saved comment from user: 我会离开你的世界
Saved comment from user: 那一刹惊鸿破苍穹
Saved comment from user: 疯魔崛起
Saved comment from user: 长迟_
Saved comment from user: 松月的熊本熊
Saved comment from user: 沈念清
Saved comment from user: 跨越无解缘-
Saved comment from user: 哭的时候你在就好了
Saved comment from user: 讨厌吃烤串-
Saved comment from user: 年糕h-
Saved comment from user: Nil解藥
Saved comment from user: 妄想拥有晚风_
Saved comment from user: 苏赫886
Saved comment from user: 苏赫886
Saved comment from user: 南x翎
Saved comment from user: 小乔妹妹z
Saved comment from user: Rich-520
Saved comment from user: ccccy14
Saved comment from user: 卷毛vivi
Saved comment from user: 无心的梧桐树
Saved comment from user: 丨败炎丨
Saved comment from user: 蘑菇莙TT
Saved comment from user: LL_L1-
Saved comment from user: 小幼喻
Saved comment from user: 小琪qi-h
Saved comment from user: 小琪qi-h
Saved comment from user: y的情绪零碎_
Saved comment from user: 行上海
Saved comment from user: 独一_Yi
Saved comment from user:

In [13]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is requested as well, so each
    CSV row holds the comment together with the author's age, gender, city
    and signature.
    """

    # Timeout in seconds applied to every HTTP request so a stalled
    # connection cannot block the scrape forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Return an approximate age in years, or None when it is unknown.

        Args:
            birthday_ms: Birthday as milliseconds counted from 1970. Anything
                that is not a positive number is treated as "not set".
            current_year: Calendar year to measure against; defaults to the
                current local year instead of a hard-coded one.
        """
        if isinstance(birthday_ms, bool) or not isinstance(birthday_ms, (int, float)):
            return None
        if birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between 1970 and the birthday.
        years_after_epoch = int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    def get_user_info(self, user_id):
        """Fetch profile details for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Each value stays '无' when the profile could not be obtained or
            the field is absent.

        Request and JSON-decoding failures are printed and retried, up to
        three attempts with a 3 second pause in between. A response that
        arrives but carries no usable profile is not retried.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if user_id is None:
            # Nothing to look up, so skip the request entirely.
            return data

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                # Best effort: report the failure and try again.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)
                continue

            profile = result.get('profile') if isinstance(result, dict) else None
            if isinstance(profile, dict) and result.get('code') == 200:
                data['gender'] = profile.get('gender', '无')

                age = self._age_from_birthday(profile.get('birthday', 0))
                if age is not None:
                    data['age'] = age

                data['city'] = profile.get('city', '无')

                # A null signature must not raise; fall back to the placeholder.
                signature = profile.get('signature')
                if signature is None:
                    signature = '无'
                data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data13.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        The csv module quotes fields containing commas, quotes or line
        breaks, so such text can no longer split a record.
        """
        import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data13.csv'):
        """Create or truncate ``filename`` and write the column header row."""
        import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a page is missing, empty
        or shorter than the page size. A comment that fails to process is
        reported and skipped; the scrape continues with the next one.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or`` also covers an explicit null user/content.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as epoch seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Reset the CSV file to its header row, then scrape the configured song."""
    target_song = '2644571029'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


# Run the scrape only when this code is executed as the main program.
if __name__ == "__main__":
    main()

Saved comment from user: 起名好难__Lil-G
Saved comment from user: 被你_YOKE
Saved comment from user: 拾失Mn
Saved comment from user: 西柚ohh_
Saved comment from user: 暴躁的西蓝花小姐
Saved comment from user: R_Ru-慢慢来
Saved comment from user: 许几渔
Saved comment from user: 高发际线美女
Saved comment from user: 高发际线美女
Saved comment from user: 卿玖怜jiu
Saved comment from user: 怪你作
Saved comment from user: 琳做过的一个特别的梦
Saved comment from user: 琳做过的一个特别的梦
Saved comment from user: 拉荼尔
Saved comment from user: 朋克兔兜儿
Saved comment from user: KAIIF0802
Saved comment from user: 朋克兔兔兔-_
Saved comment from user: 6_ghost520
Saved comment from user: 一口琳琳好甜
Saved comment from user: 杰尼鬼
Saved comment from user: 一切都会好起来不是吗
Saved comment from user: 衍路繁华似錦AAA锦衍
Saved comment from user: 大孩子330
Saved comment from user: Moonlight-sx_
Saved comment from user: G-婉儿
Saved comment from user: G-婉儿
Saved comment from user: stellaxyq
Saved comment from user: 比礁
Saved comment from user: 比礁
Saved comment from user: -江户川凉春
Saved comment from use

KeyboardInterrupt: 

In [14]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is requested as well, so each
    CSV row holds the comment together with the author's age, gender, city
    and signature.
    """

    # Timeout in seconds applied to every HTTP request so a stalled
    # connection cannot block the scrape forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Return an approximate age in years, or None when it is unknown.

        Args:
            birthday_ms: Birthday as milliseconds counted from 1970. Anything
                that is not a positive number is treated as "not set".
            current_year: Calendar year to measure against; defaults to the
                current local year instead of a hard-coded one.
        """
        if isinstance(birthday_ms, bool) or not isinstance(birthday_ms, (int, float)):
            return None
        if birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between 1970 and the birthday.
        years_after_epoch = int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    def get_user_info(self, user_id):
        """Fetch profile details for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Each value stays '无' when the profile could not be obtained or
            the field is absent.

        Request and JSON-decoding failures are printed and retried, up to
        three attempts with a 3 second pause in between. A response that
        arrives but carries no usable profile is not retried.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if user_id is None:
            # Nothing to look up, so skip the request entirely.
            return data

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                # Best effort: report the failure and try again.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)
                continue

            profile = result.get('profile') if isinstance(result, dict) else None
            if isinstance(profile, dict) and result.get('code') == 200:
                data['gender'] = profile.get('gender', '无')

                age = self._age_from_birthday(profile.get('birthday', 0))
                if age is not None:
                    data['age'] = age

                data['city'] = profile.get('city', '无')

                # A null signature must not raise; fall back to the placeholder.
                signature = profile.get('signature')
                if signature is None:
                    signature = '无'
                data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data14.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        The csv module quotes fields containing commas, quotes or line
        breaks, so such text can no longer split a record.
        """
        import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data14.csv'):
        """Create or truncate ``filename`` and write the column header row."""
        import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a page is missing, empty
        or shorter than the page size. A comment that fails to process is
        reported and skipped; the scrape continues with the next one.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or`` also covers an explicit null user/content.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as epoch seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Reset the CSV file to its header row, then scrape the configured song."""
    target_song = '2643537752'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


# Run the scrape only when this code is executed as the main program.
if __name__ == "__main__":
    main()

Saved comment from user: stilldude
Saved comment from user: 钟鼓人
Saved comment from user: 只爱CBLOCK
Saved comment from user: 小雪团子
Saved comment from user: share希尔
Saved comment from user: laughnow_crylater
Saved comment from user: Yzx518
Saved comment from user: stilldude
Saved comment from user: TOO容
Saved comment from user: TOO容
Saved comment from user: 41Four-one
Saved comment from user: Ka2ryZheng
Saved comment from user: 冷藏汽水
Saved comment from user: 86汪海涛
Saved comment from user: 盼吶
Saved comment from user: 香菇恶魔
Saved comment from user: L6-FMJ
Saved comment from user: 想和于谦共枕眠
Saved comment from user: _____nice____
Saved comment from user: ZR-CHILL
Saved comment from user: Kimi-K-ZN
Saved comment from user: loyd03
Saved comment from user: Wolverinemsa
Saved comment from user: 惰氣熏天
Saved comment from user: 嗯嗯嗯嗯嗯_05rv
Saved comment from user: Pabl0ooo
Saved comment from user: killakin
Saved comment from user: Lwy-ing
Saved comment from user: NOISE-信TU
Saved comment from user: Jaymingu

In [15]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is requested as well, so each
    CSV row holds the comment together with the author's age, gender, city
    and signature.
    """

    # Timeout in seconds applied to every HTTP request so a stalled
    # connection cannot block the scrape forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with every request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday_ms, current_year=None):
        """Return an approximate age in years, or None when it is unknown.

        Args:
            birthday_ms: Birthday as milliseconds counted from 1970. Anything
                that is not a positive number is treated as "not set".
            current_year: Calendar year to measure against; defaults to the
                current local year instead of a hard-coded one.
        """
        if isinstance(birthday_ms, bool) or not isinstance(birthday_ms, (int, float)):
            return None
        if birthday_ms <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between 1970 and the birthday.
        years_after_epoch = int(birthday_ms // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    def get_user_info(self, user_id):
        """Fetch profile details for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Each value stays '无' when the profile could not be obtained or
            the field is absent.

        Request and JSON-decoding failures are printed and retried, up to
        three attempts with a 3 second pause in between. A response that
        arrives but carries no usable profile is not retried.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }
        if user_id is None:
            # Nothing to look up, so skip the request entirely.
            return data

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                result = response.json() if response.status_code == 200 else None
            except Exception as e:
                # Best effort: report the failure and try again.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)
                continue

            profile = result.get('profile') if isinstance(result, dict) else None
            if isinstance(profile, dict) and result.get('code') == 200:
                data['gender'] = profile.get('gender', '无')

                age = self._age_from_birthday(profile.get('birthday', 0))
                if age is not None:
                    data['age'] = age

                data['city'] = profile.get('city', '无')

                # A null signature must not raise; fall back to the placeholder.
                signature = profile.get('signature')
                if signature is None:
                    signature = '无'
                data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
            break

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data15.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        The csv module quotes fields containing commas, quotes or line
        breaks, so such text can no longer split a record.
        """
        import csv

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data15.csv'):
        """Create or truncate ``filename`` and write the column header row."""
        import csv

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a page is missing, empty
        or shorter than the page size. A comment that fails to process is
        reported and skipped; the scrape continues with the next one.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or`` also covers an explicit null user/content.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as epoch seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Reset the CSV file to its header row, then scrape the configured song."""
    target_song = '2644183063'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


# Run the scrape only when this code is executed as the main program.
if __name__ == "__main__":
    main()

Saved comment from user: 大庞小白
Saved comment from user: Sakura四季
Saved comment from user: 江秋朴
Saved comment from user: Meatballs_Gikxo
Saved comment from user: 小狼狗M
Saved comment from user: 皮尔耳朵
Saved comment from user: 皮尔耳朵
Saved comment from user: 皮尔耳朵
Saved comment from user: 黎罹蠡狸
Saved comment from user: 喊破喉咙也没有用
Saved comment from user: 張-Zhang___
Saved comment from user: 含着糖的程艾影
Saved comment from user: AsSSeptember
Saved comment from user: 神经病嘻嘻嘻你忍忍
Saved comment from user: 馒头超人--
Saved comment from user: 拘狱
Saved comment from user: 罖臻
Saved comment from user: to-feel-attachment
Saved comment from user: 会早点睡吗-
Saved comment from user: 会早点睡吗-
Saved comment from user: 地球新鲜旋
Saved comment from user: 地球新鲜旋
Saved comment from user: 地球新鲜旋
Saved comment from user: 地球新鲜旋
Saved comment from user: _-空--白-_
Saved comment from user: Conner康奈
Saved comment from user: -ez4-
Saved comment from user: 勾歪哒不溜
Saved comment from user: 勾歪哒不溜
Saved comment from user: ni阿阳哥
Saved comment from user: 烨si

KeyboardInterrupt: 

In [16]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row, enriched with a few profile fields
    (age, gender, city, signature) looked up for the comment's author.
    """

    def __init__(self):
        # Headers sent with every HTTP request made by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _birthday_to_age(birthday, current_year=None):
        """Convert a birthday timestamp into an age in calendar years.

        Args:
            birthday: Value of the profile's ``birthday`` field; treated as
                milliseconds since the Unix epoch.
            current_year: Year to measure against. Defaults to the current
                local year, so results do not go stale as time passes.

        Returns:
            ``current_year - birth year`` clamped at 0, or ``None`` when
            ``birthday`` is missing, non-numeric, not positive, or cannot be
            converted to a date.
        """
        if isinstance(birthday, bool) or not isinstance(birthday, (int, float)):
            return None
        if birthday <= 0:
            return None
        try:
            birth_year = time.localtime(birthday // 1000).tm_year
        except (OverflowError, OSError, ValueError):
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch profile fields for ``user_id``.

        Up to three attempts are made. An attempt is retried (after a
        3 second pause) only when it raises; a non-200 response or a payload
        without a usable profile ends the lookup immediately.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Fields
            that could not be determined keep the placeholder ``'无'``.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    # A null profile is "no data", not an error worth retrying.
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data['gender'] = profile.get('gender', '无')

                        age = self._birthday_to_age(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        signature = profile.get('signature', '无')
                        if signature is None:
                            # A null signature previously raised and caused
                            # three useless retries.
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must never
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data16.csv'):
        """Append ``data`` as one CSV row to ``filename``.

        The row goes through ``csv.writer`` so that commas, quotes and line
        breaks inside a field are quoted instead of corrupting the row.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps this class self-contained

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data16.csv'):
        """Create (or truncate) ``filename`` and write the column header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps this class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id, filename=None):
        """Page through all comments of ``song_id`` and save each as a CSV row.

        Pages of 100 comments are requested until a page cannot be fetched,
        is empty, or holds fewer comments than requested. A comment that
        fails to process is reported and skipped.

        Args:
            song_id: Song whose comments are scraped.
            filename: Optional CSV path to append rows to. When omitted,
                ``save_comment`` uses its own default file.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Presumably epoch milliseconds: the first ten digits are
                    # used as the seconds value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    # Null content is stored as an empty field instead of
                    # dropping the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    if filename is None:
                        self.save_comment(data)
                    else:
                        self.save_comment(data, filename)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there are no further pages

            time.sleep(1)  # Delay between pages

def main():
    """Run the scraper for one hard-coded song ID."""
    target_song_id = '2644220790'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # Recreate the CSV with its header row before comment rows are appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song_id)

if __name__ == '__main__':
    main()

Saved comment from user: 灵幽迷境世界
Saved comment from user: koioo_
Saved comment from user: 伤忆霜花
Saved comment from user: 罚疤
Saved comment from user: Yixi_RuaZ
Saved comment from user: 青v轩ventus
Saved comment from user: 英俊潇洒帅气又能打
Saved comment from user: 马志恒-1
Saved comment from user: 02twe
Saved comment from user: 心岛冰雨
Saved comment from user: 粥一o椰
Saved comment from user: -沈玖菡
Saved comment from user: mooine55
Saved comment from user: 她和小猫都乖
Saved comment from user: 销声匿迹-24
Saved comment from user: 你清醒点n
Saved comment from user: 你清醒点n
Saved comment from user: 小雨同学呀yu
Saved comment from user: Y-林诗妍
Saved comment from user: 失焦轨迹
Saved comment from user: Deathtaru
Saved comment from user: 別冷冰冰la
Saved comment from user: 漫漫星河入我眼
Saved comment from user: 漫漫星河入我眼
Saved comment from user: 皮蛋solo粥_HHJ
Saved comment from user: 小魚冻干ssi
Saved comment from user: 小冷心绪难眠
Saved comment from user: 含郁文字
Saved comment from user: 少点单纯敏感
Saved comment from user: 少点单纯敏感
Saved comment from user: 如果你也听-郑run泽


In [17]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row, enriched with a few profile fields
    (age, gender, city, signature) looked up for the comment's author.
    """

    def __init__(self):
        # Headers sent with every HTTP request made by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _birthday_to_age(birthday, current_year=None):
        """Convert a birthday timestamp into an age in calendar years.

        Args:
            birthday: Value of the profile's ``birthday`` field; treated as
                milliseconds since the Unix epoch.
            current_year: Year to measure against. Defaults to the current
                local year, so results do not go stale as time passes.

        Returns:
            ``current_year - birth year`` clamped at 0, or ``None`` when
            ``birthday`` is missing, non-numeric, not positive, or cannot be
            converted to a date.
        """
        if isinstance(birthday, bool) or not isinstance(birthday, (int, float)):
            return None
        if birthday <= 0:
            return None
        try:
            birth_year = time.localtime(birthday // 1000).tm_year
        except (OverflowError, OSError, ValueError):
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch profile fields for ``user_id``.

        Up to three attempts are made. An attempt is retried (after a
        3 second pause) only when it raises; a non-200 response or a payload
        without a usable profile ends the lookup immediately.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Fields
            that could not be determined keep the placeholder ``'无'``.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    # A null profile is "no data", not an error worth retrying.
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data['gender'] = profile.get('gender', '无')

                        age = self._birthday_to_age(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        signature = profile.get('signature', '无')
                        if signature is None:
                            # A null signature previously raised and caused
                            # three useless retries.
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must never
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data17.csv'):
        """Append ``data`` as one CSV row to ``filename``.

        The row goes through ``csv.writer`` so that commas, quotes and line
        breaks inside a field are quoted instead of corrupting the row.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps this class self-contained

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data17.csv'):
        """Create (or truncate) ``filename`` and write the column header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps this class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id, filename=None):
        """Page through all comments of ``song_id`` and save each as a CSV row.

        Pages of 100 comments are requested until a page cannot be fetched,
        is empty, or holds fewer comments than requested. A comment that
        fails to process is reported and skipped.

        Args:
            song_id: Song whose comments are scraped.
            filename: Optional CSV path to append rows to. When omitted,
                ``save_comment`` uses its own default file.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Presumably epoch milliseconds: the first ten digits are
                    # used as the seconds value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    # Null content is stored as an empty field instead of
                    # dropping the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    if filename is None:
                        self.save_comment(data)
                    else:
                        self.save_comment(data, filename)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there are no further pages

            time.sleep(1)  # Delay between pages

def main():
    """Run the scraper for one hard-coded song ID."""
    target_song_id = '2644475770'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # Recreate the CSV with its header row before comment rows are appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song_id)

if __name__ == '__main__':
    main()

Saved comment from user: 她亦是匆匆过客
Saved comment from user: Till-D-l-E
Saved comment from user: 池魚大帝
Saved comment from user: 法老王_sh3k
Saved comment from user: 嘎鱼_ghj0
Saved comment from user: Shining_Shiny
Saved comment from user: 花哥废了
Saved comment from user: 花哥废了
Saved comment from user: 哈小喆
Saved comment from user: 落蟔
Saved comment from user: 落蟔
Saved comment from user: 歪飞飞飞
Saved comment from user: Aurora献南
Saved comment from user: 某幻今天开心了没
Saved comment from user: 无用之人不必刘
Saved comment from user: 她的温柔我感受不到
Saved comment from user: 一位不愿意透露姓名的学分
Saved comment from user: 24hours_7
Saved comment from user: _要饭还嫌馊
Saved comment from user: 眠雾my_
Saved comment from user: 相春言峥ovo
Saved comment from user: 张子_f9Ie
Saved comment from user: 眠雾my_
Saved comment from user: 小底妹妹
Saved comment from user: 她亦是匆匆过客
Saved comment from user: 乱花metro
Saved comment from user: 乱花metro
Saved comment from user: 强迫失忆
Saved comment from user: 强迫失忆
Saved comment from user: Jessie-X21
Saved comment from user: 缘

In [19]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row, enriched with a few profile fields
    (age, gender, city, signature) looked up for the comment's author.
    """

    def __init__(self):
        # Headers sent with every HTTP request made by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _birthday_to_age(birthday, current_year=None):
        """Convert a birthday timestamp into an age in calendar years.

        Args:
            birthday: Value of the profile's ``birthday`` field; treated as
                milliseconds since the Unix epoch.
            current_year: Year to measure against. Defaults to the current
                local year, so results do not go stale as time passes.

        Returns:
            ``current_year - birth year`` clamped at 0, or ``None`` when
            ``birthday`` is missing, non-numeric, not positive, or cannot be
            converted to a date.
        """
        if isinstance(birthday, bool) or not isinstance(birthday, (int, float)):
            return None
        if birthday <= 0:
            return None
        try:
            birth_year = time.localtime(birthday // 1000).tm_year
        except (OverflowError, OSError, ValueError):
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch profile fields for ``user_id``.

        Up to three attempts are made. An attempt is retried (after a
        3 second pause) only when it raises; a non-200 response or a payload
        without a usable profile ends the lookup immediately.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Fields
            that could not be determined keep the placeholder ``'无'``.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    # A null profile is "no data", not an error worth retrying.
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data['gender'] = profile.get('gender', '无')

                        age = self._birthday_to_age(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        signature = profile.get('signature', '无')
                        if signature is None:
                            # A null signature previously raised and caused
                            # three useless retries.
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must never
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data18.csv'):
        """Append ``data`` as one CSV row to ``filename``.

        The row goes through ``csv.writer`` so that commas, quotes and line
        breaks inside a field are quoted instead of corrupting the row.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps this class self-contained

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data18.csv'):
        """Create (or truncate) ``filename`` and write the column header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps this class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id, filename=None):
        """Page through all comments of ``song_id`` and save each as a CSV row.

        Pages of 100 comments are requested until a page cannot be fetched,
        is empty, or holds fewer comments than requested. A comment that
        fails to process is reported and skipped.

        Args:
            song_id: Song whose comments are scraped.
            filename: Optional CSV path to append rows to. When omitted,
                ``save_comment`` uses its own default file.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Presumably epoch milliseconds: the first ten digits are
                    # used as the seconds value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    # Null content is stored as an empty field instead of
                    # dropping the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    if filename is None:
                        self.save_comment(data)
                    else:
                        self.save_comment(data, filename)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there are no further pages

            time.sleep(1)  # Delay between pages

def main():
    """Run the scraper for one hard-coded song ID."""
    target_song_id = '2645923952'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # Recreate the CSV with its header row before comment rows are appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song_id)

if __name__ == '__main__':
    main()

Saved comment from user: WackboiSkip
Saved comment from user: -逃避陽光
Saved comment from user: 唯二ouo
Saved comment from user: Yaoanzhu_
Saved comment from user: Yaoanzhu_
Saved comment from user: PrincechenChen
Saved comment from user: 十口儿艹来
Saved comment from user: SimonToshiya
Saved comment from user: 我说我想整容
Saved comment from user: _理先生gt_
Saved comment from user: 殇kfu
Saved comment from user: 知道就不说了
Saved comment from user: Isaylst
Saved comment from user: WackboiSkip
Saved comment from user: 康康我的叉烧包尼
Saved comment from user: 班颂
Saved comment from user: AcridteYang
Saved comment from user: xiin-hha
Saved comment from user: MaK1mma
Saved comment from user: 良爱苦口
Saved comment from user: 我喜欢海苔薯片best
Saved comment from user: Blame-U-
Saved comment from user: Yaoanzhu_
Saved comment from user: Blame-U-
Saved comment from user: 菜籽油不菜也不油
Saved comment from user: 知道就不说了
Saved comment from user: 江南不胜齐
Saved comment from user: 1iu11u
Saved comment from user: 迟暮人初夏
Saved comment from user: -轩木-

In [20]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row, enriched with a few profile fields
    (age, gender, city, signature) looked up for the comment's author.
    """

    def __init__(self):
        # Headers sent with every HTTP request made by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _birthday_to_age(birthday, current_year=None):
        """Convert a birthday timestamp into an age in calendar years.

        Args:
            birthday: Value of the profile's ``birthday`` field; treated as
                milliseconds since the Unix epoch.
            current_year: Year to measure against. Defaults to the current
                local year, so results do not go stale as time passes.

        Returns:
            ``current_year - birth year`` clamped at 0, or ``None`` when
            ``birthday`` is missing, non-numeric, not positive, or cannot be
            converted to a date.
        """
        if isinstance(birthday, bool) or not isinstance(birthday, (int, float)):
            return None
        if birthday <= 0:
            return None
        try:
            birth_year = time.localtime(birthday // 1000).tm_year
        except (OverflowError, OSError, ValueError):
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch profile fields for ``user_id``.

        Up to three attempts are made. An attempt is retried (after a
        3 second pause) only when it raises; a non-200 response or a payload
        without a usable profile ends the lookup immediately.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Fields
            that could not be determined keep the placeholder ``'无'``.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    # A null profile is "no data", not an error worth retrying.
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data['gender'] = profile.get('gender', '无')

                        age = self._birthday_to_age(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        signature = profile.get('signature', '无')
                        if signature is None:
                            # A null signature previously raised and caused
                            # three useless retries.
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must never
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data19.csv'):
        """Append ``data`` as one CSV row to ``filename``.

        The row goes through ``csv.writer`` so that commas, quotes and line
        breaks inside a field are quoted instead of corrupting the row.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps this class self-contained

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data19.csv'):
        """Create (or truncate) ``filename`` and write the column header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps this class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id, filename=None):
        """Page through all comments of ``song_id`` and save each as a CSV row.

        Pages of 100 comments are requested until a page cannot be fetched,
        is empty, or holds fewer comments than requested. A comment that
        fails to process is reported and skipped.

        Args:
            song_id: Song whose comments are scraped.
            filename: Optional CSV path to append rows to. When omitted,
                ``save_comment`` uses its own default file.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Presumably epoch milliseconds: the first ten digits are
                    # used as the seconds value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    # Null content is stored as an empty field instead of
                    # dropping the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    if filename is None:
                        self.save_comment(data)
                    else:
                        self.save_comment(data, filename)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there are no further pages

            time.sleep(1)  # Delay between pages

def main():
    """Run the scraper for one hard-coded song ID."""
    target_song_id = '2642596385'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # Recreate the CSV with its header row before comment rows are appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song_id)

if __name__ == '__main__':
    main()

Saved comment from user: 天下无锡
Saved comment from user: 空蛹_出蝶
Saved comment from user: WuliHyunho
Saved comment from user: Ddayupuuuuuu
Saved comment from user: Ddayupuuuuuu
Saved comment from user: 舵Diverseddie
Saved comment from user: 空蛹_出蝶
Saved comment from user: 梦想是打败沉默怪兽
Saved comment from user: 空蛹_出蝶
Saved comment from user: 闻顺顺
Saved comment from user: 余余灿-
Saved comment from user: Ddayupuuuuuu
Saved comment from user: Ddayupuuuuuu
Saved comment from user: 是畅畅鸭
Saved comment from user: sugar不爱sugar
Saved comment from user: Ritchey-
Saved comment from user: Ritchey-
Saved comment from user: Eleven_雾
Saved comment from user: 大马哥-MYC
Saved comment from user: get_O_o
Saved comment from user: 交河故城荷花白叙事者
Saved comment from user: 幼儿园抢饭No2
Saved comment from user: 大江东去gvf
Saved comment from user: 凉屿idrg
Saved comment from user: 锦依卫神眷sakura
Saved comment from user: 梁凉没灵感
Saved comment from user: 奶酥好乖
Saved comment from user: 今天的砖异常烫手
Saved comment from user: 汐芰最可爱
Saved comment from user

In [21]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row, enriched with a few profile fields
    (age, gender, city, signature) looked up for the comment's author.
    """

    def __init__(self):
        # Headers sent with every HTTP request made by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _birthday_to_age(birthday, current_year=None):
        """Convert a birthday timestamp into an age in calendar years.

        Args:
            birthday: Value of the profile's ``birthday`` field; treated as
                milliseconds since the Unix epoch.
            current_year: Year to measure against. Defaults to the current
                local year, so results do not go stale as time passes.

        Returns:
            ``current_year - birth year`` clamped at 0, or ``None`` when
            ``birthday`` is missing, non-numeric, not positive, or cannot be
            converted to a date.
        """
        if isinstance(birthday, bool) or not isinstance(birthday, (int, float)):
            return None
        if birthday <= 0:
            return None
        try:
            birth_year = time.localtime(birthday // 1000).tm_year
        except (OverflowError, OSError, ValueError):
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        return max(0, current_year - birth_year)

    def get_user_info(self, user_id):
        """Fetch profile fields for ``user_id``.

        Up to three attempts are made. An attempt is retried (after a
        3 second pause) only when it raises; a non-200 response or a payload
        without a usable profile ends the lookup immediately.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Fields
            that could not be determined keep the placeholder ``'无'``.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    # A null profile is "no data", not an error worth retrying.
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data['gender'] = profile.get('gender', '无')

                        age = self._birthday_to_age(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        signature = profile.get('signature', '无')
                        if signature is None:
                            # A null signature previously raised and caused
                            # three useless retries.
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must never
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Index of the first comment of the page.
            limit: Maximum number of comments requested.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data20.csv'):
        """Append ``data`` as one CSV row to ``filename``.

        The row goes through ``csv.writer`` so that commas, quotes and line
        breaks inside a field are quoted instead of corrupting the row.

        Args:
            data: Sequence of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps this class self-contained

        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data20.csv'):
        """Create (or truncate) ``filename`` and write the column header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps this class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id, filename=None):
        """Page through all comments of ``song_id`` and save each as a CSV row.

        Pages of 100 comments are requested until a page cannot be fetched,
        is empty, or holds fewer comments than requested. A comment that
        fails to process is reported and skipped.

        Args:
            song_id: Song whose comments are scraped.
            filename: Optional CSV path to append rows to. When omitted,
                ``save_comment`` uses its own default file.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Presumably epoch milliseconds: the first ten digits are
                    # used as the seconds value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    # Null content is stored as an empty field instead of
                    # dropping the whole comment.
                    content = comment.get('content') or ''

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        content.replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    if filename is None:
                        self.save_comment(data)
                    else:
                        self.save_comment(data, filename)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there are no further pages

            time.sleep(1)  # Delay between pages

def main():
    """Run the scraper for one hard-coded song ID."""
    target_song_id = '2644516969'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # Recreate the CSV with its header row before comment rows are appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song_id)

if __name__ == '__main__':
    main()

Saved comment from user: 恁想怎样
Saved comment from user: 凌霜初雪凝露纷飞
Saved comment from user: 要做糕手
Saved comment from user: 板饭蛋
Saved comment from user: 倾琪很甜
Saved comment from user: 曲奇在思考
Saved comment from user: BEEKII比奇
Saved comment from user: 橙子很白o小飞熊
Saved comment from user: 每当风生竹院
Saved comment from user: 我是海苔我是海苔
Saved comment from user: 沛沛酱CX
Saved comment from user: WOAIXJB
Saved comment from user: 午夜进行国学
Saved comment from user: 八号当铺店长
Saved comment from user: 樱桃番茄蛋
Saved comment from user: 王权杀架
Saved comment from user: 薄荷音王子o
Saved comment from user: 油管精选汇
Saved comment from user: 薏苡和栀恩
Saved comment from user: 键之勇者
Saved comment from user: Mianyang趙拾叁
Saved comment from user: ringaringaringr
Saved comment from user: Mym清
Saved comment from user: 带我去玩-
Saved comment from user: 狼火_半匪半雅
Saved comment from user: M_10-9
Saved comment from user: 别多想呼呼
Saved comment from user: 那你要听走马吗
Saved comment from user: 网上邻居_10086
Saved comment from user: 冥界悟空
Saved comment from user: 一个普普通通的王
S

In [None]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data21.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data21.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Entry point: prepare the CSV via ``create_csv_header`` and scrape one hard-coded song."""
    target_song = '2643549847'  # Swap in the ID of the song you want to scrape
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

In [None]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data22.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data22.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Entry point: prepare the CSV via ``create_csv_header`` and scrape one hard-coded song."""
    target_song = '2644182486'  # Swap in the ID of the song you want to scrape
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

In [1]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data23.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data23.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Entry point: prepare the CSV via ``create_csv_header`` and scrape one hard-coded song."""
    target_song = '2645495145'  # Swap in the ID of the song you want to scrape
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: 殊澈_7
Saved comment from user: 全世界坠帅气的米斯
Saved comment from user: 喜欢满了
Saved comment from user: 念向风
Saved comment from user: 璟卿__kkk
Saved comment from user: 盞蕓
Saved comment from user: Tt_UNIQUE
Saved comment from user: 赛博木鱼_
Saved comment from user: A菌A君
Saved comment from user: 小嫣宝ovo
Saved comment from user: 周周哈哈周周
Saved comment from user: 抽开回忆
Saved comment from user: agiye
Saved comment from user: 雨湆
Saved comment from user: 南昌加藤鹰
Saved comment from user: Cc阳阳阳-
Saved comment from user: 山楂树爱恋
Saved comment from user: 丐帮帮主肥宅月月
Saved comment from user: __Silent______
Saved comment from user: __Silent______
Saved comment from user: 空xhy
Saved comment from user: 風亦為笑
Saved comment from user: Goxhm
Saved comment from user: 之麻阿
Saved comment from user: 幸fuEari_
Saved comment from user: Coc会后悔
Saved comment from user: Jin-nnnn
Saved comment from user: 怨念纸飞机
Saved comment from user: Jin-nnnn
Saved comment from user: 全球音乐会客厅
Saved comment from user: 奥特曼柔弱
Saved com

In [2]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data24.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data24.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Entry point: prepare the CSV via ``create_csv_header`` and scrape one hard-coded song."""
    target_song = '2643057778'  # Swap in the ID of the song you want to scrape
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: 醉迟吟
Saved comment from user: 杏椿铃纪
Saved comment from user: 咲比宰治
Saved comment from user: 会过期的凤梨罐头_
Saved comment from user: 鬼鬼_NIbZ
Saved comment from user: Cici_pCZ5
Saved comment from user: icy程儿
Saved comment from user: 你也没薯汁吗
Saved comment from user: 马达加斯加赵四
Saved comment from user: 索尼克打十个
Saved comment from user: YOJOS
Saved comment from user: Tdyio
Saved comment from user: IHTS-
Saved comment from user: _Kryptonite-_
Saved comment from user: 叶帆quk
Saved comment from user: E999_
Saved comment from user: 许安zZ
Saved comment from user: 怎么瘦不了呜呜
Saved comment from user: 小乔妳了
Saved comment from user: Ann安fairy
Saved comment from user: 盆地等比数列
Saved comment from user: 夜露附红花
Saved comment from user: 你的斌哥哥-BG
Saved comment from user: 多加柠檬呀
Saved comment from user: 分享欲是最大的浪漫
Saved comment from user: 五色土mcf
Saved comment from user: 吾王是来水经验的
Saved comment from user: 优雅的霹雳章鱼
Saved comment from user: 冥王星mks
Saved comment from user: 讨厌戒指
Saved comment from user: 是李知恩鸭_
Sa

In [3]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data25.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data25.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Entry point: prepare the CSV via ``create_csv_header`` and scrape one hard-coded song."""
    target_song = '2645445891'  # Swap in the ID of the song you want to scrape
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when this code is executed directly, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: 山鬼Prajna
Saved comment from user: 你今天笑了没-
Saved comment from user: 凌晨4点海
Saved comment from user: 岁月i安然
Saved comment from user: -亡语系-
Saved comment from user: __小王子的狐狸
Saved comment from user: 孤独本孤本独
Saved comment from user: 何日风祈
Saved comment from user: 爱学结果
Saved comment from user: 哈哈-Shoko-玛卡巴卡
Saved comment from user: 霖霖霖lllllll
Saved comment from user: 落日余晖_肉嘟嘟
Saved comment from user: Fix丶Bawdy
Saved comment from user: 柠檬树0OOO00
Saved comment from user: 一吻枪决
Saved comment from user: 雾都敲编钟
Saved comment from user: 雾都敲编钟
Saved comment from user: 左辅星君
Saved comment from user: TAKAaaa-
Saved comment from user: 想见你无数次
Saved comment from user: 小黑哥嗯哼
Saved comment from user: 月矢仓
Saved comment from user: 淡水In
Saved comment from user: 淡水In
Saved comment from user: 等待冬天候鸟
Saved comment from user: 等待冬天候鸟
Saved comment from user: 用户1441168772
Saved comment from user: 雾都敲编钟
Saved comment from user: 故弄玄虚卖弄思想
Saved comment from user: 逍遥门第一大弟子
Saved comment from user: 岁

In [None]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Collect the comments of a NetEase Cloud Music song into a CSV file.

    For every comment the author's profile is fetched as well, so each CSV
    row combines comment fields with the author's age, gender, city and
    signature.
    """

    def __init__(self):
        # Browser-like headers sent with every HTTP request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Turn a profile ``birthday`` value into an approximate age in years.

        Args:
            birthday: Raw ``birthday`` field; treated as milliseconds since
                the Unix epoch.
            current_year: Year the age is measured against. Defaults to the
                current local year (previously hard-coded to 2024).

        Returns:
            A non-negative integer age, or ``None`` when ``birthday`` is not
            a positive number (missing, null or non-positive).
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        # Whole 365-day years between the epoch and the birthday.
        years_after_epoch = int(birthday // (1000 * 365 * 24 * 3600))
        return max(0, (current_year - 1970) - years_after_epoch)

    @staticmethod
    def _write_csv_row(filename, mode, row):
        """Write one properly quoted CSV row to ``filename`` opened with ``mode``."""
        import csv  # local import keeps the class usable without touching file-level imports

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(row)

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        The request is retried up to three times (3 s apart) when an
        exception is raised; a response that arrives without raising is
        never retried, whatever its status code.

        Returns:
            dict with keys ``gender``, ``age``, ``city`` and ``sign``. Any
            field that could not be obtained keeps the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')

                        age = self._age_from_birthday(profile.get('birthday', 0))
                        if age is not None:
                            data['age'] = age

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise AttributeError and
                        # trigger pointless retries; fall back to the placeholder.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
                break
            except Exception as e:
                # Best effort: report, wait, and try again; defaults are
                # returned if every attempt fails.
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Request one page of comments for ``song_id``.

        Returns:
            The decoded JSON payload, or ``None`` if the request or the JSON
            decoding raised.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Timeout added so a stalled connection cannot hang the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data26.csv'):
        """Append ``data`` (an iterable of field values) as one CSV row.

        Values are converted with ``str`` and quoted by the csv module, so
        commas, quotes or newlines inside a field no longer break the row.
        """
        self._write_csv_row(filename, 'a', [str(value) for value in data])

    def create_csv_header(self, filename='comments_data26.csv'):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_csv_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, the
        payload has no 'comments' key, a page is empty, or a page is shorter
        than the page size. A comment that raises while being processed is
        reported and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used,
                    # i.e. it is read as whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # A short page means there is nothing left to fetch

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2644550957'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

In [5]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of one NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile fields (age,
    gender, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Headers sent with every request issued by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Return the profile fields of one user, falling back to '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with the keys 'gender', 'age', 'city' and 'sign'. A field
            keeps the placeholder '无' unless the endpoint answers with HTTP
            200, a JSON ``code`` of 200 and a ``profile`` that provides it.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        # A JSON null profile is treated like an empty one.
                        profile = result['profile'] or {}
                        data['gender'] = profile.get('gender', '无')

                        # presumably epoch milliseconds, given the divisor
                        # below — TODO confirm. A null birthday counts as unset.
                        birthday = profile.get('birthday') or 0
                        if birthday > 0:
                            ms_per_year = 1000 * 365 * 24 * 3600
                            birth_year = 1970 + birthday // ms_per_year
                            # Use the current year instead of a hard-coded
                            # 2024 so ages do not go stale.
                            data['age'] = max(0, time.localtime().tm_year - birth_year)

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise and trigger three
                        # pointless retries; treat it as missing.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only exceptions are retried; any completed response ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # No point in waiting after the last attempt.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data27.csv'):
        """Append one row to the CSV file.

        Args:
            data: Iterable of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which a plain ','.join() would silently turn into broken rows.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data27.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through every comment of a song and save one row per comment.

        Pages of ``limit`` comments are requested until a request yields no
        usable result, an empty page, or a page shorter than ``limit``. A
        comment that raises while being processed is reported and skipped.

        Args:
            song_id: Song identifier forwarded to ``get_comments``.
        """
        offset = 0
        limit = 100
        processed = 0  # comments actually received, used for the progress message

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # A JSON null "user" is handled like a missing one.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used
                    # (presumably epoch milliseconds cut down to seconds — TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            # Report what was really received; a short last page used to be
            # counted as a full page of `limit` comments.
            processed += len(comments)
            print(f"Processed {processed} comments")

            if len(comments) < limit:
                break  # A short page is the last page

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2644489929'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: R_WKsc
Saved comment from user: 别细胞
Saved comment from user: 许笙kd
Saved comment from user: 腐朽丶Eros1on
Saved comment from user: 小罗不吃f
Saved comment from user: Grace_hong_
Saved comment from user: 洛小跑
Saved comment from user: 之ong钟-
Saved comment from user: 芝小升
Saved comment from user: 小星哦_M
Saved comment from user: T_cur7
Saved comment from user: 乔不乔治y
Saved comment from user: 一抹残月_殇
Saved comment from user: 晚风吹净余情
Saved comment from user: 熬不动叶
Saved comment from user: 丶你快跑我追你
Saved comment from user: 不见到易烊千玺不换网名
Saved comment from user: 岁月里来岁月里去
Saved comment from user: 用户8248556456
Saved comment from user: 回忆逐渐闪帧
Saved comment from user: 烛星吃柠檬
Saved comment from user: 可乐遇到曼妥思
Saved comment from user: 归尘尺
Saved comment from user: 代安渊
Saved comment from user: km927
Saved comment from user: 一整遍手写的从前
Saved comment from user: Liu-PepsiCo
Saved comment from user: -7七7七7七7-
Saved comment from user: Zume__
Saved comment from user: 夏日成熟迷幻氛围
Saved comment from user: 久伴不

In [7]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of one NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile fields (age,
    gender, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Headers sent with every request issued by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Return the profile fields of one user, falling back to '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with the keys 'gender', 'age', 'city' and 'sign'. A field
            keeps the placeholder '无' unless the endpoint answers with HTTP
            200, a JSON ``code`` of 200 and a ``profile`` that provides it.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        # A JSON null profile is treated like an empty one.
                        profile = result['profile'] or {}
                        data['gender'] = profile.get('gender', '无')

                        # presumably epoch milliseconds, given the divisor
                        # below — TODO confirm. A null birthday counts as unset.
                        birthday = profile.get('birthday') or 0
                        if birthday > 0:
                            ms_per_year = 1000 * 365 * 24 * 3600
                            birth_year = 1970 + birthday // ms_per_year
                            # Use the current year instead of a hard-coded
                            # 2024 so ages do not go stale.
                            data['age'] = max(0, time.localtime().tm_year - birth_year)

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise and trigger three
                        # pointless retries; treat it as missing.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only exceptions are retried; any completed response ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # No point in waiting after the last attempt.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data28.csv'):
        """Append one row to the CSV file.

        Args:
            data: Iterable of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which a plain ','.join() would silently turn into broken rows.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data28.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through every comment of a song and save one row per comment.

        Pages of ``limit`` comments are requested until a request yields no
        usable result, an empty page, or a page shorter than ``limit``. A
        comment that raises while being processed is reported and skipped.

        Args:
            song_id: Song identifier forwarded to ``get_comments``.
        """
        offset = 0
        limit = 100
        processed = 0  # comments actually received, used for the progress message

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # A JSON null "user" is handled like a missing one.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used
                    # (presumably epoch milliseconds cut down to seconds — TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            # Report what was really received; a short last page used to be
            # counted as a full page of `limit` comments.
            processed += len(comments)
            print(f"Processed {processed} comments")

            if len(comments) < limit:
                break  # A short page is the last page

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2644205231'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: 赴卿210524
Saved comment from user: 抱树树树树
Saved comment from user: 品味独特鸭
Saved comment from user: 词词喜欢笑笑笑
Saved comment from user: Yoonum
Saved comment from user: 小猫掉泪
Saved comment from user: 男鬼一
Saved comment from user: auval
Saved comment from user: 肖烬严
Saved comment from user: wwwquw
Saved comment from user: 小熊溺水了oOo
Saved comment from user: 小熊溺水了oOo
Saved comment from user: 屿夕island
Saved comment from user: 腐之少女
Saved comment from user: 飞跃_无限
Saved comment from user: lCEUE
Saved comment from user: 泽译喵
Saved comment from user: iiqsy
Saved comment from user: willowwyK
Saved comment from user: willowwyK
Saved comment from user: Ccnannan酱
Saved comment from user: -唯柯
Saved comment from user: -唯柯
Saved comment from user: 千言万语化为宅
Saved comment from user: 哩卜
Saved comment from user: 御寒寺_
Saved comment from user: 爆界卡卡罗特
Saved comment from user: 猫不眠又不休
Saved comment from user: 小琉璃冰糖葫芦
Saved comment from user: 菠萝果粒奶
Saved comment from user: Dr_am
Saved comment from us

In [8]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of one NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile fields (age,
    gender, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Headers sent with every request issued by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Return the profile fields of one user, falling back to '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with the keys 'gender', 'age', 'city' and 'sign'. A field
            keeps the placeholder '无' unless the endpoint answers with HTTP
            200, a JSON ``code`` of 200 and a ``profile`` that provides it.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        # A JSON null profile is treated like an empty one.
                        profile = result['profile'] or {}
                        data['gender'] = profile.get('gender', '无')

                        # presumably epoch milliseconds, given the divisor
                        # below — TODO confirm. A null birthday counts as unset.
                        birthday = profile.get('birthday') or 0
                        if birthday > 0:
                            ms_per_year = 1000 * 365 * 24 * 3600
                            birth_year = 1970 + birthday // ms_per_year
                            # Use the current year instead of a hard-coded
                            # 2024 so ages do not go stale.
                            data['age'] = max(0, time.localtime().tm_year - birth_year)

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise and trigger three
                        # pointless retries; treat it as missing.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only exceptions are retried; any completed response ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # No point in waiting after the last attempt.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data29.csv'):
        """Append one row to the CSV file.

        Args:
            data: Iterable of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which a plain ','.join() would silently turn into broken rows.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data29.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through every comment of a song and save one row per comment.

        Pages of ``limit`` comments are requested until a request yields no
        usable result, an empty page, or a page shorter than ``limit``. A
        comment that raises while being processed is reported and skipped.

        Args:
            song_id: Song identifier forwarded to ``get_comments``.
        """
        offset = 0
        limit = 100
        processed = 0  # comments actually received, used for the progress message

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # A JSON null "user" is handled like a missing one.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used
                    # (presumably epoch milliseconds cut down to seconds — TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            # Report what was really received; a short last page used to be
            # counted as a full page of `limit` comments.
            processed += len(comments)
            print(f"Processed {processed} comments")

            if len(comments) < limit:
                break  # A short page is the last page

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2643535318'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: Azuzr
Saved comment from user: 斜阳已成余晖
Saved comment from user: 离小葵a
Saved comment from user: 雾起_0416
Saved comment from user: 来生有个幸福的家gpyd
Saved comment from user: 无上常融仙
Saved comment from user: 记得爱要早点睡TvT
Saved comment from user: 记得爱要早点睡TvT
Saved comment from user: qchuiii
Saved comment from user: Loveyou瑶乄瑶
Saved comment from user: 小猫去偷月了o
Saved comment from user: 我是金牛做的
Saved comment from user: 一叶知秋yo_
Saved comment from user: 亲亲就害羞
Saved comment from user: drivers香蕉皮船长
Saved comment from user: 戥暨
Saved comment from user: 喵喵喵---------------
Saved comment from user: 属于我的萤火虫
Saved comment from user: 鹿白__2
Saved comment from user: RainManO-o
Saved comment from user: 樱花不及姑娘_x_x
Saved comment from user: _-兜-兜-_
Saved comment from user: 夫妻对拜
Saved comment from user: Fu-15
Saved comment from user: 墨清莲-
Saved comment from user: 记得爱要早点睡TvT
Saved comment from user: 4399_7k7k
Saved comment from user: love9-C
Saved comment from user: CCZERO-成世龙
Saved comment from user: 

In [None]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of one NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile fields (age,
    gender, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Headers sent with every request issued by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Return the profile fields of one user, falling back to '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with the keys 'gender', 'age', 'city' and 'sign'. A field
            keeps the placeholder '无' unless the endpoint answers with HTTP
            200, a JSON ``code`` of 200 and a ``profile`` that provides it.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        # A JSON null profile is treated like an empty one.
                        profile = result['profile'] or {}
                        data['gender'] = profile.get('gender', '无')

                        # presumably epoch milliseconds, given the divisor
                        # below — TODO confirm. A null birthday counts as unset.
                        birthday = profile.get('birthday') or 0
                        if birthday > 0:
                            ms_per_year = 1000 * 365 * 24 * 3600
                            birth_year = 1970 + birthday // ms_per_year
                            # Use the current year instead of a hard-coded
                            # 2024 so ages do not go stale.
                            data['age'] = max(0, time.localtime().tm_year - birth_year)

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise and trigger three
                        # pointless retries; treat it as missing.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only exceptions are retried; any completed response ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # No point in waiting after the last attempt.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data30.csv'):
        """Append one row to the CSV file.

        Args:
            data: Iterable of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which a plain ','.join() would silently turn into broken rows.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data30.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through every comment of a song and save one row per comment.

        Pages of ``limit`` comments are requested until a request yields no
        usable result, an empty page, or a page shorter than ``limit``. A
        comment that raises while being processed is reported and skipped.

        Args:
            song_id: Song identifier forwarded to ``get_comments``.
        """
        offset = 0
        limit = 100
        processed = 0  # comments actually received, used for the progress message

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # A JSON null "user" is handled like a missing one.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used
                    # (presumably epoch milliseconds cut down to seconds — TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            # Report what was really received; a short last page used to be
            # counted as a full page of `limit` comments.
            processed += len(comments)
            print(f"Processed {processed} comments")

            if len(comments) < limit:
                break  # A short page is the last page

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2644283374'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

In [10]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of one NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile fields (age,
    gender, city, signature) fetched from the user-detail endpoint.
    """

    def __init__(self):
        # Headers sent with every request issued by this scraper.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Return the profile fields of one user, falling back to '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            dict with the keys 'gender', 'age', 'city' and 'sign'. A field
            keeps the placeholder '无' unless the endpoint answers with HTTP
            200, a JSON ``code`` of 200 and a ``profile`` that provides it.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        # A JSON null profile is treated like an empty one.
                        profile = result['profile'] or {}
                        data['gender'] = profile.get('gender', '无')

                        # presumably epoch milliseconds, given the divisor
                        # below — TODO confirm. A null birthday counts as unset.
                        birthday = profile.get('birthday') or 0
                        if birthday > 0:
                            ms_per_year = 1000 * 365 * 24 * 3600
                            birth_year = 1970 + birthday // ms_per_year
                            # Use the current year instead of a hard-coded
                            # 2024 so ages do not go stale.
                            data['age'] = max(0, time.localtime().tm_year - birth_year)

                        data['city'] = profile.get('city', '无')

                        # A null signature used to raise and trigger three
                        # pointless retries; treat it as missing.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only exceptions are retried; any completed response ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                # No point in waiting after the last attempt.
                if attempt < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the comments URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the JSON
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data31.csv'):
        """Append one row to the CSV file.

        Args:
            data: Iterable of field values; each is converted with ``str``.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # csv.writer quotes fields containing commas, quotes or newlines,
        # which a plain ','.join() would silently turn into broken rows.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data31.csv'):
        """Create (or truncate) the CSV file and write the header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through every comment of a song and save one row per comment.

        Pages of ``limit`` comments are requested until a request yields no
        usable result, an empty page, or a page shorter than ``limit``. A
        comment that raises while being processed is reported and skipped.

        Args:
            song_id: Song identifier forwarded to ``get_comments``.
        """
        offset = 0
        limit = 100
        processed = 0  # comments actually received, used for the progress message

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # A JSON null "user" is handled like a missing one.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # Only the first 10 digits of the timestamp are used
                    # (presumably epoch milliseconds cut down to seconds — TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            # Report what was really received; a short last page used to be
            # counted as a full page of `limit` comments.
            processed += len(comments)
            print(f"Processed {processed} comments")

            if len(comments) < limit:
                break  # A short page is the last page

            time.sleep(1)  # Delay between pages

def main():
    """Build a scraper, write the CSV header, then scrape one hard-coded song."""
    target_song = '2645759396'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # The header is written first; scrape_comments() is called afterwards.
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

Saved comment from user: 想枯
Saved comment from user: 周彬林
Saved comment from user: 只听法老叔叔
Saved comment from user: -JimLee
Saved comment from user: Yuz27
Saved comment from user: 待机时间吧
Saved comment from user: Zrz_un0
Saved comment from user: 记性没记性
Saved comment from user: 玉米溜溜儿
Saved comment from user: L7别对我说谎
Saved comment from user: 高坚果_ww
Saved comment from user: BobbY-zzzz
Saved comment from user: 谷神星27
Saved comment from user: smash_deke
Saved comment from user: 爱意葬与黎明
Saved comment from user: 爱意葬与黎明
Saved comment from user: -EUz_
Saved comment from user: 致死痛
Saved comment from user: 对着sasi疯狂嶋
Saved comment from user: 龙森湖潜水员
Saved comment from user: Flashlightiii
Saved comment from user: 阿文痴心绝对
Saved comment from user: 7yy_X
Saved comment from user: 别困觉
Saved comment from user: sjdhdjj
Saved comment from user: AAA北龙口建材市场黑大帅
Saved comment from user: 哥我不说唉你
Saved comment from user: 数钱的季节
Saved comment from user: 数钱的季节
Saved comment from user: 青岛金泫雅_JYS
Saved comment from user: 镜镜镜JZ

KeyboardInterrupt: 

In [11]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row that also carries a few fields taken
    from the commenter's public profile (age, gender, city, signature).
    """

    def __init__(self):
        """Prepare the HTTP headers that are sent with every API request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile):
        """Turn a raw ``profile`` mapping into the per-user CSV columns.

        Missing or ``None`` values fall back to the placeholder '无', so a
        sparse profile never raises.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.
        """
        def pick(key):
            value = profile.get(key)
            return '无' if value is None else value

        info = {
            'gender': pick('gender'),
            'age': '无',
            'city': pick('city'),
            # Flatten the signature to one line and swap ASCII commas for
            # full-width ones, as the stored data has always done.
            'sign': str(pick('signature')).strip().replace('\n', ' ').replace(',', '，'),
        }

        birthday = profile.get('birthday')
        # The formula below handles ``birthday`` as milliseconds since 1970;
        # non-positive values are treated as "not set".
        if isinstance(birthday, (int, float)) and birthday > 0:
            years_after_1970 = int(birthday // (1000 * 365 * 24 * 3600))
            # Use the current year instead of a hard-coded one so the age
            # stays correct whenever the scraper is run.
            age = (time.localtime().tm_year - 1970) - years_after_1970
            info['age'] = max(0, age)

        return info

    def get_user_info(self, user_id):
        """Look up the public profile of ``user_id``.

        The request is attempted up to three times; after a failed attempt
        the error is printed and the method waits three seconds.  A response
        that arrives but carries no usable profile is not retried.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.  Every value that
            could not be obtained is the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data = self._parse_profile(profile)
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: id of the song whose comments are requested.
            offset: index of the first comment of the page.
            limit: maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    @staticmethod
    def _write_row(filename, mode, row):
        """Write ``row`` as one properly quoted CSV record to ``filename``."""
        import csv  # local import keeps this class self-contained

        # newline='' leaves line endings to the csv module, so a line break
        # inside a quoted field is written unchanged.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([str(value) for value in row])

    def save_comment(self, data, filename='comments_data32.csv'):
        """Append ``data`` (a sequence of column values) as one CSV row.

        Values are converted with ``str``; fields containing commas, quotes
        or line breaks are quoted so they cannot break the row.
        """
        self._write_row(filename, 'a', data)

    def create_csv_header(self, filename='comments_data32.csv'):
        """Create or overwrite ``filename`` so it holds only the header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through the comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, a page is
        empty, or a page holds fewer comments than requested.  Rows go to the
        default file of ``save_comment``.  A comment that cannot be processed
        is reported and skipped.
        """
        offset = 0
        limit = 100
        # user_id -> profile columns, so a user who comments several times
        # costs only one profile request.
        profiles = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    fetched = user_id not in profiles
                    if fetched:
                        profiles[user_id] = self.get_user_info(user_id)
                    user_info = profiles[user_id]

                    # Only the first ten digits of the timestamp are used,
                    # i.e. whole seconds of a 13-digit millisecond value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    if fetched:
                        time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape every comment of one hard-coded song into the default CSV file.

    The output file is first reset to contain only the header row; the
    comment rows are then appended to it one by one.
    """
    target_song = '2645870286'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Start the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 叉烧爱好者1977号
Saved comment from user: 那很幼稚吗
Saved comment from user: 九哥的文艺细胞
Saved comment from user: Alice又子
Saved comment from user: loyd03
Saved comment from user: 弱智儿童生活
Saved comment from user: 徐国调帝
Saved comment from user: 荷鲁斯HorusXXX
Saved comment from user: MEGA__
Saved comment from user: Pharaoh440
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: YLevoled
Saved comment from user: daydzx
Saved comment from user: 欲安yu_an
Saved comment from user: 荷鲁斯HorusXXX
Saved comment from user: 电蚊拍啊拍
Saved comment from user: 肥料包
Saved comment from user: JTmTime
Saved comment from user: 黑白郎君lzj
Saved comment from user: STEVENSSSONG
Saved comment from user: STEVENSSSONG
Saved comment from user: STEVENSSSONG
Saved comment from u

In [12]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row that also carries a few fields taken
    from the commenter's public profile (age, gender, city, signature).
    """

    def __init__(self):
        """Prepare the HTTP headers that are sent with every API request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile):
        """Turn a raw ``profile`` mapping into the per-user CSV columns.

        Missing or ``None`` values fall back to the placeholder '无', so a
        sparse profile never raises.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.
        """
        def pick(key):
            value = profile.get(key)
            return '无' if value is None else value

        info = {
            'gender': pick('gender'),
            'age': '无',
            'city': pick('city'),
            # Flatten the signature to one line and swap ASCII commas for
            # full-width ones, as the stored data has always done.
            'sign': str(pick('signature')).strip().replace('\n', ' ').replace(',', '，'),
        }

        birthday = profile.get('birthday')
        # The formula below handles ``birthday`` as milliseconds since 1970;
        # non-positive values are treated as "not set".
        if isinstance(birthday, (int, float)) and birthday > 0:
            years_after_1970 = int(birthday // (1000 * 365 * 24 * 3600))
            # Use the current year instead of a hard-coded one so the age
            # stays correct whenever the scraper is run.
            age = (time.localtime().tm_year - 1970) - years_after_1970
            info['age'] = max(0, age)

        return info

    def get_user_info(self, user_id):
        """Look up the public profile of ``user_id``.

        The request is attempted up to three times; after a failed attempt
        the error is printed and the method waits three seconds.  A response
        that arrives but carries no usable profile is not retried.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.  Every value that
            could not be obtained is the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data = self._parse_profile(profile)
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: id of the song whose comments are requested.
            offset: index of the first comment of the page.
            limit: maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    @staticmethod
    def _write_row(filename, mode, row):
        """Write ``row`` as one properly quoted CSV record to ``filename``."""
        import csv  # local import keeps this class self-contained

        # newline='' leaves line endings to the csv module, so a line break
        # inside a quoted field is written unchanged.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([str(value) for value in row])

    def save_comment(self, data, filename='comments_data33.csv'):
        """Append ``data`` (a sequence of column values) as one CSV row.

        Values are converted with ``str``; fields containing commas, quotes
        or line breaks are quoted so they cannot break the row.
        """
        self._write_row(filename, 'a', data)

    def create_csv_header(self, filename='comments_data33.csv'):
        """Create or overwrite ``filename`` so it holds only the header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through the comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, a page is
        empty, or a page holds fewer comments than requested.  Rows go to the
        default file of ``save_comment``.  A comment that cannot be processed
        is reported and skipped.
        """
        offset = 0
        limit = 100
        # user_id -> profile columns, so a user who comments several times
        # costs only one profile request.
        profiles = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    fetched = user_id not in profiles
                    if fetched:
                        profiles[user_id] = self.get_user_info(user_id)
                    user_info = profiles[user_id]

                    # Only the first ten digits of the timestamp are used,
                    # i.e. whole seconds of a 13-digit millisecond value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    if fetched:
                        time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape every comment of one hard-coded song into the default CSV file.

    The output file is first reset to contain only the header row; the
    comment rows are then appended to it one by one.
    """
    target_song = '2645212878'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Start the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: Skiluv
Saved comment from user: EternalAix
Saved comment from user: Skiluv
Saved comment from user: 小璟-_-
Saved comment from user: 小璟-_-
Saved comment from user: 困死了kunsil
Saved comment from user: 1Rocket
Saved comment from user: PrinkG
Saved comment from user: 海浪__A
Saved comment from user: 因为懒全点了宅属性
Saved comment from user: SAINT_DEMON
Saved comment from user: Tracasserie_
Saved comment from user: aweirqqqqqq
Saved comment from user: c4rw
Saved comment from user: 吹瓶男孩
Saved comment from user: kkkkill1uvcloseyoureyes
Saved comment from user: 一拳揍飞烟烟
Saved comment from user: 昵称全被占用了我的天
Saved comment from user: 悲伤滑入身体
Saved comment from user: 你亦在万人之中
Saved comment from user: 吹瓶男孩
Saved comment from user: DEATH_44
Saved comment from user: 宇宙最强首席
Saved comment from user: 拼湊回憶穿透玻璃心
Saved comment from user: 昵称全被占用了我的天
Saved comment from user: 梦chin
Saved comment from user: 携手走江南
Saved comment from user: 下個秋天說再見_902
Saved comment from user: 不想被玩弄感情
Saved comment from 

In [13]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row that also carries a few fields taken
    from the commenter's public profile (age, gender, city, signature).
    """

    def __init__(self):
        """Prepare the HTTP headers that are sent with every API request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile):
        """Turn a raw ``profile`` mapping into the per-user CSV columns.

        Missing or ``None`` values fall back to the placeholder '无', so a
        sparse profile never raises.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.
        """
        def pick(key):
            value = profile.get(key)
            return '无' if value is None else value

        info = {
            'gender': pick('gender'),
            'age': '无',
            'city': pick('city'),
            # Flatten the signature to one line and swap ASCII commas for
            # full-width ones, as the stored data has always done.
            'sign': str(pick('signature')).strip().replace('\n', ' ').replace(',', '，'),
        }

        birthday = profile.get('birthday')
        # The formula below handles ``birthday`` as milliseconds since 1970;
        # non-positive values are treated as "not set".
        if isinstance(birthday, (int, float)) and birthday > 0:
            years_after_1970 = int(birthday // (1000 * 365 * 24 * 3600))
            # Use the current year instead of a hard-coded one so the age
            # stays correct whenever the scraper is run.
            age = (time.localtime().tm_year - 1970) - years_after_1970
            info['age'] = max(0, age)

        return info

    def get_user_info(self, user_id):
        """Look up the public profile of ``user_id``.

        The request is attempted up to three times; after a failed attempt
        the error is printed and the method waits three seconds.  A response
        that arrives but carries no usable profile is not retried.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.  Every value that
            could not be obtained is the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data = self._parse_profile(profile)
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: id of the song whose comments are requested.
            offset: index of the first comment of the page.
            limit: maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    @staticmethod
    def _write_row(filename, mode, row):
        """Write ``row`` as one properly quoted CSV record to ``filename``."""
        import csv  # local import keeps this class self-contained

        # newline='' leaves line endings to the csv module, so a line break
        # inside a quoted field is written unchanged.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([str(value) for value in row])

    def save_comment(self, data, filename='comments_data34.csv'):
        """Append ``data`` (a sequence of column values) as one CSV row.

        Values are converted with ``str``; fields containing commas, quotes
        or line breaks are quoted so they cannot break the row.
        """
        self._write_row(filename, 'a', data)

    def create_csv_header(self, filename='comments_data34.csv'):
        """Create or overwrite ``filename`` so it holds only the header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through the comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, a page is
        empty, or a page holds fewer comments than requested.  Rows go to the
        default file of ``save_comment``.  A comment that cannot be processed
        is reported and skipped.
        """
        offset = 0
        limit = 100
        # user_id -> profile columns, so a user who comments several times
        # costs only one profile request.
        profiles = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    fetched = user_id not in profiles
                    if fetched:
                        profiles[user_id] = self.get_user_info(user_id)
                    user_info = profiles[user_id]

                    # Only the first ten digits of the timestamp are used,
                    # i.e. whole seconds of a 13-digit millisecond value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    if fetched:
                        time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape every comment of one hard-coded song into the default CSV file.

    The output file is first reset to contain only the header row; the
    comment rows are then appended to it one by one.
    """
    target_song = '2645500113'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Start the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 你就是光吧
Saved comment from user: 打扰了iii
Saved comment from user: 怎暮w
Saved comment from user: 跟我捉迷藏吗
Saved comment from user: Koimon
Saved comment from user: qa不会起名
Saved comment from user: 按时睡觉觉觉觉
Saved comment from user: 热爱藏在黎明
Saved comment from user: 1999goddess
Saved comment from user: 1999goddess
Saved comment from user: phoyk
Saved comment from user: 雪落为妍
Saved comment from user: 遗忘周的十月初雪
Saved comment from user: 笨拙小喵
Saved comment from user: kelly欢喜自在莲
Saved comment from user: 云村村民162044269076127
Saved comment from user: 高调的路过
Saved comment from user: Gniryratenalp
Saved comment from user: 开开芯芯kkxx
Saved comment from user: 珠宝店兴奋万圣节
Saved comment from user: 云村村民166898805297410
Saved comment from user: TvT1aR
Saved comment from user: 祈故愿橘平安
Saved comment from user: 黑白情漾
Saved comment from user: llRrconn
Saved comment from user: 用户1441068707
Saved comment from user: 手握星辰世间这般人
Saved comment from user: 小李飞刀txr
Saved comment from user: 落泪无痕pql
Saved comment fro

In [14]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row that also carries a few fields taken
    from the commenter's public profile (age, gender, city, signature).
    """

    def __init__(self):
        """Prepare the HTTP headers that are sent with every API request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile):
        """Turn a raw ``profile`` mapping into the per-user CSV columns.

        Missing or ``None`` values fall back to the placeholder '无', so a
        sparse profile never raises.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.
        """
        def pick(key):
            value = profile.get(key)
            return '无' if value is None else value

        info = {
            'gender': pick('gender'),
            'age': '无',
            'city': pick('city'),
            # Flatten the signature to one line and swap ASCII commas for
            # full-width ones, as the stored data has always done.
            'sign': str(pick('signature')).strip().replace('\n', ' ').replace(',', '，'),
        }

        birthday = profile.get('birthday')
        # The formula below handles ``birthday`` as milliseconds since 1970;
        # non-positive values are treated as "not set".
        if isinstance(birthday, (int, float)) and birthday > 0:
            years_after_1970 = int(birthday // (1000 * 365 * 24 * 3600))
            # Use the current year instead of a hard-coded one so the age
            # stays correct whenever the scraper is run.
            age = (time.localtime().tm_year - 1970) - years_after_1970
            info['age'] = max(0, age)

        return info

    def get_user_info(self, user_id):
        """Look up the public profile of ``user_id``.

        The request is attempted up to three times; after a failed attempt
        the error is printed and the method waits three seconds.  A response
        that arrives but carries no usable profile is not retried.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.  Every value that
            could not be obtained is the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data = self._parse_profile(profile)
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: id of the song whose comments are requested.
            offset: index of the first comment of the page.
            limit: maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    @staticmethod
    def _write_row(filename, mode, row):
        """Write ``row`` as one properly quoted CSV record to ``filename``."""
        import csv  # local import keeps this class self-contained

        # newline='' leaves line endings to the csv module, so a line break
        # inside a quoted field is written unchanged.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([str(value) for value in row])

    def save_comment(self, data, filename='comments_data35.csv'):
        """Append ``data`` (a sequence of column values) as one CSV row.

        Values are converted with ``str``; fields containing commas, quotes
        or line breaks are quoted so they cannot break the row.
        """
        self._write_row(filename, 'a', data)

    def create_csv_header(self, filename='comments_data35.csv'):
        """Create or overwrite ``filename`` so it holds only the header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through the comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, a page is
        empty, or a page holds fewer comments than requested.  Rows go to the
        default file of ``save_comment``.  A comment that cannot be processed
        is reported and skipped.
        """
        offset = 0
        limit = 100
        # user_id -> profile columns, so a user who comments several times
        # costs only one profile request.
        profiles = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    fetched = user_id not in profiles
                    if fetched:
                        profiles[user_id] = self.get_user_info(user_id)
                    user_info = profiles[user_id]

                    # Only the first ten digits of the timestamp are used,
                    # i.e. whole seconds of a 13-digit millisecond value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    if fetched:
                        time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape every comment of one hard-coded song into the default CSV file.

    The output file is first reset to contain only the header row; the
    comment rows are then appended to it one by one.
    """
    target_song = '2644090425'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Start the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: Yh-8
Saved comment from user: l_cky-u
Saved comment from user: 长翅膀的蒋二狗
Saved comment from user: 圆圆也想被爱
Saved comment from user: 秋识冬岛
Saved comment from user: 天真的单先生
Saved comment from user: Sry4theWait
Saved comment from user: Soledadyu
Saved comment from user: 小熊kuma开心
Saved comment from user: 宇众不同c
Saved comment from user: YEL-YEL
Saved comment from user: 长翅膀的蒋二狗
Saved comment from user: 嫩模_
Saved comment from user: 是麻酱咩
Saved comment from user: 心已碎城已塌
Saved comment from user: 丝丝绕指柔
Saved comment from user: 十三号康娜酱
Saved comment from user: 发财小乖狗
Saved comment from user: DaQuan大全
Saved comment from user: 喜欢睡地毯
Saved comment from user: PPpige嗝
Saved comment from user: 32lullaby23
Saved comment from user: 长翅膀的蒋二狗
Saved comment from user: 长翅膀的蒋二狗
Saved comment from user: Laity-L001
Saved comment from user: 树遇叶
Saved comment from user: 放輕鬆一點
Saved comment from user: 会点说唱
Saved comment from user: 很多很多很简单
Saved comment from user: 顾安安-同频共振版
Saved comment from user: Sr

In [15]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Each comment becomes one CSV row that also carries a few fields taken
    from the commenter's public profile (age, gender, city, signature).
    """

    def __init__(self):
        """Prepare the HTTP headers that are sent with every API request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _parse_profile(profile):
        """Turn a raw ``profile`` mapping into the per-user CSV columns.

        Missing or ``None`` values fall back to the placeholder '无', so a
        sparse profile never raises.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.
        """
        def pick(key):
            value = profile.get(key)
            return '无' if value is None else value

        info = {
            'gender': pick('gender'),
            'age': '无',
            'city': pick('city'),
            # Flatten the signature to one line and swap ASCII commas for
            # full-width ones, as the stored data has always done.
            'sign': str(pick('signature')).strip().replace('\n', ' ').replace(',', '，'),
        }

        birthday = profile.get('birthday')
        # The formula below handles ``birthday`` as milliseconds since 1970;
        # non-positive values are treated as "not set".
        if isinstance(birthday, (int, float)) and birthday > 0:
            years_after_1970 = int(birthday // (1000 * 365 * 24 * 3600))
            # Use the current year instead of a hard-coded one so the age
            # stays correct whenever the scraper is run.
            age = (time.localtime().tm_year - 1970) - years_after_1970
            info['age'] = max(0, age)

        return info

    def get_user_info(self, user_id):
        """Look up the public profile of ``user_id``.

        The request is attempted up to three times; after a failed attempt
        the error is printed and the method waits three seconds.  A response
        that arrives but carries no usable profile is not retried.

        Returns:
            dict: keys 'gender', 'age', 'city' and 'sign'.  Every value that
            could not be obtained is the placeholder '无'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        data = self._parse_profile(profile)
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Args:
            song_id: id of the song whose comments are requested.
            offset: index of the first comment of the page.
            limit: maximum number of comments in the page.

        Returns:
            The decoded JSON response, or ``None`` when the request or the
            decoding fails (the error is printed).
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # The timeout keeps a stalled connection from hanging the scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    @staticmethod
    def _write_row(filename, mode, row):
        """Write ``row`` as one properly quoted CSV record to ``filename``."""
        import csv  # local import keeps this class self-contained

        # newline='' leaves line endings to the csv module, so a line break
        # inside a quoted field is written unchanged.
        with open(filename, mode, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow([str(value) for value in row])

    def save_comment(self, data, filename='comments_data36.csv'):
        """Append ``data`` (a sequence of column values) as one CSV row.

        Values are converted with ``str``; fields containing commas, quotes
        or line breaks are quoted so they cannot break the row.
        """
        self._write_row(filename, 'a', data)

    def create_csv_header(self, filename='comments_data36.csv'):
        """Create or overwrite ``filename`` so it holds only the header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        self._write_row(filename, 'w', headers)

    def scrape_comments(self, song_id):
        """Page through the comments of ``song_id`` and save each one.

        Pages of 100 comments are requested until a request fails, a page is
        empty, or a page holds fewer comments than requested.  Rows go to the
        default file of ``save_comment``.  A comment that cannot be processed
        is reported and skipped.
        """
        offset = 0
        limit = 100
        # user_id -> profile columns, so a user who comments several times
        # costs only one profile request.
        profiles = {}

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    fetched = user_id not in profiles
                    if fetched:
                        profiles[user_id] = self.get_user_info(user_id)
                    user_info = profiles[user_id]

                    # Only the first ten digits of the timestamp are used,
                    # i.e. whole seconds of a 13-digit millisecond value.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    if fetched:
                        time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape every comment of one hard-coded song into the default CSV file.

    The output file is first reset to contain only the header row; the
    comment rows are then appended to it one by one.
    """
    target_song = '2638526817'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song)


# Start the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 千秋帝业
Saved comment from user: Ecstasyyy--
Saved comment from user: Firing_Star
Saved comment from user: 艺术家_们
Saved comment from user: 五更狗狗
Saved comment from user: 小迉
Saved comment from user: 宗爸-
Saved comment from user: 加点油_zyf
Saved comment from user: 小困好困好困
Saved comment from user: 鼎肾
Saved comment from user: 这都是不可避免的
Saved comment from user: HEARTHT
Saved comment from user: 曾经是树德黄指导
Saved comment from user: 小BUG-BDT
Saved comment from user: 抑郁小学生__p
Saved comment from user: 顶满了一
Saved comment from user: 野原樱川
Saved comment from user: 叫我颖帝
Saved comment from user: Go_b3ok
Saved comment from user: Lilwanye
Saved comment from user: 荒诞不经的梦
Saved comment from user: _1414
Saved comment from user: iVAVi
Saved comment from user: 桃子拌汽水
Saved comment from user: 布丁咸鱼一只
Saved comment from user: 不听糜糜之音
Saved comment from user: Hustler_A
Saved comment from user: 一起吃饭_k
Saved comment from user: 宇枝狛德
Saved comment from user: 尘雨啊啊啊啊
Saved comment from user: 神秘思考者
Saved comm

In [19]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of one NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is fetched as well, so each CSV
    row combines user details (age, gender, city, signature) with the comment
    itself. Rows are written as plain comma-joined text, so every field is
    sanitised on write to keep exactly one record per line.
    """

    # Output file used when callers do not pass ``filename`` explicitly.
    DEFAULT_FILENAME = 'comments_data37.csv'
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole scrape indefinitely.
    REQUEST_TIMEOUT = 10
    # Milliseconds in a 365-day year (leap days ignored, so ages are approximate).
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Return the approximate age in years, or None if it is unknown.

        Args:
            birthday: Profile birthday, treated as milliseconds since 1970.
                Missing, non-numeric and non-positive values mean "unknown".
            current_year: Year to measure against; defaults to the current
                local year instead of a hard-coded one.
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        birth_offset = int(birthday // NeteaseMusicScraper._MS_PER_YEAR)
        return max(0, (current_year - 1970) - birth_offset)

    @staticmethod
    def _clean_field(value):
        """Return ``value`` as text that is safe inside one comma-joined row.

        Line breaks become spaces and ASCII commas become full-width commas,
        so a field can neither split a row nor shift its columns.
        """
        text = str(value)
        for line_break in ('\r\n', '\r', '\n'):
            text = text.replace(line_break, ' ')
        return text.replace(',', '，')

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Every value that could not be obtained stays at the placeholder
            '无'. The method never raises: a failing request is retried up to
            three times and then the placeholders are returned.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        # JSON null must not overwrite the placeholder, while
                        # falsy-but-valid values such as 0 must be kept.
                        for key in ('gender', 'city'):
                            if profile.get(key) is not None:
                                data[key] = profile[key]

                        age = self._age_from_birthday(profile.get('birthday'))
                        if age is not None:
                            data['age'] = age

                        signature = profile.get('signature')
                        if signature is not None:
                            data['sign'] = self._clean_field(str(signature).strip())
                # Only exceptions are retried; any answered request ends the loop.
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must not
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)  # Back off only when another attempt follows

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename=DEFAULT_FILENAME):
        """Append ``data`` (an iterable of field values) as one CSV row."""
        row = ','.join(self._clean_field(value) for value in data)
        with open(filename, 'a', encoding='utf-8-sig') as f:
            f.write(row + '\n')

    def create_csv_header(self, filename=DEFAULT_FILENAME):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig') as f:
            f.write(','.join(headers) + '\n')

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Stops when a page cannot be fetched, is empty, or holds fewer
        comments than requested. A comment that fails to process is reported
        and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or {}`` also covers an explicit JSON null user.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first ten digits of the timestamp are whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content') or '',  # sanitised by save_comment
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            # Advance by what was actually received so the progress figure is
            # exact on the final, partial page (full pages equal ``limit``).
            offset += len(comments)
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape the comments of the configured song with the scraper's defaults."""
    target_song = '2645494466'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # Write the CSV header first, then let the scraper add the comment rows.
    netease.create_csv_header()
    netease.scrape_comments(target_song)

if __name__ == '__main__':
    main()

Saved comment from user: 高唱晚
Saved comment from user: 高唱晚
Saved comment from user: 空念雪
Saved comment from user: 生薯片和洋槐蜂蜜
Saved comment from user: 高唱晚
Saved comment from user: 高唱晚
Saved comment from user: 先天失眠圣体X
Saved comment from user: 高唱晚
Saved comment from user: 木兮_GObg
Saved comment from user: 懒人伽布
Saved comment from user: rogue丶江禹
Saved comment from user: 路人酱到此一游
Saved comment from user: 一叶知秋yo_
Saved comment from user: 高唱晚
Saved comment from user: 高唱晚
Saved comment from user: 桃井缨妍
Saved comment from user: 午夜伤感猛女_dcx
Saved comment from user: 高唱晚
Saved comment from user: 高唱晚
Saved comment from user: 无尽晚风C
Saved comment from user: 高唱晚
Saved comment from user: 高唱晚
Saved comment from user: 108trap
Saved comment from user: 氛围感_zx
Saved comment from user: 少说欺骗的话
Saved comment from user: 高唱晚
Saved comment from user: 午夜伤感猛女_dcx
Saved comment from user: 冰冷的黄桃罐头
Saved comment from user: 欲皇笑到傻
Saved comment from user: VIP-徐晓桐
Saved comment from user: 汐芰最可爱
Saved comment from user: 诸葛耳边轻喘
Sav

In [18]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of one NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is fetched as well, so each CSV
    row combines user details (age, gender, city, signature) with the comment
    itself. Rows are written as plain comma-joined text, so every field is
    sanitised on write to keep exactly one record per line.
    """

    # Output file used when callers do not pass ``filename`` explicitly.
    DEFAULT_FILENAME = 'comments_data38.csv'
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole scrape indefinitely.
    REQUEST_TIMEOUT = 10
    # Milliseconds in a 365-day year (leap days ignored, so ages are approximate).
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Return the approximate age in years, or None if it is unknown.

        Args:
            birthday: Profile birthday, treated as milliseconds since 1970.
                Missing, non-numeric and non-positive values mean "unknown".
            current_year: Year to measure against; defaults to the current
                local year instead of a hard-coded one.
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        birth_offset = int(birthday // NeteaseMusicScraper._MS_PER_YEAR)
        return max(0, (current_year - 1970) - birth_offset)

    @staticmethod
    def _clean_field(value):
        """Return ``value`` as text that is safe inside one comma-joined row.

        Line breaks become spaces and ASCII commas become full-width commas,
        so a field can neither split a row nor shift its columns.
        """
        text = str(value)
        for line_break in ('\r\n', '\r', '\n'):
            text = text.replace(line_break, ' ')
        return text.replace(',', '，')

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Every value that could not be obtained stays at the placeholder
            '无'. The method never raises: a failing request is retried up to
            three times and then the placeholders are returned.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        # JSON null must not overwrite the placeholder, while
                        # falsy-but-valid values such as 0 must be kept.
                        for key in ('gender', 'city'):
                            if profile.get(key) is not None:
                                data[key] = profile[key]

                        age = self._age_from_birthday(profile.get('birthday'))
                        if age is not None:
                            data['age'] = age

                        signature = profile.get('signature')
                        if signature is not None:
                            data['sign'] = self._clean_field(str(signature).strip())
                # Only exceptions are retried; any answered request ends the loop.
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must not
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)  # Back off only when another attempt follows

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename=DEFAULT_FILENAME):
        """Append ``data`` (an iterable of field values) as one CSV row."""
        row = ','.join(self._clean_field(value) for value in data)
        with open(filename, 'a', encoding='utf-8-sig') as f:
            f.write(row + '\n')

    def create_csv_header(self, filename=DEFAULT_FILENAME):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig') as f:
            f.write(','.join(headers) + '\n')

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Stops when a page cannot be fetched, is empty, or holds fewer
        comments than requested. A comment that fails to process is reported
        and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or {}`` also covers an explicit JSON null user.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first ten digits of the timestamp are whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content') or '',  # sanitised by save_comment
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            # Advance by what was actually received so the progress figure is
            # exact on the final, partial page (full pages equal ``limit``).
            offset += len(comments)
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape the comments of the configured song with the scraper's defaults."""
    target_song = '2645124376'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # Write the CSV header first, then let the scraper add the comment rows.
    netease.create_csv_header()
    netease.scrape_comments(target_song)

if __name__ == '__main__':
    main()

Saved comment from user: 非常hentai
Saved comment from user: 忘却生舍
Saved comment from user: 空念雪
Saved comment from user: 牛仔土豆
Saved comment from user: 牛仔土豆
Saved comment from user: C_Cya
Saved comment from user: 21gBeauvois
Saved comment from user: 我爱你和你的猫
Saved comment from user: 犯罪绝无仅有
Saved comment from user: 活了个门活了多久
Saved comment from user: 淋惯了雨的人
Saved comment from user: 诺艾尔i
Saved comment from user: ReYooooooo
Saved comment from user: ReYooooooo
Saved comment from user: Seven柒七-7
Saved comment from user: 有趣哥别太YQ
Saved comment from user: 有趣哥别太YQ
Saved comment from user: 笠是我捡的
Saved comment from user: 自在蜻蜓
Saved comment from user: Make-you-love-me
Saved comment from user: 墨祁好像没什么用
Saved comment from user: 烧鹅小少nati
Saved comment from user: 怪-核
Saved comment from user: -微笙
Saved comment from user: 卦者那啥孓靈風yu
Saved comment from user: 茹果不再如果
Saved comment from user: 君主jru
Saved comment from user: 斿有之极
Saved comment from user: 胡尔盖茨
Saved comment from user: 想留的人_不会走
Saved comment from user:

In [None]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of one NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is fetched as well, so each CSV
    row combines user details (age, gender, city, signature) with the comment
    itself. Rows are written as plain comma-joined text, so every field is
    sanitised on write to keep exactly one record per line.
    """

    # Output file used when callers do not pass ``filename`` explicitly.
    DEFAULT_FILENAME = 'comments_data39.csv'
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole scrape indefinitely.
    REQUEST_TIMEOUT = 10
    # Milliseconds in a 365-day year (leap days ignored, so ages are approximate).
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Return the approximate age in years, or None if it is unknown.

        Args:
            birthday: Profile birthday, treated as milliseconds since 1970.
                Missing, non-numeric and non-positive values mean "unknown".
            current_year: Year to measure against; defaults to the current
                local year instead of a hard-coded one.
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        birth_offset = int(birthday // NeteaseMusicScraper._MS_PER_YEAR)
        return max(0, (current_year - 1970) - birth_offset)

    @staticmethod
    def _clean_field(value):
        """Return ``value`` as text that is safe inside one comma-joined row.

        Line breaks become spaces and ASCII commas become full-width commas,
        so a field can neither split a row nor shift its columns.
        """
        text = str(value)
        for line_break in ('\r\n', '\r', '\n'):
            text = text.replace(line_break, ' ')
        return text.replace(',', '，')

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Every value that could not be obtained stays at the placeholder
            '无'. The method never raises: a failing request is retried up to
            three times and then the placeholders are returned.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        # JSON null must not overwrite the placeholder, while
                        # falsy-but-valid values such as 0 must be kept.
                        for key in ('gender', 'city'):
                            if profile.get(key) is not None:
                                data[key] = profile[key]

                        age = self._age_from_birthday(profile.get('birthday'))
                        if age is not None:
                            data['age'] = age

                        signature = profile.get('signature')
                        if signature is not None:
                            data['sign'] = self._clean_field(str(signature).strip())
                # Only exceptions are retried; any answered request ends the loop.
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must not
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)  # Back off only when another attempt follows

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename=DEFAULT_FILENAME):
        """Append ``data`` (an iterable of field values) as one CSV row."""
        row = ','.join(self._clean_field(value) for value in data)
        with open(filename, 'a', encoding='utf-8-sig') as f:
            f.write(row + '\n')

    def create_csv_header(self, filename=DEFAULT_FILENAME):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig') as f:
            f.write(','.join(headers) + '\n')

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Stops when a page cannot be fetched, is empty, or holds fewer
        comments than requested. A comment that fails to process is reported
        and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or {}`` also covers an explicit JSON null user.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first ten digits of the timestamp are whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content') or '',  # sanitised by save_comment
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            # Advance by what was actually received so the progress figure is
            # exact on the final, partial page (full pages equal ``limit``).
            offset += len(comments)
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape the comments of the configured song with the scraper's defaults."""
    target_song = '2643121166'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # Write the CSV header first, then let the scraper add the comment rows.
    netease.create_csv_header()
    netease.scrape_comments(target_song)

if __name__ == '__main__':
    main()

In [22]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of one NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is fetched as well, so each CSV
    row combines user details (age, gender, city, signature) with the comment
    itself. Rows are written as plain comma-joined text, so every field is
    sanitised on write to keep exactly one record per line.
    """

    # Output file used when callers do not pass ``filename`` explicitly.
    DEFAULT_FILENAME = 'comments_data40.csv'
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole scrape indefinitely.
    REQUEST_TIMEOUT = 10
    # Milliseconds in a 365-day year (leap days ignored, so ages are approximate).
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Return the approximate age in years, or None if it is unknown.

        Args:
            birthday: Profile birthday, treated as milliseconds since 1970.
                Missing, non-numeric and non-positive values mean "unknown".
            current_year: Year to measure against; defaults to the current
                local year instead of a hard-coded one.
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        birth_offset = int(birthday // NeteaseMusicScraper._MS_PER_YEAR)
        return max(0, (current_year - 1970) - birth_offset)

    @staticmethod
    def _clean_field(value):
        """Return ``value`` as text that is safe inside one comma-joined row.

        Line breaks become spaces and ASCII commas become full-width commas,
        so a field can neither split a row nor shift its columns.
        """
        text = str(value)
        for line_break in ('\r\n', '\r', '\n'):
            text = text.replace(line_break, ' ')
        return text.replace(',', '，')

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Every value that could not be obtained stays at the placeholder
            '无'. The method never raises: a failing request is retried up to
            three times and then the placeholders are returned.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        # JSON null must not overwrite the placeholder, while
                        # falsy-but-valid values such as 0 must be kept.
                        for key in ('gender', 'city'):
                            if profile.get(key) is not None:
                                data[key] = profile[key]

                        age = self._age_from_birthday(profile.get('birthday'))
                        if age is not None:
                            data['age'] = age

                        signature = profile.get('signature')
                        if signature is not None:
                            data['sign'] = self._clean_field(str(signature).strip())
                # Only exceptions are retried; any answered request ends the loop.
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must not
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)  # Back off only when another attempt follows

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename=DEFAULT_FILENAME):
        """Append ``data`` (an iterable of field values) as one CSV row."""
        row = ','.join(self._clean_field(value) for value in data)
        with open(filename, 'a', encoding='utf-8-sig') as f:
            f.write(row + '\n')

    def create_csv_header(self, filename=DEFAULT_FILENAME):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig') as f:
            f.write(','.join(headers) + '\n')

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Stops when a page cannot be fetched, is empty, or holds fewer
        comments than requested. A comment that fails to process is reported
        and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or {}`` also covers an explicit JSON null user.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first ten digits of the timestamp are whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content') or '',  # sanitised by save_comment
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            # Advance by what was actually received so the progress figure is
            # exact on the final, partial page (full pages equal ``limit``).
            offset += len(comments)
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape the comments of the configured song with the scraper's defaults."""
    target_song = '2644175638'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # Write the CSV header first, then let the scraper add the comment rows.
    netease.create_csv_header()
    netease.scrape_comments(target_song)

if __name__ == '__main__':
    main()

Saved comment from user: 嫩模_
Saved comment from user: 6667MN
Saved comment from user: 6667MN
Saved comment from user: 被爱挺难
Saved comment from user: 摩羯羅_pUY0
Saved comment from user: 金桔0RANG3
Saved comment from user: 丘岳狸花猫-
Saved comment from user: xxxxxxxxx_you
Saved comment from user: jcbrylsxlglS
Saved comment from user: 坏女孩-g
Saved comment from user: 彼岸花开璃洛轩
Saved comment from user: 陈靓乐
Saved comment from user: 车车车车银优嗷o
Saved comment from user: 第几页的序
Saved comment from user: JianchaguanEN
Saved comment from user: 生巧卷
Saved comment from user: 余余裕裕
Saved comment from user: 落空当涂
Saved comment from user: 车车车车银优嗷o
Saved comment from user: 无比往然
Saved comment from user: 云村村民160549712089878
Saved comment from user: MY宁--
Saved comment from user: 请吃掉我的脑子
Saved comment from user: 陈誊珩
Saved comment from user: 小宇变无语了
Saved comment from user: 克服焦虑病
Saved comment from user: hush-刘北山
Saved comment from user: VJ-Karen
Saved comment from user: 归予尔尔
Saved comment from user: 梦想pfu
Saved comment from u

In [23]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Download the comments of one NetEase Cloud Music song into a CSV file.

    For every comment the commenter's profile is fetched as well, so each CSV
    row combines user details (age, gender, city, signature) with the comment
    itself. Rows are written as plain comma-joined text, so every field is
    sanitised on write to keep exactly one record per line.
    """

    # Output file used when callers do not pass ``filename`` explicitly.
    DEFAULT_FILENAME = 'comments_data41.csv'
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole scrape indefinitely.
    REQUEST_TIMEOUT = 10
    # Milliseconds in a 365-day year (leap days ignored, so ages are approximate).
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        """Prepare the HTTP headers sent with every request."""
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @staticmethod
    def _age_from_birthday(birthday, current_year=None):
        """Return the approximate age in years, or None if it is unknown.

        Args:
            birthday: Profile birthday, treated as milliseconds since 1970.
                Missing, non-numeric and non-positive values mean "unknown".
            current_year: Year to measure against; defaults to the current
                local year instead of a hard-coded one.
        """
        if not isinstance(birthday, (int, float)) or birthday <= 0:
            return None
        if current_year is None:
            current_year = time.localtime().tm_year
        birth_offset = int(birthday // NeteaseMusicScraper._MS_PER_YEAR)
        return max(0, (current_year - 1970) - birth_offset)

    @staticmethod
    def _clean_field(value):
        """Return ``value`` as text that is safe inside one comma-joined row.

        Line breaks become spaces and ASCII commas become full-width commas,
        so a field can neither split a row nor shift its columns.
        """
        text = str(value)
        for line_break in ('\r\n', '\r', '\n'):
            text = text.replace(line_break, ' ')
        return text.replace(',', '，')

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for ``user_id``.

        Returns:
            A dict with the keys ``gender``, ``age``, ``city`` and ``sign``.
            Every value that could not be obtained stays at the placeholder
            '无'. The method never raises: a failing request is retried up to
            three times and then the placeholders are returned.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(url=url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    result = response.json()
                    profile = result.get('profile')
                    if result.get('code') == 200 and isinstance(profile, dict):
                        # JSON null must not overwrite the placeholder, while
                        # falsy-but-valid values such as 0 must be kept.
                        for key in ('gender', 'city'):
                            if profile.get(key) is not None:
                                data[key] = profile[key]

                        age = self._age_from_birthday(profile.get('birthday'))
                        if age is not None:
                            data['age'] = age

                        signature = profile.get('signature')
                        if signature is not None:
                            data['sign'] = self._clean_field(str(signature).strip())
                # Only exceptions are retried; any answered request ends the loop.
                break
            except Exception as e:
                # Deliberately broad: the lookup is best-effort and must not
                # abort the scrape.
                print(f"Error getting user info for {user_id}: {str(e)}")
                if attempt < max_retries:
                    time.sleep(3)  # Back off only when another attempt follows

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for ``song_id``.

        Returns:
            The decoded JSON response, or None if the request or the JSON
            decoding failed.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            response = requests.get(url=url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename=DEFAULT_FILENAME):
        """Append ``data`` (an iterable of field values) as one CSV row."""
        row = ','.join(self._clean_field(value) for value in data)
        with open(filename, 'a', encoding='utf-8-sig') as f:
            f.write(row + '\n')

    def create_csv_header(self, filename=DEFAULT_FILENAME):
        """Create (or truncate) ``filename`` and write the column header row."""
        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig') as f:
            f.write(','.join(headers) + '\n')

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and save each one.

        Stops when a page cannot be fetched, is empty, or holds fewer
        comments than requested. A comment that fails to process is reported
        and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # ``or {}`` also covers an explicit JSON null user.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first ten digits of the timestamp are whole seconds.
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content') or '',  # sanitised by save_comment
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            # Advance by what was actually received so the progress figure is
            # exact on the final, partial page (full pages equal ``limit``).
            offset += len(comments)
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape the comments of the configured song with the scraper's defaults."""
    target_song = '2644628426'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    # Write the CSV header first, then let the scraper add the comment rows.
    netease.create_csv_header()
    netease.scrape_comments(target_song)

if __name__ == '__main__':
    main()

Saved comment from user: 陪妮低空飞行la
Saved comment from user: 陪妮低空飞行la
Saved comment from user: 陪妮低空飞行la
Saved comment from user: 疏散星河
Saved comment from user: t-show90426
Saved comment from user: 奶浓的绝世盛宠娇妻
Saved comment from user: 控萝莉的蜀黍怪
Saved comment from user: Palkia彡w
Saved comment from user: 谦dsv
Saved comment from user: 蛋那个蛋吖
Saved comment from user: ONE苏杨
Saved comment from user: ONE苏杨
Saved comment from user: 晷yks
Saved comment from user: 宫崎绫子
Saved comment from user: 我的依靠涛
Saved comment from user: 心碎芋圆球1
Saved comment from user: 李斯derrrrrr妮
Saved comment from user: j雨季_
Saved comment from user: j雨季_
Saved comment from user: 三亿_HJW
Saved comment from user: 只执着你一个
Saved comment from user: 纪念馆深棕黄抓娃娃达人
Saved comment from user: 晚蜂
Saved comment from user: sunknightmzp
Saved comment from user: 一鸣惊人jth
Saved comment from user: 汐蓝画语
Saved comment from user: 黄昏味的吻
Saved comment from user: 放学后一起去便利店吗
Saved comment from user: JinniWang
Saved comment from user: 濛濛Alice
Saved comment from us

In [24]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile (age, gender,
    city, signature) fetched from the user-detail endpoint.
    """

    # Placeholder stored when a profile field is unavailable.
    _UNKNOWN = '无'
    # Milliseconds in a 365-day year; `birthday` is handled as epoch milliseconds.
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @classmethod
    def _parse_profile(cls, profile):
        """Flatten a raw ``profile`` mapping into the dict used for CSV rows.

        Returns a dict with keys ``gender``, ``age``, ``city`` and ``sign``.
        Missing or ``None`` fields fall back to the placeholder instead of
        raising, so a sparse profile never aborts the caller.
        """
        data = {
            'gender': cls._UNKNOWN,
            'age': cls._UNKNOWN,
            'city': cls._UNKNOWN,
            'sign': cls._UNKNOWN
        }
        if not isinstance(profile, dict):
            return data

        data['gender'] = profile.get('gender', cls._UNKNOWN)

        birthday = profile.get('birthday') or 0
        # Non-positive birthdays are treated as "not set", as before.
        if isinstance(birthday, (int, float)) and birthday > 0:
            birth_year = 1970 + birthday // cls._MS_PER_YEAR
            # Use the current year rather than a hard-coded one.
            data['age'] = max(0, time.localtime().tm_year - birth_year)

        data['city'] = profile.get('city', cls._UNKNOWN)

        signature = profile.get('signature')
        if signature is None:
            signature = cls._UNKNOWN
        # Newlines/commas are normalised to keep the established output format.
        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
        return data

    def get_user_info(self, user_id):
        """Fetch a user's profile and return it as a flat dict.

        The request is retried up to three times on errors (3 s pause after
        each failure). A non-200 HTTP status is not retried. Parsing happens
        outside the retry loop so malformed payloads do not trigger retries.

        Returns:
            dict with keys ``gender``, ``age``, ``city``, ``sign``; every value
            is the placeholder when the profile could not be obtained.
        """
        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        result = None

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        if isinstance(result, dict) and result.get('code') == 200:
            return self._parse_profile(result.get('profile'))
        return self._parse_profile(None)

    def get_comments(self, song_id, offset=0, limit=100):
        """Return one decoded JSON page of comments, or ``None`` on any error."""
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data42.csv'):
        """Append one row to the CSV; fields are quoted when they need it."""
        import csv  # local import keeps this block self-contained

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(data)

    def create_csv_header(self, filename='comments_data42.csv'):
        """Create (or truncate) the CSV file and write the header row."""
        import csv  # local import keeps this block self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and append each to the CSV.

        Stops when a page cannot be fetched, is empty, or holds fewer than
        ``limit`` comments. A failure on a single comment is logged and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # `or` also covers keys that are present but null.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits are taken as epoch seconds
                    # (assumes a millisecond timestamp - TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape all comments of one hard-coded song into the scraper's default CSV."""
    target_song = '2644273516'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # The header call truncates the CSV, so it must run before any row is appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


if __name__ == '__main__':
    main()

Saved comment from user: 公主小悔
Saved comment from user: 守死
Saved comment from user: Feely22
Saved comment from user: 灼麒麟
Saved comment from user: 狗运男子
Saved comment from user: 岁岁S_sui-
Saved comment from user: 听岛屿0
Saved comment from user: 爱苏叶的崽子
Saved comment from user: 1oveY_4ever
Saved comment from user: 心最冷
Saved comment from user: 帮我拔掉氧气罩呗
Saved comment from user: 小不ONE
Saved comment from user: 人间NNNPC
Saved comment from user: bro甜芝魚虾
Saved comment from user: 笙雨亦凉180807
Saved comment from user: 小不ONE
Saved comment from user: 墨小染超级可爱
Saved comment from user: 所以呢_Ozh2
Saved comment from user: whostar7
Saved comment from user: 小新qwe
Saved comment from user: 忧伤玫瑰Nill
Saved comment from user: 小不ONE
Saved comment from user: 小不ONE
Saved comment from user: 小不ONE
Saved comment from user: 懒斯
Saved comment from user: 爱唉zx
Saved comment from user: 杨子坤kun
Saved comment from user: FOXFAlRY
Saved comment from user: 香糯红烧肉
Saved comment from user: 江渾渾
Saved comment from user: 江渾渾
Saved comment from

In [25]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile (age, gender,
    city, signature) fetched from the user-detail endpoint.
    """

    # Placeholder stored when a profile field is unavailable.
    _UNKNOWN = '无'
    # Milliseconds in a 365-day year; `birthday` is handled as epoch milliseconds.
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @classmethod
    def _parse_profile(cls, profile):
        """Flatten a raw ``profile`` mapping into the dict used for CSV rows.

        Returns a dict with keys ``gender``, ``age``, ``city`` and ``sign``.
        Missing or ``None`` fields fall back to the placeholder instead of
        raising, so a sparse profile never aborts the caller.
        """
        data = {
            'gender': cls._UNKNOWN,
            'age': cls._UNKNOWN,
            'city': cls._UNKNOWN,
            'sign': cls._UNKNOWN
        }
        if not isinstance(profile, dict):
            return data

        data['gender'] = profile.get('gender', cls._UNKNOWN)

        birthday = profile.get('birthday') or 0
        # Non-positive birthdays are treated as "not set", as before.
        if isinstance(birthday, (int, float)) and birthday > 0:
            birth_year = 1970 + birthday // cls._MS_PER_YEAR
            # Use the current year rather than a hard-coded one.
            data['age'] = max(0, time.localtime().tm_year - birth_year)

        data['city'] = profile.get('city', cls._UNKNOWN)

        signature = profile.get('signature')
        if signature is None:
            signature = cls._UNKNOWN
        # Newlines/commas are normalised to keep the established output format.
        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
        return data

    def get_user_info(self, user_id):
        """Fetch a user's profile and return it as a flat dict.

        The request is retried up to three times on errors (3 s pause after
        each failure). A non-200 HTTP status is not retried. Parsing happens
        outside the retry loop so malformed payloads do not trigger retries.

        Returns:
            dict with keys ``gender``, ``age``, ``city``, ``sign``; every value
            is the placeholder when the profile could not be obtained.
        """
        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        result = None

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        if isinstance(result, dict) and result.get('code') == 200:
            return self._parse_profile(result.get('profile'))
        return self._parse_profile(None)

    def get_comments(self, song_id, offset=0, limit=100):
        """Return one decoded JSON page of comments, or ``None`` on any error."""
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data43.csv'):
        """Append one row to the CSV; fields are quoted when they need it."""
        import csv  # local import keeps this block self-contained

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(data)

    def create_csv_header(self, filename='comments_data43.csv'):
        """Create (or truncate) the CSV file and write the header row."""
        import csv  # local import keeps this block self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and append each to the CSV.

        Stops when a page cannot be fetched, is empty, or holds fewer than
        ``limit`` comments. A failure on a single comment is logged and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # `or` also covers keys that are present but null.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits are taken as epoch seconds
                    # (assumes a millisecond timestamp - TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape all comments of one hard-coded song into the scraper's default CSV."""
    target_song = '2645759411'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # The header call truncates the CSV, so it must run before any row is appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


if __name__ == '__main__':
    main()

Saved comment from user: ill-wei
Saved comment from user: FYOVERLXRD
Saved comment from user: Cigood
Saved comment from user: hx輝-
Saved comment from user: 艾维Niki_
Saved comment from user: 雷击木Lightning
Saved comment from user: Pyopotatoo
Saved comment from user: 呆呆小姜OoO
Saved comment from user: nodoubIe
Saved comment from user: 60eau
Saved comment from user: nodoubIe
Saved comment from user: 60eau
Saved comment from user: nodoubIe
Saved comment from user: CFLY-
Saved comment from user: 60eau
Saved comment from user: -Ko0
Saved comment from user: Winniethepoohlovesfish
Saved comment from user: 澄爱犯困
Saved comment from user: 爱学结果
Saved comment from user: 崔莫然_
Saved comment from user: 马志恒-1
Saved comment from user: 洋pg
Saved comment from user: 乖ckx
Saved comment from user: lilnasx2005
Saved comment from user: Jumping_jumping
Saved comment from user: Cm__J
Saved comment from user: Dukiet
Saved comment from user: 我代表你们
Saved comment from user: 小麻swimming
Saved comment from user: 冰美式不要冰-_-
Sa

In [26]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile (age, gender,
    city, signature) fetched from the user-detail endpoint.
    """

    # Placeholder stored when a profile field is unavailable.
    _UNKNOWN = '无'
    # Milliseconds in a 365-day year; `birthday` is handled as epoch milliseconds.
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @classmethod
    def _parse_profile(cls, profile):
        """Flatten a raw ``profile`` mapping into the dict used for CSV rows.

        Returns a dict with keys ``gender``, ``age``, ``city`` and ``sign``.
        Missing or ``None`` fields fall back to the placeholder instead of
        raising, so a sparse profile never aborts the caller.
        """
        data = {
            'gender': cls._UNKNOWN,
            'age': cls._UNKNOWN,
            'city': cls._UNKNOWN,
            'sign': cls._UNKNOWN
        }
        if not isinstance(profile, dict):
            return data

        data['gender'] = profile.get('gender', cls._UNKNOWN)

        birthday = profile.get('birthday') or 0
        # Non-positive birthdays are treated as "not set", as before.
        if isinstance(birthday, (int, float)) and birthday > 0:
            birth_year = 1970 + birthday // cls._MS_PER_YEAR
            # Use the current year rather than a hard-coded one.
            data['age'] = max(0, time.localtime().tm_year - birth_year)

        data['city'] = profile.get('city', cls._UNKNOWN)

        signature = profile.get('signature')
        if signature is None:
            signature = cls._UNKNOWN
        # Newlines/commas are normalised to keep the established output format.
        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
        return data

    def get_user_info(self, user_id):
        """Fetch a user's profile and return it as a flat dict.

        The request is retried up to three times on errors (3 s pause after
        each failure). A non-200 HTTP status is not retried. Parsing happens
        outside the retry loop so malformed payloads do not trigger retries.

        Returns:
            dict with keys ``gender``, ``age``, ``city``, ``sign``; every value
            is the placeholder when the profile could not be obtained.
        """
        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        result = None

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        if isinstance(result, dict) and result.get('code') == 200:
            return self._parse_profile(result.get('profile'))
        return self._parse_profile(None)

    def get_comments(self, song_id, offset=0, limit=100):
        """Return one decoded JSON page of comments, or ``None`` on any error."""
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data44.csv'):
        """Append one row to the CSV; fields are quoted when they need it."""
        import csv  # local import keeps this block self-contained

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(data)

    def create_csv_header(self, filename='comments_data44.csv'):
        """Create (or truncate) the CSV file and write the header row."""
        import csv  # local import keeps this block self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and append each to the CSV.

        Stops when a page cannot be fetched, is empty, or holds fewer than
        ``limit`` comments. A failure on a single comment is logged and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # `or` also covers keys that are present but null.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits are taken as epoch seconds
                    # (assumes a millisecond timestamp - TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape all comments of one hard-coded song into the scraper's default CSV."""
    target_song = '2646006564'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # The header call truncates the CSV, so it must run before any row is appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


if __name__ == '__main__':
    main()

Saved comment from user: 誰hui替你来愛我
Saved comment from user: 卑微的Weiii
Saved comment from user: 踢易易
Saved comment from user: -Dopesniper-
Saved comment from user: 踢易易
Saved comment from user: BlankLoopy
Saved comment from user: 最耀眼的星尘
Saved comment from user: 踢易易
Saved comment from user: 踢易易
Saved comment from user: 路还长_你别太狂_
Saved comment from user: 黎月之晓
Saved comment from user: 我萝莉控骄傲
Saved comment from user: 饿死的鱼_yjx
Saved comment from user: As刘沅伸
Saved comment from user: 武夷麟
Saved comment from user: 踢易易
Saved comment from user: 踢易易
Saved comment from user: hirako2004
Saved comment from user: G4RealFrom010
Saved comment from user: 踢易易
Saved comment from user: 踢易易
Saved comment from user: Zag梓艾格
Saved comment from user: 肆伍Fortyfive
Saved comment from user: 派_早起人格
Saved comment from user: 踢易易
Saved comment from user: 踢易易
Saved comment from user: 踢易易
Saved comment from user: 地学院知名帅哥杜同学
Saved comment from user: 地学院知名帅哥杜同学
Saved comment from user: 北暝有愚
Saved comment from user: 亮仔无敌了
Saved 

In [27]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile (age, gender,
    city, signature) fetched from the user-detail endpoint.
    """

    # Placeholder stored when a profile field is unavailable.
    _UNKNOWN = '无'
    # Milliseconds in a 365-day year; `birthday` is handled as epoch milliseconds.
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @classmethod
    def _parse_profile(cls, profile):
        """Flatten a raw ``profile`` mapping into the dict used for CSV rows.

        Returns a dict with keys ``gender``, ``age``, ``city`` and ``sign``.
        Missing or ``None`` fields fall back to the placeholder instead of
        raising, so a sparse profile never aborts the caller.
        """
        data = {
            'gender': cls._UNKNOWN,
            'age': cls._UNKNOWN,
            'city': cls._UNKNOWN,
            'sign': cls._UNKNOWN
        }
        if not isinstance(profile, dict):
            return data

        data['gender'] = profile.get('gender', cls._UNKNOWN)

        birthday = profile.get('birthday') or 0
        # Non-positive birthdays are treated as "not set", as before.
        if isinstance(birthday, (int, float)) and birthday > 0:
            birth_year = 1970 + birthday // cls._MS_PER_YEAR
            # Use the current year rather than a hard-coded one.
            data['age'] = max(0, time.localtime().tm_year - birth_year)

        data['city'] = profile.get('city', cls._UNKNOWN)

        signature = profile.get('signature')
        if signature is None:
            signature = cls._UNKNOWN
        # Newlines/commas are normalised to keep the established output format.
        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
        return data

    def get_user_info(self, user_id):
        """Fetch a user's profile and return it as a flat dict.

        The request is retried up to three times on errors (3 s pause after
        each failure). A non-200 HTTP status is not retried. Parsing happens
        outside the retry loop so malformed payloads do not trigger retries.

        Returns:
            dict with keys ``gender``, ``age``, ``city``, ``sign``; every value
            is the placeholder when the profile could not be obtained.
        """
        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        result = None

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        if isinstance(result, dict) and result.get('code') == 200:
            return self._parse_profile(result.get('profile'))
        return self._parse_profile(None)

    def get_comments(self, song_id, offset=0, limit=100):
        """Return one decoded JSON page of comments, or ``None`` on any error."""
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data45.csv'):
        """Append one row to the CSV; fields are quoted when they need it."""
        import csv  # local import keeps this block self-contained

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(data)

    def create_csv_header(self, filename='comments_data45.csv'):
        """Create (or truncate) the CSV file and write the header row."""
        import csv  # local import keeps this block self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and append each to the CSV.

        Stops when a page cannot be fetched, is empty, or holds fewer than
        ``limit`` comments. A failure on a single comment is logged and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # `or` also covers keys that are present but null.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits are taken as epoch seconds
                    # (assumes a millisecond timestamp - TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape all comments of one hard-coded song into the scraper's default CSV."""
    target_song = '2645763003'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # The header call truncates the CSV, so it must run before any row is appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


if __name__ == '__main__':
    main()

Saved comment from user: 情话却如风过耳
Saved comment from user: 兰枫希彦
Saved comment from user: 醉远山
Saved comment from user: 李夏砚LXY
Saved comment from user: 李夏砚LXY
Saved comment from user: 李夏砚LXY
Saved comment from user: 李夏砚LXY
Saved comment from user: ffffulosh
Saved comment from user: 什么昵称才不存在啊
Saved comment from user: _是绵绵呀
Saved comment from user: 觉要睡饱
Saved comment from user: 垃圾也向阳
Saved comment from user: intoku-
Saved comment from user: Chaoay
Saved comment from user: 小纯妖
Saved comment from user: 带着梦赶路
Saved comment from user: 用户0197
Saved comment from user: 简单拾荒
Saved comment from user: 江海一蓑翁
Saved comment from user: 唯钟小太阳
Saved comment from user: 逍遥派鱼板主公大人
Saved comment from user: 蛟龙本该水中盘i
Saved comment from user: 時雨最終戦争
Saved comment from user: uyfytfuyivgf
Saved comment from user: 一个废人夜若
Saved comment from user: 忆念回忆里的TA
Saved comment from user: 鱼刺在吗
Saved comment from user: 不是说说而以zZ
Saved comment from user: 起名困难癌晚期患者
Saved comment from user: 墨墨YH
Saved comment from user: gubaim
Sav

In [28]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every comment row is enriched with the commenter's profile (age, gender,
    city, signature) fetched from the user-detail endpoint.
    """

    # Placeholder stored when a profile field is unavailable.
    _UNKNOWN = '无'
    # Milliseconds in a 365-day year; `birthday` is handled as epoch milliseconds.
    _MS_PER_YEAR = 1000 * 365 * 24 * 3600

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    @classmethod
    def _parse_profile(cls, profile):
        """Flatten a raw ``profile`` mapping into the dict used for CSV rows.

        Returns a dict with keys ``gender``, ``age``, ``city`` and ``sign``.
        Missing or ``None`` fields fall back to the placeholder instead of
        raising, so a sparse profile never aborts the caller.
        """
        data = {
            'gender': cls._UNKNOWN,
            'age': cls._UNKNOWN,
            'city': cls._UNKNOWN,
            'sign': cls._UNKNOWN
        }
        if not isinstance(profile, dict):
            return data

        data['gender'] = profile.get('gender', cls._UNKNOWN)

        birthday = profile.get('birthday') or 0
        # Non-positive birthdays are treated as "not set", as before.
        if isinstance(birthday, (int, float)) and birthday > 0:
            birth_year = 1970 + birthday // cls._MS_PER_YEAR
            # Use the current year rather than a hard-coded one.
            data['age'] = max(0, time.localtime().tm_year - birth_year)

        data['city'] = profile.get('city', cls._UNKNOWN)

        signature = profile.get('signature')
        if signature is None:
            signature = cls._UNKNOWN
        # Newlines/commas are normalised to keep the established output format.
        data['sign'] = str(signature).strip().replace('\n', ' ').replace(',', '，')
        return data

    def get_user_info(self, user_id):
        """Fetch a user's profile and return it as a flat dict.

        The request is retried up to three times on errors (3 s pause after
        each failure). A non-200 HTTP status is not retried. Parsing happens
        outside the retry loop so malformed payloads do not trigger retries.

        Returns:
            dict with keys ``gender``, ``age``, ``city``, ``sign``; every value
            is the placeholder when the profile could not be obtained.
        """
        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        result = None

        for _ in range(max_retries):
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                time.sleep(3)

        if isinstance(result, dict) and result.get('code') == 200:
            return self._parse_profile(result.get('profile'))
        return self._parse_profile(None)

    def get_comments(self, song_id, offset=0, limit=100):
        """Return one decoded JSON page of comments, or ``None`` on any error."""
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data46.csv'):
        """Append one row to the CSV; fields are quoted when they need it."""
        import csv  # local import keeps this block self-contained

        # newline='' lets the csv module control line endings and keeps
        # newlines embedded in quoted fields intact.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(data)

    def create_csv_header(self, filename='comments_data46.csv'):
        """Create (or truncate) the CSV file and write the header row."""
        import csv  # local import keeps this block self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f, lineterminator='\n').writerow(headers)

    def scrape_comments(self, song_id):
        """Page through all comments of ``song_id`` and append each to the CSV.

        Stops when a page cannot be fetched, is empty, or holds fewer than
        ``limit`` comments. A failure on a single comment is logged and skipped.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    # `or` also covers keys that are present but null.
                    user = comment.get('user') or {}
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits are taken as epoch seconds
                    # (assumes a millisecond timestamp - TODO confirm).
                    comment_time = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        (comment.get('content') or '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Scrape all comments of one hard-coded song into the scraper's default CSV."""
    target_song = '2644564829'  # Replace with your target song ID
    crawler = NeteaseMusicScraper()
    # The header call truncates the CSV, so it must run before any row is appended.
    crawler.create_csv_header()
    crawler.scrape_comments(target_song)


if __name__ == '__main__':
    main()

Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 油管news
Saved comment from user: 卫弎
Saved comment from user: Monkey-Chicken
Saved comment from user: Monkey-Chicken
Saved comment from user: Monkey-Chicken
Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 柳文小盆友
Saved comment from user: 凉风过寒秋
Saved comment from user: 携同温柔逃跑
Saved comment from user: ZXD_508
Saved comment from user: 壮儿6686
Saved comment from user: 壮儿6686
Saved comment from user: 取名不了许多
Saved comment from user: Endofu
Saved comment from user: OwO念念
Saved comment from user: im麻薯
Saved comment from user: 华灯初上冰雪者的低语
Saved comment from user: 想要当一个诗人
Saved comment from user: 咚咚_u
Saved comment from user: 辣味滋堡
Saved comment from user: 圣代终结者
Saved comment from user: 我有一支仙女棒--
Saved comment from user: 41UxU
Saved comment from user: 我有一支仙女棒--
Saved comment from user: 微

KeyboardInterrupt: 

In [29]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every CSV row combines one comment with profile details of its author.
    Both are fetched from the ``music.163.com`` web API with ``requests``.
    """

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The lookup is best-effort: a request that raises is attempted up to
        three times, and every field that cannot be determined keeps the
        placeholder '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            A dict with the keys 'gender', 'age', 'city' and 'sign'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')
                        # A null birthday must not reach the comparison below.
                        birthday = profile.get('birthday') or 0

                        if birthday > 0:
                            # The birthday is treated as milliseconds since
                            # 1970 and reduced to whole 365-day years; the
                            # current year replaces a hard-coded one so the
                            # result does not go stale.
                            current_year = time.localtime().tm_year
                            age = (current_year - 1970) - (birthday // (1000 * 365 * 24 * 3600))
                            data['age'] = max(0, age)

                        data['city'] = profile.get('city', '无')
                        # A null signature falls back to the placeholder
                        # instead of raising on .strip() and causing retries.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only raised errors are retried; an answered request ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                # No point in waiting once the last attempt has failed.
                if retry_count < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the resource URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data47.csv'):
        """Append one row to the CSV file.

        Values are stringified and written through ``csv.writer`` so that
        commas, quotes and line breaks inside a field are quoted instead of
        corrupting the row.

        Args:
            data: Iterable of field values for a single row.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # newline='' hands control of line endings to the csv module.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data47.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through a song's comments and append each one to the CSV.

        Stops when a page cannot be fetched, contains no comments, or holds
        fewer comments than the page size. A comment that fails to process
        is reported and skipped.

        Args:
            song_id: Song identifier passed on to get_comments.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as seconds
                    # (assumes a 13-digit millisecond value - TODO confirm).
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Start a new CSV file and scrape every comment of one song into it."""
    target_song_id = '2642292188'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song_id)


# Run the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 笙袆
Saved comment from user: Flipped_Limerence
Saved comment from user: 基础题两小时解不出
Saved comment from user: 忧郁是我的本性
Saved comment from user: Sivan大木
Saved comment from user: 别忘苏
Saved comment from user: 别忘苏
Saved comment from user: 云村村民-IQ18-0
Saved comment from user: 幽怨人心
Saved comment from user: 远离窝V
Saved comment from user: 祈陆安77
Saved comment from user: Tomber_sur
Saved comment from user: 溫For
Saved comment from user: Deepsea-唐
Saved comment from user: 独家颖片angela
Saved comment from user: 西瓜汁2101
Saved comment from user: 灵炅Clever
Saved comment from user: 翎諭T
Saved comment from user: 愛月_ll
Saved comment from user: 迢少
Saved comment from user: 银河映像j
Saved comment from user: 倾汀g
Saved comment from user: page-YanYan
Saved comment from user: 不道取什么名1
Saved comment from user: 颜控美少女控正太控声控的z夫人
Saved comment from user: Anna安娜呀_
Saved comment from user: 映雨薇薇
Saved comment from user: 笑口常开a心想事成
Saved comment from user: 祁懿z
Saved comment from user: 辽里潦草的人
Saved comment from 

In [33]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every CSV row combines one comment with profile details of its author.
    Both are fetched from the ``music.163.com`` web API with ``requests``.
    """

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The lookup is best-effort: a request that raises is attempted up to
        three times, and every field that cannot be determined keeps the
        placeholder '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            A dict with the keys 'gender', 'age', 'city' and 'sign'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')
                        # A null birthday must not reach the comparison below.
                        birthday = profile.get('birthday') or 0

                        if birthday > 0:
                            # The birthday is treated as milliseconds since
                            # 1970 and reduced to whole 365-day years; the
                            # current year replaces a hard-coded one so the
                            # result does not go stale.
                            current_year = time.localtime().tm_year
                            age = (current_year - 1970) - (birthday // (1000 * 365 * 24 * 3600))
                            data['age'] = max(0, age)

                        data['city'] = profile.get('city', '无')
                        # A null signature falls back to the placeholder
                        # instead of raising on .strip() and causing retries.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only raised errors are retried; an answered request ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                # No point in waiting once the last attempt has failed.
                if retry_count < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the resource URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data48.csv'):
        """Append one row to the CSV file.

        Values are stringified and written through ``csv.writer`` so that
        commas, quotes and line breaks inside a field are quoted instead of
        corrupting the row.

        Args:
            data: Iterable of field values for a single row.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # newline='' hands control of line endings to the csv module.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data48.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through a song's comments and append each one to the CSV.

        Stops when a page cannot be fetched, contains no comments, or holds
        fewer comments than the page size. A comment that fails to process
        is reported and skipped.

        Args:
            song_id: Song identifier passed on to get_comments.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as seconds
                    # (assumes a 13-digit millisecond value - TODO confirm).
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Start a new CSV file and scrape every comment of one song into it."""
    target_song_id = '2644303288'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song_id)


# Run the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 由来一声笑
Saved comment from user: 楚念玖
Saved comment from user: 天道呵呵
Saved comment from user: 往事尘不到
Saved comment from user: 曾经是过不去的坎
Saved comment from user: 登山者必胜
Saved comment from user: 小圓滾頭
Saved comment from user: 宜拾壹
Saved comment from user: 枝头千点雪
Saved comment from user: 你吃了吗12
Saved comment from user: 咯吱窝里的故事
Saved comment from user: 筠雪芊寒
Saved comment from user: AfricAsiaN
Saved comment from user: 热爱每一个春天
Saved comment from user: 烏雲豹哥哥
Saved comment from user: 米津浅兮
Saved comment from user: Teleock
Saved comment from user: EIabismo
Saved comment from user: 德马基亚
Saved comment from user: S青火
Saved comment from user: 隐形坠机
Saved comment from user: 凉风有讯T
Saved comment from user: 乐与乐
Saved comment from user: 大朵_tQ0L
Saved comment from user: 山雁不知心底事
Saved comment from user: 余生还长足够我们一生疯狂
Saved comment from user: 名为Eligos
Saved comment from user: oOscar-Lee
Saved comment from user: 加载-_
Saved comment from user: Lana日记
Saved comment from user: 星辰夜愿
Saved comment fro

In [34]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every CSV row combines one comment with profile details of its author.
    Both are fetched from the ``music.163.com`` web API with ``requests``.
    """

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The lookup is best-effort: a request that raises is attempted up to
        three times, and every field that cannot be determined keeps the
        placeholder '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            A dict with the keys 'gender', 'age', 'city' and 'sign'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')
                        # A null birthday must not reach the comparison below.
                        birthday = profile.get('birthday') or 0

                        if birthday > 0:
                            # The birthday is treated as milliseconds since
                            # 1970 and reduced to whole 365-day years; the
                            # current year replaces a hard-coded one so the
                            # result does not go stale.
                            current_year = time.localtime().tm_year
                            age = (current_year - 1970) - (birthday // (1000 * 365 * 24 * 3600))
                            data['age'] = max(0, age)

                        data['city'] = profile.get('city', '无')
                        # A null signature falls back to the placeholder
                        # instead of raising on .strip() and causing retries.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only raised errors are retried; an answered request ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                # No point in waiting once the last attempt has failed.
                if retry_count < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the resource URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data49.csv'):
        """Append one row to the CSV file.

        Values are stringified and written through ``csv.writer`` so that
        commas, quotes and line breaks inside a field are quoted instead of
        corrupting the row.

        Args:
            data: Iterable of field values for a single row.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # newline='' hands control of line endings to the csv module.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data49.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through a song's comments and append each one to the CSV.

        Stops when a page cannot be fetched, contains no comments, or holds
        fewer comments than the page size. A comment that fails to process
        is reported and skipped.

        Args:
            song_id: Song identifier passed on to get_comments.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as seconds
                    # (assumes a 13-digit millisecond value - TODO confirm).
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Start a new CSV file and scrape every comment of one song into it."""
    target_song_id = '2645389884'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song_id)


# Run the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: 无爱别装
Saved comment from user: 小羊的黑胶店-
Saved comment from user: 小羊的黑胶店-
Saved comment from user: 嘲鸫mock-joke
Saved comment from user: 丿Ares丨荣耀
Saved comment from user: 小废物_tZcg
Saved comment from user: 是欣欣欣欣_
Saved comment from user: 无解_2ppc
Saved comment from user: Cookie-yww
Saved comment from user: 唁筵
Saved comment from user: i听Eason
Saved comment from user: Sadniblet
Saved comment from user: Sadniblet
Saved comment from user: 梦A啦多-
Saved comment from user: 小贤的假想
Saved comment from user: 暗地
Saved comment from user: 落花鱼贩子
Saved comment from user: 药不能停tng
Saved comment from user: 梦屿千寻inl
Saved comment from user: 你是猪吗卍
Saved comment from user: 啵zjy
Saved comment from user: 故事讲给风听16
Saved comment from user: tleessang
Saved comment from user: k1ss星
Saved comment from user: 佳Ovoh
Saved comment from user: 胡尔盖茨
Saved comment from user: 麻雀有歌
Saved comment from user: zZAL1
Saved comment from user: Yeez1004
Saved comment from user: 钝剑有锋
Saved comment from user: 埃洛特奈斯
Sa

In [35]:
import json
import time
import requests
import os.path
import sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class NeteaseMusicScraper:
    """Scrape the comments of a NetEase Cloud Music song into a CSV file.

    Every CSV row combines one comment with profile details of its author.
    Both are fetched from the ``music.163.com`` web API with ``requests``.
    """

    def __init__(self):
        # Browser-like headers sent with every API request.
        self.headers = {
            'Host': 'music.163.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Referer': 'https://music.163.com/'
        }

    def get_user_info(self, user_id):
        """Fetch gender, age, city and signature for one user.

        The lookup is best-effort: a request that raises is attempted up to
        three times, and every field that cannot be determined keeps the
        placeholder '无'.

        Args:
            user_id: Identifier interpolated into the user-detail URL.

        Returns:
            A dict with the keys 'gender', 'age', 'city' and 'sign'.
        """
        data = {
            'gender': '无',
            'age': '无',
            'city': '无',
            'sign': '无'
        }

        url = f'https://music.163.com/api/v1/user/detail/{user_id}'
        max_retries = 3
        retry_count = 0

        while retry_count < max_retries:
            try:
                response = requests.get(url=url, headers=self.headers, timeout=10)
                if response.status_code == 200:
                    result = response.json()
                    if result.get('code') == 200 and 'profile' in result:
                        profile = result['profile']
                        data['gender'] = profile.get('gender', '无')
                        # A null birthday must not reach the comparison below.
                        birthday = profile.get('birthday') or 0

                        if birthday > 0:
                            # The birthday is treated as milliseconds since
                            # 1970 and reduced to whole 365-day years; the
                            # current year replaces a hard-coded one so the
                            # result does not go stale.
                            current_year = time.localtime().tm_year
                            age = (current_year - 1970) - (birthday // (1000 * 365 * 24 * 3600))
                            data['age'] = max(0, age)

                        data['city'] = profile.get('city', '无')
                        # A null signature falls back to the placeholder
                        # instead of raising on .strip() and causing retries.
                        signature = profile.get('signature', '无')
                        if signature is None:
                            signature = '无'
                        data['sign'] = signature.strip().replace('\n', ' ').replace(',', '，')
                # Only raised errors are retried; an answered request ends the loop.
                break
            except Exception as e:
                print(f"Error getting user info for {user_id}: {str(e)}")
                retry_count += 1
                # No point in waiting once the last attempt has failed.
                if retry_count < max_retries:
                    time.sleep(3)

        return data

    def get_comments(self, song_id, offset=0, limit=100):
        """Fetch one page of comments for a song.

        Args:
            song_id: Song identifier interpolated into the resource URL.
            offset: Value of the ``offset`` query parameter.
            limit: Value of the ``limit`` query parameter.

        Returns:
            The decoded JSON response, or None when the request or the
            decoding raises.
        """
        url = f'http://music.163.com/api/v1/resource/comments/R_SO_4_{song_id}?limit={limit}&offset={offset}'
        try:
            # Same timeout as get_user_info, so a stalled connection cannot
            # block the scrape indefinitely.
            response = requests.get(url=url, headers=self.headers, timeout=10)
            return response.json()
        except Exception as e:
            print(f"Error getting comments: {str(e)}")
            return None

    def save_comment(self, data, filename='comments_data50.csv'):
        """Append one row to the CSV file.

        Values are stringified and written through ``csv.writer`` so that
        commas, quotes and line breaks inside a field are quoted instead of
        corrupting the row.

        Args:
            data: Iterable of field values for a single row.
            filename: Path of the CSV file to append to.
        """
        import csv  # local import keeps the class self-contained

        # newline='' hands control of line endings to the csv module.
        with open(filename, 'a', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow([str(value) for value in data])

    def create_csv_header(self, filename='comments_data50.csv'):
        """Create (or truncate) the CSV file and write its header row.

        Args:
            filename: Path of the CSV file to create.
        """
        import csv  # local import keeps the class self-contained

        headers = ['nickname', 'user_id', 'age', 'gender', 'city', 'signature',
                   'comment_content', 'comment_id', 'like_count', 'comment_time']
        with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
            csv.writer(f).writerow(headers)

    def scrape_comments(self, song_id):
        """Page through a song's comments and append each one to the CSV.

        Stops when a page cannot be fetched, contains no comments, or holds
        fewer comments than the page size. A comment that fails to process
        is reported and skipped.

        Args:
            song_id: Song identifier passed on to get_comments.
        """
        offset = 0
        limit = 100

        while True:
            result = self.get_comments(song_id, offset, limit)
            if not result or 'comments' not in result:
                break

            comments = result['comments']
            if not comments:
                break

            for comment in comments:
                try:
                    user = comment.get('user', {})
                    user_id = user.get('userId')
                    nickname = user.get('nickname', '')

                    user_info = self.get_user_info(user_id)
                    # The first 10 digits of the timestamp are used as seconds
                    # (assumes a 13-digit millisecond value - TODO confirm).
                    comment_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime(int(str(comment['time'])[:10])))

                    data = [
                        nickname,
                        user_id,
                        user_info['age'],
                        user_info['gender'],
                        user_info['city'],
                        user_info['sign'],
                        comment.get('content', '').replace(',', '，'),
                        comment.get('commentId', ''),
                        comment.get('likedCount', 0),
                        comment_time
                    ]

                    self.save_comment(data)
                    print(f"Saved comment from user: {nickname}")
                    time.sleep(0.5)  # Avoid too frequent requests

                except Exception as e:
                    print(f"Error processing comment: {str(e)}")
                    continue

            offset += limit
            print(f"Processed {offset} comments")

            if len(comments) < limit:
                break  # Break the loop if fewer comments than limit are returned

            time.sleep(1)  # Delay between pages

def main():
    """Start a new CSV file and scrape every comment of one song into it."""
    target_song_id = '2640560006'  # Replace with your target song ID
    netease = NeteaseMusicScraper()
    netease.create_csv_header()
    netease.scrape_comments(target_song_id)


# Run the scrape only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()

Saved comment from user: XISSMIAN
Saved comment from user: XISSMIAN
Saved comment from user: 24b_
Saved comment from user: XISSMIAN
Saved comment from user: 了了烟_w
Saved comment from user: XISSMIAN
Saved comment from user: 祝愿悦无忧
Saved comment from user: 少做梦_1
Saved comment from user: XISSMIAN
Saved comment from user: 始终不渝_0Thd
Saved comment from user: 梦想成为学渣
Saved comment from user: Yyu雯
Saved comment from user: XISSMIAN
Saved comment from user: 第-页序
Saved comment from user: XISSMIAN
Saved comment from user: 24b_
Saved comment from user: XISSMIAN
Saved comment from user: 嫩模_
Saved comment from user: XISSMIAN
Saved comment from user: XISSMIAN
Saved comment from user: 黄昏味的吻
Saved comment from user: XISSMIAN
Saved comment from user: XISSMIAN
Saved comment from user: XISSMIAN
Saved comment from user: bellehi
Saved comment from user: XISSMIAN
Saved comment from user: 让我创造一个有你的世界
Saved comment from user: XISSMIAN
Saved comment from user: 尊重女性不开黄腔
Saved comment from user: FrAnKpdc
Saved commen