In [5]:
import requests
from bs4 import BeautifulSoup
import yt_dlp

# Request headers sent with every page fetch; the User-Agent imitates a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


def _first_text(soup, tag_name, css_class, default='N/A'):
    """Return the stripped text of the first matching element, or ``default`` if none matches."""
    tag = soup.find(tag_name, class_=css_class)
    return tag.get_text(strip=True) if tag else default


def get_video_info(video_url, timeout=10):
    """Fetch a video page and scrape its title and counters from the HTML.

    Args:
        video_url: URL of the video page to request.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the cell forever.

    Returns:
        A dict with the keys ``'title'``, ``'play_count'``, ``'like_count'`` and
        ``'danmu_count'``; each value is the element's text, or ``'N/A'`` when the
        element is not found. Returns ``None`` (after printing a message) when the
        response status is not 200.

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout; these
        are not caught here.
    """
    response = requests.get(video_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve video page, status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): the recorded run of this cell printed 'N/A' for the play,
    # like and danmu counts, so these three selectors apparently did not match
    # the served HTML for that page -- confirm the class names against the
    # current page markup.
    return {
        'title': _first_text(soup, 'h1', 'video-title'),
        'play_count': _first_text(soup, 'span', 'view'),
        'like_count': _first_text(soup, 'span', 'like'),
        'danmu_count': _first_text(soup, 'span', 'dm'),
    }


def list_formats(video_url):
    """Ask yt-dlp to list the available formats for ``video_url`` without downloading.

    Args:
        video_url: URL handed to ``YoutubeDL.extract_info``.

    Returns:
        None; the format table is emitted by yt-dlp itself.
    """
    options = dict(listformats=True)  # list the available formats
    # The ``with`` block manages the downloader and releases it on exit.
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.extract_info(video_url, download=False)


def download_video(video_url, output_path, format_id='best'):
    """Download ``video_url`` with yt-dlp into ``output_path``.

    Args:
        video_url: URL passed to ``YoutubeDL.download``.
        output_path: Directory prefix (a string) for the output file template.
        format_id: Value for yt-dlp's ``format`` option; defaults to ``'best'``.

    Returns:
        None.
    """
    # Output template: "<output_path>/<title>.<ext>".
    name_template = '/'.join((output_path, '%(title)s.%(ext)s'))
    options = {
        'format': format_id,       # download the requested format
        'outtmpl': name_template,  # save location and file-name template
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([video_url])


if __name__ == "__main__":
    # Replace with the URL of the video you want to scrape.
    video_url = 'https://www.bilibili.com/video/BV1xx411c7mD'
    video_info = get_video_info(video_url)
    if video_info:
        # Print the scraped fields in a fixed order.
        for _label, _key in (
            ("Video Title:", 'title'),
            ("Play Count:", 'play_count'),
            ("Like Count:", 'like_count'),
            ("Danmu Count:", 'danmu_count'),
        ):
            print(_label, video_info[_key])

        # Show the formats yt-dlp can fetch for this video.
        print("Available formats:")
        list_formats(video_url)

        # Download the video into the chosen directory.
        output_path = './downloads'
        # Pick a format ID from the listing printed above.
        # NOTE(review): '100023' is not among the IDs visible in the recorded
        # (truncated) listing -- confirm it exists and carries audio.
        format_id = '100023'
        download_video(video_url, output_path, format_id)
        print(f"Video downloaded to {output_path}")

Video Title: 字幕君交流场所
Play Count: N/A
Like Count: N/A
Danmu Count: N/A
Available formats:
[BiliBili] Extracting URL: https://www.bilibili.com/video/BV1xx411c7mD
[BiliBili] 1xx411c7mD: Downloading webpage
[BiliBili] BV1xx411c7mD: Extracting videos in anthology
[BiliBili] 2: Extracting chapters
[info] Available formats for BV1xx411c7mD:
ID     EXT RESOLUTION FPS |  FILESIZE  TBR PROTO | VCODEC         VBR ACODEC      ABR
-------------------------------------------------------------------------------------
30216  m4a audio only     | ≈16.82MiB  69k https | audio only         mp4a.40.2   69k
30232  m4a audio only     | ≈33.01MiB 135k https | audio only         mp4a.40.2  135k
30016  mp4 480x360     15 | ≈38.14MiB 156k https | avc1.64001E   156k video only
30011  mp4 480x360     15 | ≈24.01MiB  98k https | hev1.1.6.L120  98k video only
100022 mp4 480x360     15 | ≈50.59MiB 206k https | av01.0.01M.08 206k video only
30032  mp4 512x384     15 | ≈38.18MiB 156k https | avc1.64001E   156k video o

In [None]:
import requests
from bs4 import BeautifulSoup
import yt_dlp

# Request headers sent with every page fetch; the User-Agent imitates a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}


def get_video_urls(category_url, timeout=10):
    """Collect absolute video URLs from the anchors on a category page.

    Args:
        category_url: URL of the category page to request.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the cell forever.

    Returns:
        A list of absolute URLs, one per ``<a class="video-link">`` element that
        has a non-empty ``href``, in document order. Returns ``[]`` (after
        printing a message) when the response status is not 200.

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout; these
        are not caught here.
    """
    # Local import so the function works without touching the cell's import block.
    from urllib.parse import urljoin

    response = requests.get(category_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve category page, status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Assumption carried over from the original code: each video link lives in
    # an element such as <a class="video-link" href="...">. TODO confirm against
    # the real page markup.
    video_links = soup.find_all('a', class_='video-link')

    base_url = 'https://www.bilibili.com'
    # urljoin handles root-relative ("/video/..."), protocol-relative
    # ("//host/...") and already-absolute hrefs; plain concatenation only
    # handled the first form. Anchors without an href are skipped rather than
    # raising KeyError.
    return [
        urljoin(base_url, link.get('href'))
        for link in video_links
        if link.get('href')
    ]


category_url = 'https://www.bilibili.com/v/tech/digital'  # Replace with the category URL you want to scrape.

# Fetch the list of video URLs found on the category page.
video_urls = get_video_urls(category_url)

# Loop over every collected video URL (original intent: download each video).
# NOTE(review): within the visible lines the loop body only prints the URL; no
# download call is shown here -- confirm whether one is expected.
for video_url in video_urls:
    print(f"Downloading video from {video_url}")