In [3]:
import os
import re
import sys
import json
import requests
import datetime
import subprocess
import xml.etree.ElementTree as ET

#默认参数
default_config = {
    "channelid_youtube": {
        "youtube": {
            "update_size": 5,
            "id": "UCBR8-60-B28hp2BmDPdntcQ",
            "title": "YouTube",
            "quality": "480",
            "last_size": 20,
            "media": "m4a"
        }
    }
}

In [4]:
#日志模块
def write_log(log):
    # 获取当前的具体时间
    current_time = datetime.datetime.now()
    # 格式化输出，只保留年月日时分秒
    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
    # 打开文件，并读取原有内容
    try:
        with open("log.txt", "r") as file:
            contents = file.read()
    except FileNotFoundError:
        contents = ""
    # 将新的日志内容添加在原有内容之前
    new_contents = f"{formatted_time} {log}\n{contents}"
    # 将新的日志内容写入文件
    with open("log.txt", "w") as file:
        file.write(new_contents)
    # 打印当前的具体时间（精确到秒）
    print(formatted_time, log)

In [5]:
# 安装库模块
def library_install(library):
    # 检查库是否已安装
    def is_library_installed():
        try:
            result = subprocess.run([library , '--version'], capture_output=True, text=True)
            if result.returncode == 0:
                return True
            else:
                return False
        except FileNotFoundError:
            return False
    # 如果库未安装，则尝试安装
    def install_library():
        try:
            result = subprocess.run(['pip', 'install', library , '-U'], capture_output=True, text=True)
            if result.returncode == 0:
                return True
            else:
                return False
        except FileNotFoundError:
            return False
    # 如果库已安装，则尝试更新
    def update_library():
        try:
            result = subprocess.run(['pip', 'install', '--upgrade', library], capture_output=True, text=True)
            if result.returncode == 0:
                return True
            else:
                return False
        except FileNotFoundError:
            return False
    # 检查库是否已安装
    if is_library_installed():
        write_log(library + "已安装")
    else:
        write_log(library + "未安装")
    # 如果库已安装，则尝试更新
    if is_library_installed():
        if update_library():
            write_log(library + "更新成功")
        else:
            write_log(library + "更新失败")
    else:  # 如果库未安装，则尝试安装
        if install_library():
            write_log(library + "安装成功")
        else:
            write_log(library + "安装失败")

In [8]:
# 安装/更新yt-dlp
library_install("yt-dlp")
# 安装/更新pytube
library_install("pytube")
# 安装/更新ffmpeg
library_install("ffmpeg")


2023-08-15 03:17:40 yt-dlp已安装
2023-08-15 03:17:43 yt-dlp更新成功
2023-08-15 03:17:43 pytube已安装
2023-08-15 03:17:44 pytube更新成功
2023-08-15 03:17:44 ffmpeg-python未安装
2023-08-15 03:17:48 ffmpeg-python安装成功


In [5]:
# 检查当前文件夹中是否存在config.json文件
if not os.path.exists('config.json'):
    # 如果文件不存在，创建并写入默认字典
    with open('config.json', 'w') as file:
        json.dump(default_config, file, indent=4)
    write_log("不存在配置文件，已新建，默认频道")
    config = default_config
else:
    # 如果文件存在，读取字典并保存到config变量中
    with open('config.json', 'r') as file:
        config = json.load(file)
    write_log("已读取配置文件")

2023-08-14 06:12:54 已读取配置文件


In [6]:
# 从配置文件中获取YouTube的频道
if 'channelid_youtube' in config:
    channelid_youtube = config["channelid_youtube"]
    write_log("已读取youtube频道信息")
else:
    channelid_youtube = None
    write_log("youtube频道信息不存在")
# 从配置文件中获取bilibili的频道
if 'channelid_bilibili' in config:
    channelid_bilibili = config["channelid_bilibili"]
    write_log("已读取bilibili频道信息")
else:
    channelid_bilibili = None
    write_log("bilibili频道信息不存在")

2023-08-14 06:12:54 已读取youtube频道信息
2023-08-14 06:12:54 bilibili频道信息不存在


In [7]:
# 构建文件夹channel_id
folder_path_channel_ids = os.path.join(os.getcwd(), "channel_id")
if not os.path.exists(folder_path_channel_ids):  # 判断文件夹是否存在
    os.makedirs(folder_path_channel_ids)  # 创建文件夹
    write_log("文件夹channel_id创建成功")

In [8]:
# 视频分辨率变量
youtube_dpi = ["144", "180", "216", "240", "360", "480", "720", "1080", "1440", "2160", "4320"]
youtube_media = ["m4a", "mp4"]
# 复制字典youtube-channelid，遍历复制后的字典进行操作以避免在循环中删除元素导致的迭代错误
channelid_youtube_copy = channelid_youtube.copy()
# 对youtube-channelid的错误进行更正
for channelid_youtube_key, channelid_youtube_value in channelid_youtube_copy.items():
    # 判断id是否正确
    if 'id' not in channelid_youtube_value or not re.search(r"UC.{22}", channelid_youtube_value['id']):
        # 删除错误的
        del channelid_youtube[channelid_youtube_key]
        write_log(f"YouTube频道 {channelid_youtube_key} ID不正确")
    else:
        # 对update_size进行纠正
        if 'update_size' not in channelid_youtube_value or not (isinstance(channelid_youtube_value['update_size'], int) and channelid_youtube_value['update_size'] > 0):
            channelid_youtube[channelid_youtube_key]['update_size'] = 5
        # 对id进行纠正
        channelid_youtube[channelid_youtube_key]['id'] = re.search(r"UC.{22}", channelid_youtube_value['id']).group()
        # 对last_size进行纠正
        if 'last_size' not in channelid_youtube_value or not (isinstance(channelid_youtube_value['last_size'], int) and channelid_youtube_value['last_size'] > 0):
            channelid_youtube[channelid_youtube_key]['last_size'] = 20
        # 对title进行纠正
        if 'title' not in channelid_youtube_value:
            channelid_youtube[channelid_youtube_key]['title'] = channelid_youtube_key
        # 对quality进行纠正
        if ('quality' not in channelid_youtube_value or not channelid_youtube_value['quality'] in youtube_dpi) and 'media' in channelid_youtube_value and channelid_youtube_value['media'] == "mp4":
            channelid_youtube[channelid_youtube_key]['quality'] = "480"
        # 对media进行纠正
        if 'media' not in channelid_youtube_value or not channelid_youtube_value['media'] in youtube_media:
            channelid_youtube[channelid_youtube_key]['media'] = "m4a"

In [9]:
# 读取youtube频道的id
if channelid_youtube is not None:
    channelid_youtube_ids = dict({channel["id"]: key for key, channel in channelid_youtube.items()})
    write_log("读取youtube频道的channelid成功")
else:
    channelid_youtube_ids = None
# 读取bilibili频道的id
if channelid_bilibili is not None:
    channelid_bilibili_ids = [channelid_bilibili[key]['id'] for key in channelid_bilibili]
    write_log("读取bilibili频道的channelid成功")
else:
    channelid_bilibili_ids = None

2023-08-14 06:12:54 读取youtube频道的channelid成功


In [10]:
# 更新Youtube频道xml
channelid_youtube_ids_update = {}  #创建需更新的频道
youtube_content_ytid_update = {}  #创建需更新列表
for youtube_key, youtube_value in channelid_youtube_ids.items():
    # 构建 URL
    youtube_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={youtube_key}"
    # 发送请求并获取响应内容
    youtube_response = requests.get(youtube_url)
    youtube_content = youtube_response.text
    # 判断频道id是否正确
    pattern_youtube404 = r"Error 404"  # 设置要匹配的正则表达式模式
    pattern_youtube_vary = r'([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-2][0-9]:[0-6][0-9]:[0-6][0-9]\+00:00)?(starRating count="[0-9]*")?(statistics views="[0-9]*")?(<id>yt:channel:(UC.{22})?</id>)?(<yt:channelId>(UC.{22})?</yt:channelId>)?'
    if not re.search(pattern_youtube404, youtube_content):
        youtube_content = re.sub(pattern_youtube_vary, '', youtube_content)
        # 读取原Youtube频道xml文件并判断是否要更新
        try:
            with open(f"channel_id/{youtube_key}.txt", 'r', encoding='utf-8') as file:  # 打开文件进行读取
                youtube_content_original = file.read()  # 读取文件内容
            if youtube_content != youtube_content_original:  #判断是否要更新
                channelid_youtube_ids_update[youtube_key] = youtube_value
        except FileNotFoundError:  #文件不存在直接更新
            channelid_youtube_ids_update[youtube_key] = youtube_value
        # 构建文件路径
        file_path = os.path.join(folder_path_channel_ids, f"{youtube_key}.txt")
        # 构建文件路径
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(youtube_content)
            write_log(f"YouTube频道 {youtube_value} 已更新")
        # 构建频道文件夹
        folder_path_channel_id = os.path.join(os.getcwd(), youtube_key)
        if not os.path.exists(folder_path_channel_id):  # 判断文件夹是否存在
            os.makedirs(folder_path_channel_id)  # 创建文件夹
            write_log(f"文件夹{youtube_key}创建成功")
        #获取Youtube视频ID列表
        youtube_content_ytid = re.findall(r"(?<=<id>yt:video:).{11}(?=</id>)", youtube_content)
        youtube_content_ytid = youtube_content_ytid[:channelid_youtube[youtube_value]['update_size']]
        #获取已下载媒体名称
        youtube_content_ytid_original = [os.path.splitext(file)[0] for file in os.listdir(youtube_key) if file.endswith(channelid_youtube[youtube_value]['media'])]
        youtube_content_ytid = [exclude for exclude in youtube_content_ytid if exclude not in youtube_content_ytid_original]
        if youtube_content_ytid:
            channelid_youtube_ids_update[youtube_key] = youtube_value
            youtube_content_ytid_update[youtube_key] = youtube_content_ytid
    else:
        write_log(f"YouTube频道 {youtube_value} ID不正确无法获取")
        del channelid_youtube_ids[youtube_key]  # 删除错误ID
if channelid_youtube_ids_update:
    write_log(f"需更新的YouTube频道:{', '.join(channelid_youtube_ids_update.values())}")

2023-08-14 06:12:54 YouTube频道 youtube 已更新
2023-08-14 06:12:54 YouTube频道 stone记 已更新
2023-08-14 06:12:54 需更新的YouTube频道:youtube, stone记


In [19]:
print(youtube_content_ytid_update)
for key, values in youtube_content_ytid_update.items():
    for value in values:
        youtube_ytid_update = {}
        youtube_ytid_update[value] = key
print(youtube_ytid_update)

{'UCBR8-60-B28hp2BmDPdntcQ': ['5Ht2M3awcYQ', 'sZ7X35Lnvso', 'SxhSSQ28y9U', 'gPDxEL1yKbU', 'GTEOm0lG3pY'], 'UCghLs6s95LrBWOdlZUCH4qw': ['qGtP1i1n2bk', 'GJYSsFaU_hM', 'I-IOdZDR9mg', 'uoxAIJo2E7M', 'WDMadW5RO9w']}
{'WDMadW5RO9w': 'UCghLs6s95LrBWOdlZUCH4qw'}


In [13]:
def read_text_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        return content
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
        return None

# 调用read_text_file函数来读取文件内容
file_path = 'channel_id/UCghLs6s95LrBWOdlZUCH4qw.txt'
text_content = read_text_file(file_path)

if text_content:
    print(text_content)

<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns:yt="http://www.youtube.com/xml/schemas/2015" xmlns:media="http://search.yahoo.com/mrss/" xmlns="http://www.w3.org/2005/Atom">
 <link rel="self" href="http://www.youtube.com/feeds/videos.xml?channel_id=UCghLs6s95LrBWOdlZUCH4qw"/>
 
 
 <title>stone记</title>
 <link rel="alternate" href="https://www.youtube.com/channel/UCghLs6s95LrBWOdlZUCH4qw"/>
 <author>
  <name>stone记</name>
  <uri>https://www.youtube.com/channel/UCghLs6s95LrBWOdlZUCH4qw</uri>
 </author>
 <published></published>
 <entry>
  <id>yt:video:qGtP1i1n2bk</id>
  <yt:videoId>qGtP1i1n2bk</yt:videoId>
  
  <title>欧洲在疯狂囤积中国的太阳能光伏组件（20230812第6247期）</title>
  <link rel="alternate" href="https://www.youtube.com/watch?v=qGtP1i1n2bk"/>
  <author>
   <name>stone记</name>
   <uri>https://www.youtube.com/channel/UCghLs6s95LrBWOdlZUCH4qw</uri>
  </author>
  <published></published>
  <updated></updated>
  <media:group>
   <media:title>欧洲在疯狂囤积中国的太阳能光伏组件（20230812第6247期）</media:title>
   <media:

In [14]:
def youtube_element(channelid_youtube_id , channelid_youtube_name):
    if os.path.exists(f"channel_id/{channelid_youtube_id}.txt"):
        # 如果文件存在，读取字典并保存到channelid_youtube_xml变量中
        with open(f"channel_id/{channelid_youtube_id}.txt", 'r') as file:
            channelid_youtube_root = ET.fromstring(file)
            for entry in channelid_youtube_root.findall('.//{http://www.w3.org/2005/Atom}entry'):
                channelid_youtube_title = entry.find('{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}title').text
                print(f"标题：{channelid_youtube_title}")
    else:
        # 如果文件不存在，删除错误ID
        del channelid_youtube_ids[channelid_youtube_id]
        write_log(f"YouTube频道 {channelid_youtube_name} XML文件不存在")


youtube_element("UCghLs6s95LrBWOdlZUCH4qw","stone记")


TypeError: a bytes-like object is required, not '_io.TextIOWrapper'