...


In [None]:
import requests
import os
import googleapiclient.discovery
import nltk
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from googleapiclient.discovery import build
from collections import defaultdict

def scrape_yahoo_mojishopping_news_titles(person_name):
    """Return the headlines Yahoo Taiwan News lists for a search on *person_name*.

    Args:
        person_name: Search keyword, typically a person's name.

    Returns:
        A list of stripped headline strings (possibly empty), or ``None`` when
        the request fails or the server answers with a non-200 status.
    """
    search_url = "https://tw.news.yahoo.com/search"
    # Passing the query as ``params`` lets requests percent-encode every value,
    # so a keyword containing spaces, "&" or "#" cannot corrupt the query string.
    query = {
        "p": person_name,
        "fr": "uh3_news_web",
        "fr2": "p:news,m:sb",
        ".tsrc": "uh3_news_web",
    }

    try:
        # The timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(search_url, params=query, timeout=10)
    except requests.RequestException as exc:
        # Network failures are reported the same way as a bad status code.
        print(f"Error: Unable to fetch Yahoo 摩奇新闻. {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch Yahoo 摩奇新闻. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Headlines are read from the <h3> elements carrying the "Mb(5px)" class.
    return [headline.text.strip() for headline in soup.find_all('h3', class_='Mb(5px)')]

# Lazily created analyzer shared by every call that does not supply its own.
_default_analyzer = None


def analyze_sentiment(text, analyzer=None):
    """Classify *text* as 'positive', 'negative' or 'neutral'.

    The label is derived from the VADER ``compound`` score: ``>= 0.05`` is
    positive, ``<= -0.01`` is negative, anything in between is neutral.

    Args:
        text: The text to score.
        analyzer: Optional object exposing ``polarity_scores(text)`` that
            returns a mapping with a ``'compound'`` entry. When omitted, one
            shared ``SentimentIntensityAnalyzer`` is created on first use and
            reused afterwards instead of being rebuilt on every call.

    Returns:
        One of the strings ``'positive'``, ``'negative'`` or ``'neutral'``.
    """
    global _default_analyzer
    if analyzer is None:
        if _default_analyzer is None:
            _default_analyzer = SentimentIntensityAnalyzer()
        analyzer = _default_analyzer

    compound = analyzer.polarity_scores(text)['compound']

    if compound >= 0.05:
        return 'positive'
    if compound <= -0.01:
        return 'negative'
    return 'neutral'

def get_video_views(api_key, video_id, youtube=None):
    """Return the view count of a single YouTube video.

    Args:
        api_key: YouTube Data API key; only used when *youtube* is not given.
        video_id: ID of the video to look up.
        youtube: Optional, already-built YouTube Data API client. Passing one
            avoids constructing a new client for every lookup.

    Returns:
        The view count as an ``int``, or ``0`` when the API returns no item
        for *video_id* or the item carries no ``viewCount``.
    """
    if youtube is None:
        youtube = build("youtube", "v3", developerKey=api_key)

    # Fetch only the statistics part of the video resource.
    video_response = youtube.videos().list(
        id=video_id,
        part="statistics"
    ).execute()

    # An empty item list means the API had nothing to report for this ID;
    # treat that as zero views instead of failing with IndexError.
    items = video_response.get("items") or []
    if not items:
        return 0

    return int(items[0].get("statistics", {}).get("viewCount", 0))

def _search_videos_mentioning(youtube, username, max_pages=5):
    """Return search hits whose title or description contains *username*."""
    needle = username.lower()
    matches = []
    seen_ids = set()
    page_token = None

    for _ in range(max_pages):
        response = youtube.search().list(
            q="台灣 " + username,
            type="video",
            part="id,snippet",
            maxResults=50,
            pageToken=page_token
        ).execute()

        for item in response.get("items", []):
            video_id = item["id"]["videoId"]
            title = item["snippet"]["title"]
            description = item["snippet"]["description"]

            # The same video must not be counted as two appearances.
            if video_id in seen_ids:
                continue
            if needle in title.lower() or needle in description.lower():
                seen_ids.add(video_id)
                matches.append({"title": title, "video_id": video_id})

        # Stop at the last page: sending an empty token again would simply
        # re-fetch the first page and duplicate its results.
        page_token = response.get("nextPageToken")
        if not page_token:
            break

    return matches


def _sum_video_views(youtube, video_ids, batch_size=50):
    """Return the summed view count of *video_ids*, fetched in batches."""
    total_views = 0
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            id=",".join(batch),
            part="statistics"
        ).execute()
        # Videos the API does not return contribute nothing to the total.
        for item in response.get("items", []):
            total_views += int(item.get("statistics", {}).get("viewCount", 0))
    return total_views


def calculate_statistics(api_key, target_youtuber_username, youtube=None):
    """Estimate the view "growth rate" associated with a YouTuber's appearances.

    The function looks up the first channel returned for
    ``"台灣 " + target_youtuber_username``, searches up to five result pages for
    videos whose title or description mentions the username, and compares the
    channel's total view count with the summed views of those videos, both
    averaged over the number of matching videos.

    NOTE(review): the "before" figure is the channel's current total view
    count, not a snapshot taken before the appearances — confirm this is the
    intended baseline.

    Args:
        api_key: YouTube Data API key; only used when *youtube* is not given.
        target_youtuber_username: Name to search for and to match against
            video titles and descriptions (case-insensitive).
        youtube: Optional, already-built YouTube Data API client.

    Returns:
        The growth rate as a percentage; ``0`` when no matching video is found
        or the channel reports zero views (the rate is undefined then); or
        ``None`` when no channel is found for the username.
    """
    if youtube is None:
        youtube = build("youtube", "v3", developerKey=api_key)

    # Resolve the target YouTuber's channel (first search hit only).
    channels_response = youtube.search().list(
        q="台灣 " + target_youtuber_username,
        type="channel",
        part="id",
        maxResults=1
    ).execute()

    if not channels_response.get("items"):
        print(f"找不到 YouTuber：{target_youtuber_username}")
        return None
    target_channel_id = channels_response["items"][0]["id"]["channelId"]

    # Every matching video is attributed to the target channel.
    matches = _search_videos_mentioning(youtube, target_youtuber_username)
    appearances_count = len(matches)
    if appearances_count == 0:
        return 0

    # Total view count of the target channel.
    channel_response = youtube.channels().list(
        id=target_channel_id,
        part="statistics"
    ).execute()
    channel_items = channel_response.get("items") or []
    channel_views = 0
    if channel_items:
        channel_views = int(channel_items[0].get("statistics", {}).get("viewCount", 0))

    # Total views of the matching videos, fetched with batched requests
    # instead of one request (and one client build) per video.
    matched_views = _sum_video_views(youtube, [match["video_id"] for match in matches])

    average_views_before_appearances = channel_views / appearances_count
    average_views_after_appearances = matched_views / appearances_count

    # A zero baseline makes the relative growth undefined; report no growth.
    if average_views_before_appearances == 0:
        return 0

    return ((average_views_after_appearances - average_views_before_appearances)
            / average_views_before_appearances) * 100


def calculate_score(views_growth_rate):
    """Convert a views growth rate (in percent) into a score out of 20.

    Any positive rate earns the full 20 points. A zero or negative rate loses
    points in proportion to its magnitude: each percentage point of decline
    costs 0.2 points, and a decline of 100% or more scores 0.
    """
    full_marks = 20

    if views_growth_rate > 0:
        return full_marks

    # Fraction of the full marks to deduct, capped at 1 (i.e. everything).
    deducted_fraction = min(1, abs(views_growth_rate) / 100)
    return max(0, full_marks - full_marks * deducted_fraction)

def calculate_grade(score):
    """Translate a numeric score into a letter grade.

    Grades are awarded by lower bound: 80 and above is "A+", then "A" from 70,
    "A-" from 60, "B" from 50, "C" from 40 and "D" from 30. Anything below 30
    is "F".
    """
    # Lower bounds listed from highest to lowest; the first one reached wins.
    grade_floors = (
        (80, "A+"),
        (70, "A"),
        (60, "A-"),
        (50, "B"),
        (40, "C"),
        (30, "D"),
    )

    for floor, letter in grade_floors:
        if score >= floor:
            return letter
    return "F"
#############################################
# The YouTube Data API key is read from the GOOGLE_API_KEY environment
# variable (create one in the Google Cloud Console). Secrets must not be
# committed to the notebook.
# NOTE(review): a key used to be hard-coded in this cell; treat it as leaked
# and revoke it.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

# Initialise the YouTube Data API client.
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=api_key)

# Channel to analyse.
channel_id = "UCPRWWKG0VkBA0Pqa4Jr5j0Q"

# One entry per analysed video: id, per-comment labels, views and likes.
video_data = []

# The five most recently published videos of the channel.
channel_response = youtube.search().list(
    channelId=channel_id,
    order="date",
    type="video",
    part="id",
    maxResults=5
).execute()

# Initialise the sentiment analyser.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

positive_count = 0
negative_count = 0
neutral_count = 0


# For every video: label each top-level comment, then record views and likes.
for item in channel_response.get("items", []):
    video_id = item["id"]["videoId"]
    comments = []
    next_page_token = None

    # Walk through every page of comment threads.
    while True:
        comment_response = youtube.commentThreads().list(
            videoId=video_id,
            textFormat="plainText",
            part="snippet",
            maxResults=100,
            pageToken=next_page_token
        ).execute()

        for comment_thread in comment_response.get("items", []):
            text = comment_thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            sentiment = sia.polarity_scores(text)

            # Label the comment and update the running totals.
            if sentiment['compound'] >= 0.05:
                comments.append("正向")
                positive_count += 1
            elif sentiment['compound'] <= -0.02:
                comments.append("负向")
                negative_count += 1
            else:
                comments.append("中性")
                # NOTE(review): neutral comments are tallied as positive and
                # neutral_count is never incremented — confirm this is intended.
                positive_count += 1

        next_page_token = comment_response.get("nextPageToken")

        if not next_page_token:
            break

    # Fetch the video's view and like counts.
    video_response = youtube.videos().list(
        part="statistics",
        id=video_id
    ).execute()

    video_items = video_response.get("items", [])
    if not video_items:
        # No statistics were returned for this video; leave it out.
        continue

    video_statistics = video_items[0]["statistics"]
    # Either counter may be missing from the response (e.g. hidden likes);
    # count a missing value as zero instead of failing with KeyError.
    views = int(video_statistics.get("viewCount", 0))
    likes = int(video_statistics.get("likeCount", 0))

    video_data.append({
        "video_id": video_id,
        "comments": comments,
        "views": views,
        "likes": likes
    })

###########################################################################################
# Growth and news analysis. This runs once, after the per-video loop, so the
# scores exist even when the channel has no videos and no API quota is spent
# repeating identical queries for every video.
target_youtuber_username = "Joeman"

growth_percentage = calculate_statistics(api_key, target_youtuber_username)
# calculate_statistics returns None when no channel is found; without data
# no growth points are awarded.
growth_score = calculate_score(growth_percentage) if growth_percentage is not None else 0

person_name = "Joeman"
# Fetch the news headlines.
news_titles = scrape_yahoo_mojishopping_news_titles(person_name)

news_positive_count = 0
news_negative_count = 0
if news_titles:
    # Classify every headline.
    for title in news_titles:
        sentiment = analyze_sentiment(title)

        if sentiment == 'positive':
            news_positive_count += 1
        elif sentiment == 'negative':
            news_negative_count += 1
        else:
            # NOTE(review): neutral headlines are tallied as positive —
            # confirm this is intended.
            news_positive_count += 1
else:
    print("获取新闻标题失败。")

news_score = news_positive_count

# Total likes and views across the analysed videos.
total_likes = sum(video_info['likes'] for video_info in video_data)
total_views = sum(video_info['views'] for video_info in video_data)

# Like score: the like/view percentage scaled so that 4% maps to 20 points.
# With no views at all the ratio is undefined and scores zero.
score = (total_likes / total_views) * 100 if total_views else 0
final_score = (score / 4) * 20

# Turn the comment tallies into a score.
positive_reviews = positive_count
negative_reviews = negative_count

# Ratio of positive to negative comments. Without any negative comment the
# ratio is unbounded (full marks) if there are positives, otherwise zero.
if negative_reviews:
    ratio = positive_reviews / negative_reviews
elif positive_reviews:
    ratio = float("inf")
else:
    ratio = 0

# Full 20 points at a 5:1 ratio or better; 4 points are lost per unit below 5.
comment_score = 20 - max(0, (5 - ratio) * 4)

# Print the results.
print("newsScore:", news_score)
print("commentscore", comment_score)

print("20%的得分:", final_score)

print("20%增長率：", growth_score)

print("scoreeeee:", final_score + comment_score + news_score)

grade_score = final_score + comment_score + news_score + growth_score

# final_score is rebound to the letter grade so the notebook displays it.
final_score = calculate_grade(grade_score)

final_score