In [41]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import psycopg2
import password as pw
import time

In [42]:
def download_data(url:str) -> list[list]:
    """Scrape one anime-list page plus the detail page of every anime on it.

    Args:
        url: URL of a list page whose anime cards match ``.theme-list-main``.

    Returns:
        One ``[infos, tags]`` pair per anime card. ``infos`` is
        ``[name, view count, season, episode count, link, star score,
        number of raters, director, agent, studio]`` (the last three are the
        2nd-4th ``<p>`` texts of the detail page's ``.type-list`` block) and
        ``tags`` holds the text of every ``.tag`` element in that block.
        The score fields are ``None`` when the detail page has no
        ``.acg-score`` block; missing staff entries become ``''``.
        An empty list is returned when the list page is not HTTP 200.

    Raises:
        requests.RequestException: on network errors or timeouts.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
    timeout = 10  # seconds; without it requests can block forever
    r = requests.get(url, headers=headers, timeout=timeout)
    r.encoding = 'utf8'
    if r.status_code == 200:
        print(f'請求成功：{r.status_code}')
    else:
        print(f'請求失敗：{r.status_code}')
        # An error page carries no anime cards, so there is nothing to parse.
        return []

    intro_data = BeautifulSoup(r.text, 'html.parser')
    anime_data = []
    for anime_info in intro_data.select('.theme-list-main'):
        # Fields available directly on the list-page card.
        show_view_number = anime_info.select_one('.show-view-number > p').text.strip()
        anime_name = anime_info.select_one('.theme-name').text.strip()
        anime_time = anime_info.select_one('.theme-time').text.strip().replace('年份：','')
        anime_episode = anime_info.select_one('.theme-number').text.strip().replace('共','').replace('集','')
        anime_link = 'https://ani.gamer.com.tw/' + anime_info['href']

        # Pause between detail requests to avoid hammering the site.
        time.sleep(0.2)
        r1 = requests.get(anime_link, headers=headers, timeout=timeout)
        r1.encoding = 'utf8'
        detail_data = BeautifulSoup(r1.text, 'html.parser')

        # The score block may be absent; keep going with None instead of
        # aborting the whole scrape with an AttributeError.
        acg_score = detail_data.select_one('.acg-score')
        star = _select_text(acg_score, '.score-overall-number')
        rating_people = _select_text(acg_score, '.score-overall-people')
        if rating_people is not None:
            rating_people = rating_people.replace('人評價', '').replace(',', '')

        type_list = detail_data.select_one('.type-list')
        if type_list is None:
            staff, tags = [], []
        else:
            staff = [p.text for p in type_list.find_all('p')]
            tags = [li.text for li in type_list.select('.tag')]
        # Indexes 1-3 are read below; pad so a short list cannot raise IndexError.
        staff += [''] * (4 - len(staff))

        infos = [anime_name,show_view_number,anime_time,anime_episode,anime_link,star,rating_people,staff[1],staff[2],staff[3]]
        anime_data.append([infos, tags])
    return anime_data


def _select_text(node, selector):
    """Return the stripped text of ``node.select_one(selector)``, or None if either is missing."""
    if node is None:
        return None
    found = node.select_one(selector)
    return found.text.strip() if found is not None else None

In [34]:
def create_table(conn) -> None:
    """Create the ``巴哈姆特動畫瘋`` table if it does not exist yet.

    The table has a serial primary key ``id``, ten descriptive TEXT columns,
    six optional category columns (``作品分類1`` .. ``作品分類6``) and a
    UNIQUE constraint on ``動畫名``.

    Args:
        conn: an open DB-API connection (psycopg2 in this notebook).

    Raises:
        Whatever the driver raises; the transaction is rolled back first so
        the connection stays usable.
    """
    try:
        # The context manager closes the cursor even when execute() fails.
        with conn.cursor() as cursor:
            cursor.execute(
                '''
    CREATE TABLE  IF NOT EXISTS 巴哈姆特動畫瘋(
	id SERIAL,
	動畫名 TEXT NOT NULL,
	觀看數 TEXT NOT NULL,
	季度 TEXT NOT NULL,
	集數 TEXT NOT NULL,
	動畫連結 TEXT NOT NULL,
	星級 TEXT,
	評分人數 TEXT,
	導演監督 TEXT NOT NULL,
	台灣代理 TEXT NOT NULL,
	製作廠商 TEXT NOT NULL,
	作品分類1 TEXT DEFAULT NULL,
	作品分類2 TEXT DEFAULT NULL,
	作品分類3 TEXT DEFAULT NULL,
	作品分類4 TEXT DEFAULT NULL,
	作品分類5 TEXT DEFAULT NULL,
	作品分類6 TEXT DEFAULT NULL,
	PRIMARY KEY(id),
	UNIQUE(動畫名)
    )
        '''
            )
        conn.commit()
    except Exception:
        # Leave no aborted transaction behind for the next statement.
        conn.rollback()
        raise
    
def insert_data(conn, infos: list[str], tags: list[str]) -> None:
    """Insert one anime row, or refresh it when the title already exists.

    Args:
        conn: an open DB-API connection (psycopg2 in this notebook).
        infos: the ten values for 動畫名, 觀看數, 季度, 集數, 動畫連結, 星級,
            評分人數, 導演監督, 台灣代理, 製作廠商, in that order.
        tags: category tags; only the first six are stored because the table
            has six ``作品分類`` columns.

    On a ``動畫名`` conflict the columns 觀看數 .. 評分人數 are overwritten
    with the new values; the other columns keep their stored content.

    Raises:
        Whatever the driver raises; the transaction is rolled back first.
    """
    # Keep at most six tags (slicing copies, so the caller's list is untouched).
    tags = list(tags[:6])

    # Columns that are always written.
    column_names = [
        "動畫名", "觀看數", "季度", "集數", "動畫連結",
        "星級", "評分人數", "導演監督", "台灣代理", "製作廠商"
    ]

    # One extra category column per tag actually supplied.
    column_names += [f"作品分類{i + 1}" for i in range(len(tags))]

    columns_sql = ','.join(column_names)
    placeholders_sql = ','.join(['%s'] * len(column_names))
    # EXCLUDED is the row proposed for insertion, so the update reuses the
    # bound parameters. Interpolating the scraped text into the SQL string
    # would break on quotes or '%' and allow SQL injection.
    updates_sql = ', '.join(f"{name}=EXCLUDED.{name}" for name in column_names[1:7])

    insert_sql = f'''
        INSERT INTO 巴哈姆特動畫瘋
        ({columns_sql})
        VALUES({placeholders_sql})
        ON CONFLICT (動畫名) DO UPDATE SET {updates_sql}
    '''

    try:
        # The context manager closes the cursor even when execute() fails.
        with conn.cursor() as cursor:
            cursor.execute(insert_sql, list(infos) + tags)
        conn.commit()
    except Exception:
        # Leave no aborted transaction behind for the next row.
        conn.rollback()
        raise

def fetch_data(sql:str, params=None) -> list[tuple]:
    """Run a query on a fresh connection and return every result row.

    Args:
        sql: the statement to execute; use ``%s`` placeholders for values.
        params: optional sequence (or mapping) of values bound to the
            placeholders by the driver. Omit it for a parameterless query.

    Returns:
        The rows produced by ``cursor.fetchall()``.

    Raises:
        psycopg2.Error: on connection or query failure. The connection is
        closed in every case.
    """
    conn = psycopg2.connect(database=pw.DATABASE,
                            user=pw.USER,
                            password=pw.PASSWORD,
                            host=pw.HOST,
                            port=pw.PORT)
    try:
        # Close the cursor, then the connection, even if the query fails.
        with conn.cursor() as cursor:
            cursor.execute(sql, params)
            return cursor.fetchall()
    finally:
        conn.close()

In [35]:
# Scrape before connecting so a failed download never leaves a database
# connection open. download_data() requires the list-page URL; this loads
# page 1 only - loop over the page URLs to load the whole catalogue.
data = download_data('https://ani.gamer.com.tw/animeList.php?page=1')

conn = psycopg2.connect(database=pw.DATABASE,
                        user=pw.USER,
                        password=pw.PASSWORD,
                        host=pw.HOST,
                        port=pw.PORT)
try:
    create_table(conn)
    # Each entry is an [infos, tags] pair as returned by download_data().
    for infos, tags in data:
        insert_data(conn, infos=infos, tags=tags)
finally:
    # Release the connection even when an insert fails.
    conn.close()

請求成功：200


In [44]:
# Fetch the anime list landing page; `index` is the parsed document that the
# next cell reads the pager from.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'}
# Without a timeout a stalled connection would hang the cell forever.
response = requests.get('https://ani.gamer.com.tw/animeList.php?', headers=headers, timeout=10)
response.encoding = 'utf8'
if response.status_code == 200:
    print(f'請求成功：{response.status_code}')
else:
    print(f'請求失敗：{response.status_code}')
    # Stop on 4xx/5xx instead of parsing an error page that has no pager.
    response.raise_for_status()
index = BeautifulSoup(response.text, 'html.parser')

請求成功：200


In [62]:
# The text of the last link inside the pager is read as the number of pages.
pager_last_link = index.select_one('.page_number > a:last-child')
last_page_number = int(pager_last_link.text)

# Build the URL of every list page (pages are numbered from 1), then print them.
page_urls = [
    f'https://ani.gamer.com.tw/animeList.php?page={i+1}'
    for i in range(last_page_number)
]
for url in page_urls:
    print(url)

https://ani.gamer.com.tw/animeList.php?page=1
https://ani.gamer.com.tw/animeList.php?page=2
https://ani.gamer.com.tw/animeList.php?page=3
https://ani.gamer.com.tw/animeList.php?page=4
https://ani.gamer.com.tw/animeList.php?page=5
https://ani.gamer.com.tw/animeList.php?page=6
https://ani.gamer.com.tw/animeList.php?page=7
https://ani.gamer.com.tw/animeList.php?page=8
https://ani.gamer.com.tw/animeList.php?page=9
https://ani.gamer.com.tw/animeList.php?page=10
https://ani.gamer.com.tw/animeList.php?page=11
https://ani.gamer.com.tw/animeList.php?page=12
https://ani.gamer.com.tw/animeList.php?page=13
https://ani.gamer.com.tw/animeList.php?page=14
https://ani.gamer.com.tw/animeList.php?page=15
https://ani.gamer.com.tw/animeList.php?page=16
https://ani.gamer.com.tw/animeList.php?page=17
https://ani.gamer.com.tw/animeList.php?page=18
https://ani.gamer.com.tw/animeList.php?page=19
https://ani.gamer.com.tw/animeList.php?page=20
https://ani.gamer.com.tw/animeList.php?page=21
https://ani.gamer.com.

In [None]:
# Convert the scraped view-count strings into integers in one idempotent step.
#
# NOTE(review): `new_view_number_list` is not defined in any visible cell; it
# is presumably the list of raw view-count strings (e.g. '27.9萬', '1234',
# '統計中') - confirm it exists before running this cell.
# NOTE(review): the original cells kept the non-萬 entries in a variable named
# `list`, which shadowed the built-in and was never filled in the visible
# cells. It is assumed to be the remaining entries of `new_view_number_list`.

# Counts written in units of 萬 (10,000), e.g. '27.9萬' -> 279000.
wan_list = [
    int(float(item.replace('萬', '')) * 10000)
    for item in new_view_number_list
    if '萬' in item
]

# Plain digit strings. '統計中' appears in place of a number for some titles
# and is skipped; filtering never raises when the placeholder is absent.
plain_list = [
    int(item)
    for item in new_view_number_list
    if '萬' not in item and item != '統計中'
]

# 萬-based counts first, then the plain ones (same order as before).
view_number_list_in_number = wan_list + plain_list

# Show a short preview rather than dumping the whole list.
view_number_list_in_number[:10]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Values to plot: the integer view counts built by the preceding cells.
x = view_number_list_in_number

# Create the 10x6-inch figure and its axes explicitly, then draw on them.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(x, bins=1000, ax=ax)  # adjust bins according to the data
ax.set_xticks(list(range(1, 21)))  # tick marks at 1 through 20
ax.set_title('Histogram with Custom Axis Ticks')

plt.show()

In [1]:
import requests
from bs4 import BeautifulSoup


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
}
# Request the Bahamut Anime Crazy home page; the timeout keeps a stalled
# connection from hanging the cell forever.
r = requests.get('https://ani.gamer.com.tw/', headers=headers, timeout=10)
if r.status_code == 200:
    print(f'請求成功：{r.status_code}')

    # Parse the page source with BeautifulSoup's built-in `html.parser`.
    soup = BeautifulSoup(r.text, 'html.parser')
    # Collect the per-anime blocks of the "new anime" timeline, skipping
    # the ones marked `.premium-block`.
    newanime_item = soup.select_one('.timeline-ver > .newanime-block')
    anime_items = newanime_item.select('.newanime-date-area:not(.premium-block)')

    # Extract and print the fields of each anime block in turn.
    for anime_item in anime_items:
        anime_name = anime_item.select_one('.anime-name > p').text.strip()
        print(anime_name)  # anime title
        anime_watch_number = anime_item.select_one('.anime-watch-number > p').text.strip()
        print(anime_watch_number)  # view count
        anime_episode = anime_item.select_one('.anime-episode').text.strip()
        print(anime_episode)  # episode label
        anime_href = anime_item.select_one('a.anime-card-block').get('href')
        print('https://ani.gamer.com.tw/'+anime_href)  # watch link

        # `.contents` lists the tag's child nodes; the last one holds the date.
        anime_date = anime_item.select_one('.anime-date-info').contents[-1].string.strip()
        anime_time = anime_item.select_one('.anime-hours').text.strip()
        print(anime_date, anime_time)  # date and time

        # `src` is missing on these lazy-loaded images (it printed None for
        # every entry). NOTE(review): the real URL is presumably in
        # `data-src`, the usual lazy-loading attribute - confirm against the
        # page markup. Falls back to `src` so this is never worse than before.
        anime_img_tag = anime_item.select_one('img.lazyload')
        anime_img = anime_img_tag.get('data-src') or anime_img_tag.get('src')
        print(anime_img)  # thumbnail URL

        print('----------')
else:
    print(f'請求失敗：{r.status_code}')

請求成功：200
地下忍者
27.9萬
第9集
https://ani.gamer.com.tw/animeVideo.php?sn=36166
12/01 (五) 01:43
None
----------
神劍闖江湖 ―明治劍客浪漫譚―
100.6萬
第22集
https://ani.gamer.com.tw/animeVideo.php?sn=36165
12/01 (五) 01:25
None
----------
大小姐和看門犬
16.7萬
第10集
https://ani.gamer.com.tw/animeVideo.php?sn=36167
12/01 (五) 00:30
None
----------
咒術迴戰 第二季
736.1萬
第43集
https://ani.gamer.com.tw/animeVideo.php?sn=36168
12/01 (五) 00:00
None
----------
柚木家的四兄弟
15.4萬
第9集
https://ani.gamer.com.tw/animeVideo.php?sn=36098
12/01 (五) 00:00
None
----------
聖魔大戰 BIKKURI-MEN
2.3萬
第9集
https://ani.gamer.com.tw/animeVideo.php?sn=36169
11/30 (四) 23:30
None
----------
我的英雄學院 雄英 HEROES BATTLE
統計中
第1集
https://ani.gamer.com.tw/animeVideo.php?sn=36336
11/30 (四) 23:00
None
----------
Dr.STONE 新石紀 第三季
193.4萬
第19集
https://ani.gamer.com.tw/animeVideo.php?sn=36170
11/30 (四) 22:30
None
----------
魔法使的新娘 第二季
106萬
第21集
https://ani.gamer.com.tw/animeVideo.php?sn=36171
11/30 (四) 22:00
None
----------
想當冒險者前往都市的女兒成為 S 級
63.4萬
第10集
https://ani.gamer.com.t