In [6]:
import os
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
def download_images(query_i, query, num_images=10):
    """Scrape Baidu image search for ``query`` and save up to ``num_images`` results.

    Images are written to ``data/class{query_i}/{n}.jpg`` where ``n`` counts
    from 0 in the order the images appeared on the result page.

    Args:
        query_i: Class index; used to build the output folder name.
        query: Search keyword passed to Baidu image search.
        num_images: Maximum number of image URLs to collect and download.
    """
    # Function-scope import so the block does not depend on another cell.
    from urllib.parse import quote

    # Give up scrolling after this many consecutive rounds that reveal no new
    # image URL. Without the cap, a query with fewer than ``num_images``
    # results would keep the loop spinning forever.
    max_idle_scrolls = 5

    # Create the output directory before starting the browser so a filesystem
    # error cannot leave a Chrome process behind.
    data_dir = "data"
    query_dir = os.path.join(data_dir, f"class{query_i}")
    os.makedirs(query_dir, exist_ok=True)

    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    search_url = f"https://image.baidu.com/search/index?tn=baiduimage&word={quote(str(query), safe='')}"

    # Chrome options: headless mode, no GPU, no sandbox.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")

    # Start the WebDriver.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

    try:
        driver.get(search_url)
        # A dict is used as an insertion-ordered set: duplicates are dropped
        # while the page order of the results is kept.
        images = {}
        idle_scrolls = 0

        while len(images) < num_images and idle_scrolls < max_idle_scrolls:
            known = len(images)

            # Collect every image element currently present in the page.
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            for img in soup.find_all('img', class_='main_img'):
                src = img.get('src')
                if src and src.startswith('http'):
                    images[src] = None

            if len(images) >= num_images:
                break
            idle_scrolls = idle_scrolls + 1 if len(images) == known else 0

            # Scroll down so the page loads more images, then wait for them.
            driver.execute_script("window.scrollBy(0, document.body.scrollHeight);")
            time.sleep(2)

        print(f"Found {len(images)} images.")

        images_list = list(images)[:num_images]

        # Download each image; one failure must not abort the remaining ones.
        for i, img_url in enumerate(images_list):
            try:
                response = requests.get(img_url, timeout=5)
                # Do not store an HTTP error page under a .jpg name.
                response.raise_for_status()
                with open(os.path.join(query_dir, f"{i}.jpg"), "wb") as file:
                    file.write(response.content)
                print(f"Downloaded image {i+1}/{num_images} for {query}")
            except Exception as e:
                print(f"Failed to download image {i+1} for {query}: {e}")

    finally:
        driver.quit()

In [None]:
# Edible-mushroom search keywords; the list index becomes the class id.
keywords = [
    "羊肚菌", "牛肝菌", "鸡油菌", "鸡枞菌", "青头菌", "奶浆菌", "干巴菌", "虎掌菌",
    "白葱牛肝菌", "老人头菌", "猪肚菌", "谷熟菌", "白参菌", "黑木耳", "银耳", "金耳",
    "猴头菇", "香菇", "平菇", "金针菇", "口蘑", "鹿茸菇", "榆黄蘑", "榛蘑", "草菇",
    "鸡腿菇", "茶树菇", "蟹味菇", "白玉菇", "红菇", "杏鲍菇", "松茸", "姬松茸", "松露",
    "竹荪", "虫草花"
]
# Number of images requested per keyword.
number_of_images = 200

# Root folder of the downloaded images; a later cell passes this name to
# rename_images_in_folders.
data_dir = "data"

# Write the keyword -> class-folder mapping, one "<keyword> class<i>" pair per
# line. The trailing newline keeps the entries from running together, and the
# explicit UTF-8 encoding makes the Chinese names independent of the platform
# default encoding.
with open('label.txt', 'w', encoding='utf-8') as f:
    for i, keyword in enumerate(keywords):
        download_images(i, keyword, number_of_images)
        f.write(f"{keyword} class{i}\n")

In [None]:
import os

def _numbered_first(filename):
    """Sort key: purely numeric stems in numeric order, then all other names alphabetically."""
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (0, int(stem), filename)
    return (1, 0, filename)


def rename_images_in_folders(data_dir):
    """Renumber the images in every sub-folder of ``data_dir`` as ``0.jpg``, ``1.jpg``, ...

    Files whose name ends in .png/.jpg/.jpeg/.gif (case-insensitive) are
    renamed; only the name changes, the file content is not converted.
    Other files and anything directly inside ``data_dir`` are left alone.

    The rename runs in two passes through temporary names. Renaming straight
    to ``{i}.jpg`` can target a name that another not-yet-processed image
    still holds, which silently overwrites that image on POSIX and fails on
    Windows.

    Args:
        data_dir: Directory whose immediate sub-folders contain the images.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.gif')

    # Walk every sub-folder of data_dir.
    for folder_name in os.listdir(data_dir):
        folder_path = os.path.join(data_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Deterministic order; already-numbered files keep their relative order.
        image_files = sorted(
            (f for f in os.listdir(folder_path) if f.lower().endswith(image_exts)),
            key=_numbered_first,
        )

        # Pass 1: park every image under a temporary name that nothing else uses.
        staged = []
        for i, old_filename in enumerate(image_files):
            attempt = 0
            tmp_path = os.path.join(folder_path, f"__renaming_{i}_{attempt}__{old_filename}")
            while os.path.exists(tmp_path):
                attempt += 1
                tmp_path = os.path.join(folder_path, f"__renaming_{i}_{attempt}__{old_filename}")
            try:
                os.rename(os.path.join(folder_path, old_filename), tmp_path)
                staged.append((old_filename, tmp_path))
            except OSError as e:
                print(f"Failed to rename {old_filename} in {folder_name}: {e}")

        # Pass 2: hand out the final sequential names.
        next_index = 0
        for old_filename, tmp_path in staged:
            # Skip any number still occupied (e.g. by a file that could not be
            # parked in pass 1) instead of overwriting it.
            while os.path.exists(os.path.join(folder_path, f"{next_index}.jpg")):
                next_index += 1
            new_filename = f"{next_index}.jpg"
            try:
                os.rename(tmp_path, os.path.join(folder_path, new_filename))
                print(f"Renamed {old_filename} to {new_filename} in {folder_name}")
            except OSError as e:
                print(f"Failed to rename {old_filename} in {folder_name}: {e}")

# Renumber the images of every class folder. `data_dir` is not defined in this
# cell; it must already have been assigned by an earlier cell.
rename_images_in_folders(data_dir)





In [29]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import os
from PIL import Image
from io import BytesIO

# Names of the edible mushrooms to look up.
keywords = [
    "羊肚菌", "牛肝菌", "鸡油菌", "鸡枞菌", "青头菌", "奶浆菌", "干巴菌", "虎掌菌",
    "白葱牛肝菌", "老人头菌", "猪肚菌", "谷熟菌", "白参菌", "黑木耳", "银耳", "金耳",
    "猴头菇", "香菇", "平菇", "金针菇", "口蘑", "鹿茸菇", "榆黄蘑", "榛蘑", "草菇",
    "鸡腿菇", "茶树菇", "蟹味菇", "白玉菇", "红菇", "杏鲍菇", "松茸", "姬松茸", "松露",
    "竹荪", "虫草花"
]

def fetch_mushroom_info(keyword):
    """Fetch the summary text and image URL for ``keyword`` from Baidu Baike.

    Args:
        keyword: Entry name appended to ``https://baike.baidu.com/item/``.

    Returns:
        A ``(description, img_url)`` tuple taken from the page's
        ``<meta name="description">`` and ``<meta name="image">`` tags.
        Each element is None when its tag or ``content`` attribute is missing.
        ``(None, None)`` is returned when the request fails or the response
        status is not 200.
    """
    url = f"https://baike.baidu.com/item/{keyword}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0'
    }
    try:
        # The timeout keeps one unresponsive page from hanging the whole run.
        response = requests.get(url, headers=headers, allow_redirects=True, timeout=10)
    except requests.RequestException as e:
        print(f"Request for {keyword} failed: {e}")
        return None, None

    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    def meta_content(name):
        """Return the stripped ``content`` of ``<meta name=...>``, or None if absent."""
        tag = soup.find('meta', attrs={'name': name})
        if tag is None or 'content' not in tag.attrs:
            return None
        return tag['content'].strip()

    # Each tag is checked on its own. The image lookup used to be guarded by
    # the description tag, which raised TypeError on pages that have a
    # description but no image meta tag.
    description = meta_content('description')
    img_url = meta_content('image')

    return description, img_url

def create_database():
    """Create the ``mushrooms`` table in ``mushrooms2.db`` if it does not exist.

    The table has three TEXT columns: name, description, image_path.
    Calling the function again on an existing database is a no-op.
    """
    conn = sqlite3.connect('mushrooms2.db')
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS mushrooms
                 (name TEXT, description TEXT, image_path TEXT)''')
        conn.commit()
    finally:
        # Close the connection even when the statement fails.
        conn.close()

def save_to_database(name, description, image_path):
    """Insert one row into the ``mushrooms`` table of ``mushrooms2.db``.

    Args:
        name: Mushroom name.
        description: Description text.
        image_path: Path of the downloaded image, or None when there is none.

    Raises:
        sqlite3.Error: If the insert fails (for example, the table is missing).
    """
    conn = sqlite3.connect('mushrooms2.db')
    try:
        c = conn.cursor()
        # Parameter binding keeps the scraped text from being read as SQL.
        c.execute("INSERT INTO mushrooms (name, description, image_path) VALUES (?, ?, ?)", (name, description, image_path))
        conn.commit()
    finally:
        # Close the connection even when the insert fails.
        conn.close()

def download_image(name, img_url, folder='images'):
    """Download ``img_url`` and store it as ``{folder}/{name}.jpg``.

    Args:
        name: Base name of the output file (without extension).
        img_url: URL of the image to fetch.
        folder: Target directory; created if it does not exist.

    Returns:
        The path of the written file.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)

    # The timeout stops a stalled server from blocking the run, and
    # raise_for_status keeps an HTTP error page from being saved as an image.
    response = requests.get(img_url, timeout=10)
    response.raise_for_status()

    img_name = os.path.join(folder, f"{name}.jpg")
    with open(img_name, 'wb') as handler:
        handler.write(response.content)
    return img_name

def display_image(image_path):
    """Open the image at ``image_path`` with PIL's viewer, or report that it is missing.

    A falsy path or a path that does not exist prints an
    "Image not found" message instead of opening anything.
    """
    missing = not image_path or not os.path.exists(image_path)
    if missing:
        print(f"Image not found: {image_path}")
        return

    picture = Image.open(image_path)
    picture.show()

if __name__ == "__main__":
    # Make sure the table exists before any row is inserted.
    create_database()

    for keyword in keywords:
        print(f"Fetching information for {keyword}...")
        description, img_url = fetch_mushroom_info(keyword)

        # image_path stays None when there is no URL or the download fails.
        image_path = None
        if img_url:
            try:
                image_path = download_image(keyword, img_url)
                print(f"Downloaded image for {keyword}: {image_path}")
            except Exception as e:
                print(f"Failed to download image for {keyword}: {e}")

        # Entries without a description are reported and not stored.
        if not description:
            print(f"No information found for {keyword}.")
            continue

        save_to_database(keyword, description, image_path)
        print(f"Saved information for {keyword}.")
        print(description)
        display_image(image_path)


Fetching information for 羊肚菌...
Downloaded image for 羊肚菌: images\羊肚菌.jpg
Saved information for 羊肚菌.
羊肚菌（学名：Morchella esculenta (L.) Pers. ）是羊肚菌科、羊肚菌属真菌，菌盖近球形、卵形至椭圆形，高可达10厘米，顶端钝圆，表面有似羊肚状的凹坑。凹坑不定蛋壳色至淡黄褐色，棱纹色较浅，柄近圆柱形，近白色，中空，圆筒形，孢子长椭圆形，无色，侧丝顶端膨大，体轻，质酥脆。羊肚菌在全世界都有分布，其中在法国、德国、美国、印度、中国分布较广，其次在俄罗斯、瑞典、墨西哥、西班牙、捷克斯洛伐克和巴基斯坦局部地区等均有零星分布。羊肚菌在中国的分布极为广泛，北至东北三省，南至广东、福建、台湾，东至山东，西至新疆、西藏、宁夏、贵州共28个省、市、自治区。羊肚菌多生长在阔叶林或针阔混交林的腐殖质层上。主要生长于富含腐殖质的沙壤土中或褐土、棕壤等。羊肚菌在火烧后的林地上比较容易大发生。羊肚菌是食药兼用菌，其香味独特，营养丰富，富含多种人体需要的氨基酸和有机锗，一直被欧美等国家作为人
Fetching information for 牛肝菌...
Downloaded image for 牛肝菌: images\牛肝菌.jpg
Saved information for 牛肝菌.
牛肝菌是牛肝菌科和松塔牛肝菌科等真菌的统称，是野生而可以食用的菇菌类，其中除少数品种有毒或味苦而不能食用外，大部分品种均可食用。主要有白、黄、黑牛肝菌。白牛肝菌味道鲜美，营养丰富。该菌菌体较大，肉肥厚，柄粗壮，食味香甜可口，营养丰富，是一种世界性著名食用菌。西欧各国也有广泛食用白牛肝菌的习惯，除新鲜的作菜外，大部分切片干燥，加工成各种小包装，用来配制汤料或做成酱油浸膏，也有制成盐腌品食用。
Fetching information for 鸡油菌...
Downloaded image for 鸡油菌: images\鸡油菌.jpg
Saved information for 鸡油菌.
鸡油菌（学名：Cantharellus cibarius Fr.）为鸡油菌科鸡油菌属真菌，又名鸡蛋黄菌、黄菌、杏菌等。鸡油菌子实体肉质，喇叭形，杏黄色至蛋黄色。菌盖宽3~10厘米，高7~12厘米，初扁平，

In [None]:
import os

# Edible-mushroom names; the same list is also defined in earlier cells.
keywords = [
    "羊肚菌", "牛肝菌", "鸡油菌", "鸡枞菌", "青头菌", "奶浆菌", "干巴菌", "虎掌菌",
    "白葱牛肝菌", "老人头菌", "猪肚菌", "谷熟菌", "白参菌", "黑木耳", "银耳", "金耳",
    "猴头菇", "香菇", "平菇", "金针菇", "口蘑", "鹿茸菇", "榆黄蘑", "榛蘑", "草菇",
    "鸡腿菇", "茶树菇", "蟹味菇", "白玉菇", "红菇", "杏鲍菇", "松茸", "姬松茸", "松露",
    "竹荪", "虫草花"
]

