# 环境配置与依赖安装

本notebook用于配置多模态RAG项目的Python环境并安装所需依赖库。

## 1. 检查Python环境

In [None]:
import sys
import os

# Report which interpreter backs this kernel and where it is running from,
# so environment mix-ups are visible before anything gets installed.
env_report = (
    f"Python版本: {sys.version}",
    f"当前工作目录: {os.getcwd()}",
    f"Python可执行文件路径: {sys.executable}",
)
for report_line in env_report:
    print(report_line)

## 2. 安装核心依赖库

安装项目所需的核心依赖库，包括mineru、transformers、faiss-gpu等。

In [None]:
# 安装核心依赖
!pip install --upgrade pip
!pip install mineru
!pip install transformers
!pip install faiss-gpu
!pip install tqdm
!pip install numpy
!pip install pandas
!pip install PyPDF2
!pip install jupyter

## 3. 验证依赖安装

In [None]:
# Verify that each core dependency installed above can be imported by this
# kernel. Every check is independent, so one missing package does not hide
# the status of the others. Only ImportError is handled; the success message
# lives in the `else` branch so the `try` body covers nothing but the import.
try:
    import mineru
except ImportError as e:
    print(f"✗ mineru 安装失败: {e}")
else:
    print("✓ mineru 安装成功")

try:
    import transformers
except ImportError as e:
    print(f"✗ transformers 安装失败: {e}")
else:
    print(f"✓ transformers 安装成功，版本: {transformers.__version__}")

# NOTE: this only confirms that the `faiss` module imports; it does not
# check that a GPU is actually usable.
try:
    import faiss
except ImportError as e:
    print(f"✗ faiss-gpu 安装失败: {e}")
else:
    print("✓ faiss-gpu 安装成功")

try:
    import tqdm
except ImportError as e:
    print(f"✗ tqdm 安装失败: {e}")
else:
    print(f"✓ tqdm 安装成功，版本: {tqdm.__version__}")

try:
    import numpy as np
except ImportError as e:
    print(f"✗ numpy 安装失败: {e}")
else:
    print(f"✓ numpy 安装成功，版本: {np.__version__}")

# pandas is part of the install list, so it is verified like the others.
try:
    import pandas as pd
except ImportError as e:
    print(f"✗ pandas 安装失败: {e}")
else:
    print(f"✓ pandas 安装成功，版本: {pd.__version__}")

try:
    import PyPDF2
except ImportError as e:
    print(f"✗ PyPDF2 安装失败: {e}")
else:
    print(f"✓ PyPDF2 安装成功，版本: {PyPDF2.__version__}")

## 4. 生成requirements.txt文件

In [None]:
# 生成requirements.txt文件
!pip freeze > requirements.txt

# 显示生成的requirements.txt内容
with open('requirements.txt', 'r') as f:
    content = f.read()
    print("requirements.txt 文件内容:")
    print("=" * 50)
    print(content[:1000])  # 显示前1000个字符
    if len(content) > 1000:
        print("...\n(文件内容较长，已截断显示)")

## 5. 检查项目目录结构

In [None]:
import os

def show_directory_structure(path, prefix="", max_depth=3, current_depth=0):
    """Print the contents of *path* as an indented tree.

    Entries are listed in sorted order. Names starting with a dot are
    skipped, except for ``.gitignore``. Directories are descended into
    recursively until ``max_depth`` levels have been printed.

    Args:
        path: Directory whose contents are listed.
        prefix: Text printed before each entry's branch marker; used by the
            recursion to carry the indentation of the parent levels.
        max_depth: Number of directory levels to print.
        current_depth: Depth of *path* relative to the starting directory;
            callers normally leave this at 0.

    Raises:
        OSError: Propagated from ``os.listdir`` when *path* cannot be listed.
    """
    if current_depth >= max_depth:
        return

    # Filter hidden entries BEFORE deciding which entry is the last one;
    # otherwise a skipped entry that sorts last would leave the final
    # visible entry drawn with a "├── " branch instead of "└── ".
    visible = [
        name
        for name in sorted(os.listdir(path))
        if not name.startswith('.') or name == '.gitignore'
    ]
    last_index = len(visible) - 1

    for index, name in enumerate(visible):
        is_last = index == last_index
        connector = "└── " if is_last else "├── "
        print(f"{prefix}{connector}{name}")

        child_path = os.path.join(path, name)
        if os.path.isdir(child_path):
            # Below the last entry nothing continues, so pad with spaces;
            # otherwise keep the vertical bar running past the subtree.
            child_indent = "    " if is_last else "│   "
            show_directory_structure(
                child_path, prefix + child_indent, max_depth, current_depth + 1
            )

# Print a two-line header, then the tree of the current working directory,
# two levels deep.
# NOTE(review): "RAG-01/" is a fixed label, not derived from the actual
# directory name -- confirm it matches the project root.
for header_line in ("项目目录结构:", "RAG-01/"):
    print(header_line)
show_directory_structure('.', max_depth=2)