In [29]:
import os
import shutil
import re
from pathlib import Path

# Regex matched against bare file names (via re.search): the name must contain
# "说明书" followed by any characters and end with ".docx".
pattern = r"说明书.*\.docx$"

def find_latest_files(src_folder, name_pattern):
    """
    Walk ``src_folder`` recursively and keep the newest copy of each matching file.

    Matches are grouped by bare file name (not by path): when the same name shows up
    in several subfolders, only the copy with the most recent modification time is
    kept. On a tie the copy encountered first wins. This function only collects
    paths; it does not copy anything.

    Progress is reported by printing every 10th match whose modification time
    could be read.

    :param src_folder: The root folder to search in.
    :param name_pattern: Regular expression applied with ``re.search`` to each file name.
    :return: A tuple ``(latest, unreadable)``. ``latest`` maps file name -> full path
             of the most recently modified match. ``unreadable`` lists the full paths
             of matches whose modification time could not be read.
    """
    regex = re.compile(name_pattern)
    file_map = {}             # file name -> (full path, modification time)
    not_valid_file_list = []  # matches that could not be stat'ed
    file_count = 0            # matches read successfully, drives the progress print

    for root, _, files in os.walk(src_folder):
        for file in files:
            if not regex.search(file):
                continue

            full_path = os.path.join(root, file)
            try:
                last_modified = os.path.getmtime(full_path)
            except FileNotFoundError:
                print(f"Warning: File not found: {full_path}")
                not_valid_file_list.append(full_path)
                continue
            except OSError as e:
                # Any other stat failure (e.g. permission denied) is recorded and
                # skipped as well, so one bad file cannot abort the whole scan.
                print(f"Warning: Cannot read file: {full_path} ({e})")
                not_valid_file_list.append(full_path)
                continue

            file_count += 1
            if file_count % 10 == 0:
                print(f"file: {file}")

            # Strict '>' keeps the first-seen copy when two mtimes are equal.
            if file not in file_map or last_modified > file_map[file][1]:
                file_map[file] = (full_path, last_modified)

    # Return only the file paths
    return {file: data[0] for file, data in file_map.items()}, not_valid_file_list

def sanitize_filename(filename):
    """
    Replace characters that are not allowed in Windows file names with ``_``.

    Replaced characters: ``< > : " / \\ | ? *`` and the control characters
    0x00-0x1F. All other characters (including non-ASCII text) are kept as-is.

    :param filename: The bare file name to clean (not a full path; separators
                     are replaced too).
    :return: The file name with every forbidden character replaced by ``_``.
    """
    # The backslash is written as '\\' so the character class matches a literal
    # backslash; an unescaped '\|' would only be read as an escaped pipe.
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", filename)

def copy_files_to_dest(file_map, dest_folder):
    """
    Copy files from their source paths to the destination folder.

    The destination folder is created if it does not exist yet. Each file is
    written under its sanitized name (see ``sanitize_filename``) using
    ``shutil.copy2``. Problems are reported with ``print`` and never raised:
    a missing source file or a failed copy skips that file, and a destination
    folder that cannot be created aborts the whole call.

    :param file_map: A dictionary with file names as keys and their source paths as values.
    :param dest_folder: The folder to copy files to.
    :return: None
    """
    # Without this, every copy below fails when the folder is missing.
    try:
        os.makedirs(dest_folder, exist_ok=True)
    except OSError as e:
        print(f"Error creating destination folder {dest_folder}: {e}")
        return

    for file, full_path in file_map.items():
        # Sanitize file name to avoid invalid characters
        sanitized_file = sanitize_filename(file)
        dest_path = os.path.join(dest_folder, sanitized_file)

        print(f"Source: {full_path}, Destination: {dest_path}")

        # The source may have disappeared since the scan that built file_map.
        if not os.path.exists(full_path):
            print(f"Warning: Source file does not exist: {full_path}")
            continue

        try:
            shutil.copy2(full_path, dest_path)
            print(f"Copied: {file} to {dest_folder}")
        except OSError as e:
            # Best effort: report the failure and carry on with the remaining files.
            print(f"Error copying {file}: {e}")

In [10]:
# Example usage
# Root folder to scan (absolute, machine-specific path).
source_folder = r"C:\Users\feng.z\OneDrive - KASIKORNBANKGROUP\_Projects_"
# Folder that copy_files_to_dest would write into.
destination_folder = r"D:\docs\project\requirement"
# Duplicates the `pattern` value already assigned in the cell that defines the helpers.
pattern = r"说明书.*\.docx$"

# Scan only: returns {file name: newest full path} plus the list of paths
# that could not be read during the scan.
qualified_files, invalid_files = find_latest_files(source_folder, pattern)
# The full copy is commented out; a later cell copies a two-file sample instead.
#copy_files_to_dest(qualified_files, destination_folder)


file: 信保贷二期业务需求说明书 V1.0-20210818-OMT20210820_en.docx
file: CUP CAR LOAN 需求说明书 v1.0.4.docx
file: CUP CAR LOAN 需求说明书 v1.1.2.docx
file: CUP CAR LOAN 需求说明书 v1.1.3_Final.docx
file: FAF 需求说明书 v1.0.0.docx
file: 微信银行业务需求说明书 v1.2.docx
file: 微信银行业务需求说明书 v1.5.4.docx
file: 微信银行 IDAMS需求说明书.docx
file: 微信银行业务需求说明书 v1.5.0.docx
file: 材料2.1：微信银行业务需求说明书 v1.5.4.docx
file: OFFCN EDU LOAN Phase2 需求说明书 v1.0.4_20211220.docx
file: PAPH LOAN 需求说明书 v1.0.1.docx
file: PBOC人行征信衍生变量管理系统 需求说明书 v1.1.0.docx
file: CUP CAR LOAN 需求说明书 v1.1.3_0330.docx
file: CUP CAR LOAN 需求说明书 v1.0.4.docx
file: CUP CAR LOAN 需求说明书 v1.1.2.docx
file: 信保贷业务需求说明书 V1.12-20210319.docx
file: 信保贷二期业务需求说明书 V1.4-20210908(SIGNOFF).docx
file: CUP CAR LOAN 需求说明书 v1.1.0_0119.docx
file: 直连中企 需求说明书 v1.0.3.docx
file: Baode Car Leasing Loan 需求说明书 v1.3.1.docx
file: 宝德车租贷系统功能说明书V1.3.3.docx
file: 宝德车租贷系统功能说明书V1.3.3.docx
file: 开泰银行CNAPS项目需求分析说明书_ver1.2.docx
file: 开泰银行CNAPS项目需求分析说明书_ver2.12 Final.docx
file: 开泰银行CNAPS项目需求分析说明书_ver1.1.docx
file: 开泰银行CNAPS项目需求分析说明书_

In [20]:
# Peek at the first file name (dict key) in the scan result.
# Raises StopIteration if qualified_files is empty.
next(iter(qualified_files))

'开泰银行(中国)_AML系统-睡眠户提醒需求规格说明书V.docx'

In [23]:
from itertools import islice
# Take the first two (file name, path) entries as a small sample for a trial copy.
sample_files = dict(islice(qualified_files.items(), 2))

In [35]:
# Trial copy of the two-file sample only. The captured output below shows both
# copies failing with "[Errno 22] Invalid argument".
copy_files_to_dest(sample_files, destination_folder)

Source: C:\Users\feng.z\OneDrive - KASIKORNBANKGROUP\_Projects_\1Interface documents\09_AML\Project Requirement\开泰银行(中国)_AML系统-睡眠户提醒需求规格说明书V.docx, Destination: D:\docs\project\requirement\开泰银行(中国)_AML系统-睡眠户提醒需求规格说明书V.docx
Error copying 开泰银行(中国)_AML系统-睡眠户提醒需求规格说明书V.docx: [Errno 22] Invalid argument
Source: C:\Users\feng.z\OneDrive - KASIKORNBANKGROUP\_Projects_\XBD_YG\阳光\BRD\Release2\开泰银行(中国)_AML系统_PAOC需求说明书 V5.docx, Destination: D:\docs\project\requirement\开泰银行(中国)_AML系统_PAOC需求说明书 V5.docx
Error copying 开泰银行(中国)_AML系统_PAOC需求说明书 V5.docx: [Errno 22] Invalid argument


In [30]:
# NOTE(review): is_file_downloaded is not defined in any cell visible here, so it
# presumably came from a deleted or out-of-order cell — confirm and restore its
# definition so the notebook survives Restart & Run All.
is_file_downloaded(r"C:\Users\feng.z\OneDrive - KASIKORNBANKGROUP\_Projects_\1Interface documents\09_AML\Project Requirement\开泰银行(中国)_AML系统-睡眠户提醒需求规格说明书V.docx")

True