# 使用GalTransl进行翻译

- **项目地址**: [https://github.com/xd2333/GalTransl](https://github.com/xd2333/GalTransl)

## 使用方法

* 导入 `fatecyx/galtransl-7b` 模型。
  * 使用其他模型时，注意替换启动模型单元中的 `MODEL_PATH`（本模型对应的路径为 `/kaggle/input/galtransl-7b/gguf/galtransl-7b-v2-q6_k/1/GalTransl-7B-v2-Q6_K.gguf`）。

* 导入 `fatecyx/LLMServer` 和 `fatecyx/GalTransl` 数据集。

* 通过 Datasets 上传 GalTransl 工程目录，并将下方 `cp` 命令中的 `/kaggle/input/dec-i18n-project2/DEC_i18n_project` 替换为该工程目录的实际路径。

In [None]:
# Copy the translation project from the input dataset into the writable working directory.
!cp -r /kaggle/input/dec-i18n-project2/DEC_i18n_project /kaggle/working/galtransl_project

# Copy the LLMServer and GalTransl datasets into the working directory as well.
!cp -r /kaggle/input/llmserver/LLMServer /kaggle/working/LLMServer
!cp -r /kaggle/input/galtransl/GalTransl /kaggle/working/GalTransl

# Edit the project config so that logs are saved (rewrites the saveLog entry to "saveLog: true",
# keeping its original indentation).
!sed -i 's/\(^\s*\)saveLog:.*/\1saveLog: true/' /kaggle/working/galtransl_project/config.yaml

# Add the python symlinks in GalTransl's env/bin, pointing at the conda interpreter.
%cd /kaggle/working/GalTransl/env/bin
!ln -s /opt/conda/bin/python3 python
!ln -s python python3
!ln -s python python3.10

# Install the LLMServer dependencies (llama-cpp-python comes from the cu121 wheel index).
%cd /kaggle/working/LLMServer
!pip install "diskcache>=5.6.1"
!pip install llama-cpp-python -i https://sakurallm.github.io/llama-cpp-python/whl/cu121
!pip install -q -r requirements.llamacpp.txt


In [None]:
# ngrok: expose a local HTTP server for downloading files mid-run (usually not needed).
# Uncomment the next line to install pyngrok before supplying a token.
# !pip install -q pyngrok
# Leave empty to skip the HTTP server and the tunnel (the code below checks `if ngrokToken:`).
ngrokToken = ""

# Port the local HTTP server listens on, and the directory it reports serving.
PORT = 8000
DIRECTORY = "/kaggle/working"
def local_http():
    """Serve ``DIRECTORY`` over HTTP on ``PORT``.

    Blocks in ``serve_forever()``, so callers that need to keep working
    should run this function in a background thread.
    """
    import functools
    import http.server
    import socketserver

    # SimpleHTTPRequestHandler.__init__ sets self.directory from its
    # ``directory`` keyword (falling back to the current working directory),
    # so assigning a class attribute is overridden on every request. Binding
    # the keyword makes the handler actually serve DIRECTORY and avoids
    # mutating the shared standard-library class.
    Handler = functools.partial(
        http.server.SimpleHTTPRequestHandler, directory=DIRECTORY
    )

    with socketserver.TCPServer(("", PORT), Handler) as httpd:
        print("HTTP server is running at port", PORT)
        print("Server directory is", DIRECTORY)
        httpd.serve_forever()

# Only start the file server and the ngrok tunnel when a token was provided.
if ngrokToken:
    import threading

    # Run the blocking HTTP server in a daemon thread so this cell can return.
    http_thread = threading.Thread(target=local_http, args=(), daemon=True)
    http_thread.start()

    from pyngrok import conf, ngrok

    ngrok_conf = conf.get_default()
    ngrok_conf.auth_token = ngrokToken
    ngrok_conf.monitor_thread = False

    # Reuse an already-open tunnel if there is one; otherwise open a new one on PORT.
    ssh_tunnels = ngrok.get_tunnels(ngrok_conf)
    if ssh_tunnels:
        print('address：'+ssh_tunnels[0].public_url)
    else:
        ssh_tunnel = ngrok.connect(PORT)
        print('address：'+ssh_tunnel.public_url)


In [None]:
# Start the Sakura model server (run from the LLMServer directory so server.py is found).
%cd /kaggle/working/LLMServer
from pathlib import Path
# Path of the GGUF model file passed to server.py.
# NOTE(review): the usage notes at the top of this notebook mention a galtransl-7b path,
# while this points at a qwen2.5-14b file — confirm which model is intended.
MODEL_PATH = "/kaggle/input/qwen2.5-14b/gguf/qwen2.5-14b-iq4xs/1/qwen2.5-14b-iq4xs.gguf"

import subprocess
import threading

def local_model(dic_status):
    """Launch ``server.py`` and watch its stderr for the ready message.

    Args:
        dic_status: Shared dict with the keys ``'status'`` (bool) and
            ``'msg'`` (str). Until the server reports that it is running,
            ``'msg'`` is overwritten with each stderr line; ``'status'`` is
            set to True once the ready line has been seen.

    The function returns when the server's stderr stream is closed.
    """
    # Pass an argument list instead of a shell string so MODEL_PATH reaches
    # the server intact even if it contains spaces or shell metacharacters.
    cmd = [
        "python",
        "server.py",
        "--model_name_or_path",
        MODEL_PATH,
        "--llama_cpp",
        "--use_gpu",
        "--model_version",
        "0.9",
        "--trust_remote_code",
        "--no-auth",
    ]
    p = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    # Keep consuming stderr after startup as well; only the lines seen before
    # the ready message are recorded.
    for line in p.stderr:
        if not dic_status['status']:
            # Replace undecodable bytes rather than letting a decode error
            # end this thread before readiness is detected.
            s2 = line.decode(errors="replace")
            dic_status['msg'] = s2
            if "INFO Running on http://127.0.0.1:5000" in s2:
                dic_status['status'] = True
import time

# Shared state that the reader thread updates while the server starts up.
dic_status = {'status': False, 'msg': ''}

# Keep a handle on the thread so a server that dies during startup can be detected.
model_thread = threading.Thread(target=local_model, daemon=True, args=(dic_status,))
model_thread.start()

s = ""
while not dic_status['status']:
    # Echo the most recent stderr line whenever it changes.
    if s != dic_status['msg']:
        s = dic_status['msg']
        print(s)
    # The reader thread ends when the server's stderr closes. If that happens
    # before the ready line was seen, the server failed to start; raise instead
    # of waiting forever.
    if not model_thread.is_alive() and not dic_status['status']:
        raise RuntimeError(
            "Model server exited before becoming ready; "
            f"last stderr line: {dic_status['msg']!r}"
        )
    # Poll instead of spinning so the reader thread is not starved of CPU time.
    time.sleep(0.1)
print("模型启动成功")


In [None]:
import subprocess
import time
import threading

# Buffers and time bookkeeping.
# Each buffer receives (timestamp, text) tuples from read_output, which keeps only the
# two most recent entries.
output_buffer = []
error_buffer = []
# Time of the last periodic report printed by the monitoring loop.
last_output_time = time.time()

# Overall time limit for the translation run, in seconds (11 hours).
timeout = int(11 * 3600)
# NOTE(review): not referenced by the visible code — the monitoring loop uses a hard-coded
# 3600 s interval. Presumably meant as the report interval ("interval"); confirm.
output_internal = 1800

def read_output(pipe, buffer):
    """Read lines from a binary pipe and keep the latest ones in ``buffer``.

    Args:
        pipe: Object whose ``readline()`` returns bytes and ``b''`` at EOF.
        buffer: List that receives ``(timestamp, text)`` tuples; it is
            trimmed so that at most the two newest entries remain.

    Lines that fail to decode as UTF-8 and lines that are empty after
    stripping whitespace are skipped.
    """
    while True:
        raw = pipe.readline()
        if raw == b'':
            # End of stream.
            break
        try:
            text = raw.decode('utf-8').strip()
        except Exception:
            # Best effort: drop lines that cannot be decoded.
            continue
        if not text:
            continue
        buffer.append((time.time(), text))
        if len(buffer) > 2:
            # Discard the oldest entry.
            del buffer[0]

# Launch GalTransl as a child process.
process = subprocess.Popen(
    ["/kaggle/working/GalTransl/env/bin/python",
     "-m", "GalTransl",
     "-p", "/kaggle/working/galtransl_project",
     "-t", "sakura-v1.0"],
    cwd="/kaggle/working/GalTransl",
    stdout=subprocess.PIPE,  # capture standard output
    stderr=subprocess.PIPE,  # capture standard error
)

# One reader thread per pipe; each keeps the most recent lines in its buffer.
output_thread = threading.Thread(target=read_output, args=(process.stdout, output_buffer))
error_thread = threading.Thread(target=read_output, args=(process.stderr, error_buffer))

output_thread.start()
error_thread.start()

start_time = time.time()

try:
    while process.poll() is None:
        # Once an hour, print the buffered lines together with their timestamps.
        current_time = time.time()
        if current_time - last_output_time >= 3600:
            # Remember the previous report time before overwriting it. Filtering
            # against the new value would compare every buffered line with "now"
            # and discard essentially all of them.
            previous_output_time = last_output_time
            last_output_time = current_time

            # Collect the latest stdout and stderr lines.
            combined_output = output_buffer + error_buffer
            # Keep only the lines produced since the previous report.
            combined_output = [item for item in combined_output if item[0] >= previous_output_time]
            # Order them chronologically.
            combined_output.sort(key=lambda x: x[0])

            # Print the report.
            print(f"Output at {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}:")
            for timestamp, line in combined_output:
                print(line)

            # Print the timestamps of the first and last reported lines.
            if combined_output:
                first_timestamp = combined_output[0][0]
                last_timestamp = combined_output[-1][0]
                print(f"First log time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(first_timestamp))}")
                print(f"Last log time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_timestamp))}")

        # Enforce the overall time limit.
        if current_time - start_time >= timeout:
            raise subprocess.TimeoutExpired(process.args, timeout)

        time.sleep(1)

    # Wait for the reader threads to finish draining the pipes.
    output_thread.join()
    error_thread.join()

    # Report success only when the child actually exited cleanly.
    if process.returncode == 0:
        print("Process completed successfully.")
    else:
        print(f"Process exited with return code {process.returncode}.")
        # Show the last captured lines to help diagnose the failure.
        for _, line in sorted(output_buffer + error_buffer, key=lambda item: item[0]):
            print(line)

except subprocess.TimeoutExpired:
    process.terminate()
    # Give the child a chance to shut down before later cells read its output
    # files; force-kill it if it does not exit in time.
    try:
        process.wait(timeout=30)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()
    print("Process timed out and was terminated.")

## 结果

- 结束时会生成 `/kaggle/working/result.zip` 文件。
- 成功结束时，日志会打印 `Process completed successfully.`。
- 如果未完成，则会显示 `Process timed out and was terminated.`。此时可以将 `result.zip` 重新作为输入文件再次运行。

In [None]:
import shutil
# Zip the whole project directory into /kaggle/working/result.zip
# (make_archive appends the ".zip" extension to the base name).
shutil.make_archive('/kaggle/working/result', 'zip', '/kaggle/working/galtransl_project')