You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
The same code was working about a week ago, but now I get the error below when running it on Modal Labs remote GPUs.
code
import modal

# Modal application object; the decorators further down register functions on it.
stub = modal.Stub()

# Persisted Modal network file system, looked up under the label "data".
volume = modal.NetworkFileSystem.persisted("data")
MODEL_DIR = "/data"


def _render_import_script(dataset_name):
    """Return the source text of ``importIndic.py`` for *dataset_name*.

    The script loads the IndicTrans2 en-indic model and, for ``ai2_arc``,
    downloads every config's parquet splits with ``datasets.load_dataset``.
    Braces that must survive into the generated script are doubled because
    the template is itself an f-string.
    """
    # ``!r`` emits a valid Python string literal even when the name contains
    # quotes or backslashes, instead of splicing raw text between quotes.
    return f'''try:
    import torch
    import os
    import pandas as pd
    import csv
    print(torch.cuda.get_device_name(0))
    import sys
    from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig
    print('from transformers imported')
    from IndicTransTokenizer import IndicProcessor, IndicTransTokenizer
    print('from indictranstokenizer imported')

    en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"  # ai4bharat/indictrans2-en-indic-dist-200M

    BATCH_SIZE = 4
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    if len(sys.argv) > 1:
        quantization = sys.argv[1]
    else:
        quantization = ""

    def initialize_model_and_tokenizer(ckpt_dir, direction, quantization):
        if quantization == "4-bit":
            qconfig = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )
        elif quantization == "8-bit":
            qconfig = BitsAndBytesConfig(
                load_in_8bit=True,
                bnb_8bit_use_double_quant=True,
                bnb_8bit_compute_dtype=torch.bfloat16,
            )
        else:
            qconfig = None

        tokenizer = IndicTransTokenizer(direction=direction)
        model = AutoModelForSeq2SeqLM.from_pretrained(
            ckpt_dir,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            quantization_config=qconfig,
        )

        if qconfig == None:
            model = model.to(DEVICE)
            model.half()

        model.eval()

        return tokenizer, model

    def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
        translations = []
        for i in range(0, len(input_sentences), BATCH_SIZE):
            batch = input_sentences[i : i + BATCH_SIZE]

            batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

            inputs = tokenizer(
                batch,
                src=True,
                truncation=True,
                padding="longest",
                return_tensors="pt",
                return_attention_mask=True,
            ).to(DEVICE)

            with torch.no_grad():
                generated_tokens = model.generate(
                    **inputs,
                    use_cache=True,
                    min_length=0,
                    max_length=256,
                    num_beams=5,
                    num_return_sequences=1,
                )

            generated_tokens = tokenizer.batch_decode(generated_tokens.detach().cpu().tolist(), src=False)

            translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)

            del inputs
            torch.cuda.empty_cache()

        return translations

    ip = IndicProcessor(inference=True)
    en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)

    from datasets import load_dataset

    dataset_name = {dataset_name!r}
    if(dataset_name == "ai2_arc"):
        possible_configs = [
            'ARC-Challenge',
            'ARC-Easy'
        ]
        # columns to translate
        columns = ['question','choices']
        # columns not to translate, to keep in converted dataset as is.
        columns_asis = ['id','answerKey']

    dataset = []
    if(dataset_name == 'ai2_arc'):
        for config in possible_configs:
            base_url = f'https://huggingface.co/api/datasets/allenai/ai2_arc/parquet/{{config}}'
            data_files = {{'train': base_url + '/train/0.parquet','test':base_url + '/test/0.parquet', 'validation': base_url + '/validation/0.parquet'}}
            dataset_slice = load_dataset('parquet', data_files=data_files)
            dataset.append(dataset_slice)
except Exception as e:
    # Handle the exception
    print('An error occurred:'+ str(e))
'''


@stub.function(
    cpu=2,
    memory=4276,
    gpu='A10G',
    timeout=1200,
    network_file_systems={MODEL_DIR: volume},
)
def loadIndicTrans2(dataset_name):
    """Set up IndicTrans2 in the container and run the generated import script.

    Steps, in order:
      1. install ``bitsandbytes`` and ``git``, then clone the IndicTrans2 repo;
      2. change into ``IndicTrans2/huggingface_interface`` and run ``install.sh``;
      3. write ``importIndic.py`` there and execute it with ``python``.

    Args:
        dataset_name: Name interpolated into the generated script. The script
            only has a download branch for ``"ai2_arc"``.

    Returns:
        None. The child script's stdout is printed so it shows up in the logs.
    """
    import os
    import subprocess

    def _run_setup(command):
        # The commands are fixed strings, so shell=True is not an injection risk.
        # A failure is reported rather than raised, so setup stays best-effort
        # as before but a broken step is now visible in the logs.
        completed = subprocess.run(command, shell=True)
        if completed.returncode != 0:
            print(
                f"WARNING: setup command {command!r} exited with "
                f"status {completed.returncode}"
            )

    commands = [
        "pip install -q bitsandbytes",
        "apt update ",
        "apt install -y git",
        "git clone https://github.com/AI4Bharat/IndicTrans2",
    ]
    for command in commands:
        _run_setup(command)

    # NOTE: os.chdir changes the working directory for the whole process.
    os.chdir("IndicTrans2/huggingface_interface")
    _run_setup("bash install.sh")

    with open('importIndic.py', 'w', encoding='utf-8') as file:
        file.write(_render_import_script(dataset_name))

    result = subprocess.run(['python', 'importIndic.py'], stdout=subprocess.PIPE)
    # The captured output was previously discarded; surface it so the script's
    # progress and error messages are not lost.
    print(result.stdout.decode('utf-8', errors='replace'))
    if result.returncode != 0:
        print(f"WARNING: importIndic.py exited with status {result.returncode}")
@stub.local_entrypoint()
def main():
    """Local entrypoint: invoke ``loadIndicTrans2`` remotely for one dataset."""
    # provide dataset name among ai2_arc, gsm8k, lukaemon/mmlu
    dataset_name = "ai2_arc"
    loadIndicTrans2.remote(dataset_name)
The error says: `An error occurred:[Errno 2] No such file or directory: '/usr/local/lib/python3.11/site-packages/RESOURCES/script/all_script_phonetic_data.csv'`
The text was updated successfully, but these errors were encountered:
The same code was working about a week ago, but now I get the error below when running it on Modal Labs remote GPUs.
code
The error says: `An error occurred:[Errno 2] No such file or directory: '/usr/local/lib/python3.11/site-packages/RESOURCES/script/all_script_phonetic_data.csv'`
The text was updated successfully, but these errors were encountered: