In [2]:
import sys
import os

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.insert(0, project_root)



In [3]:
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Download from the ðŸ¤— Hub
model = SentenceTransformer("all-MiniLM-L6-v2")
# Run inference
sentences = [
    'Volksvertreter',
    'Parlamentarier',
    'OberbÃ¼rgermeister',
]
embeddings = model.encode(sentences)
print(embeddings.shape)

(3, 384)


In [3]:

# Download from the ðŸ¤— Hub
model = SentenceTransformer("all-MiniLM-L6-v2")
# Run inference
sentences = [
    'Volksvertreter',
    'Parlamentarier',
    'OberbÃ¼rgermeister',
]
embeddings = model.encode(sentences, normalize_embeddings=True)
print(embeddings.shape)

(3, 384)


In [4]:
from model import BiEncoder 
import numpy as np

def test_normalization_flag(base_config):
    """Test that the normalize_output flag works correctly."""
    # Test with normalization ON
    base_config.model.proj_dim = None # No projection for simplicity
    model_norm = BiEncoder(base_config.model, base_config.device)
    
    job_titles = ["Data Scientist"]
    job_embs_norm = model_norm.encode_job(job_titles, normalize=True)
    
    # Check that the L2 norm is close to 1
    norm = np.linalg.norm(job_embs_norm, axis=1)
    assert np.allclose(norm, 1.0), "Embeddings should be normalized"

    # Test with normalization OFF
    model_no_norm = BiEncoder(base_config.model, base_config.device)

    job_embs_no_norm = model_no_norm.encode_job(job_titles, normalize=False)

    # Check that the L2 norm is NOT 1
    norm_unnormalized = np.linalg.norm(job_embs_no_norm, axis=1)
    assert not np.allclose(norm_unnormalized, 1.0), "Embeddings should not be normalized"

In [5]:
def base_config(dummy_data_files):
    """A base config that can be modified by other fixtures."""
    pairs_path, esco_path = dummy_data_files
    return Config(
        seed=42,
        device="cpu",
        model=ModelConfig(
            hf_id="sentence-transformers/all-MiniLM-L6-v2",
            proj_dim=None,
            asymmetric=False,
            normalize_output=True,
        ),
        data=DataConfig(pairs_path=pairs_path, esco_titles_path=esco_path),
        infer=InferConfig(batch_size=32, topk=5),
        artifacts=ArtifactsConfig(run_dir="runs/test"),
    )

In [17]:
from src.config import Config, ModelConfig, DataConfig, InferConfig, ArtifactsConfig
import os
import tempfile

# Create dummy files for the config
temp_dir = tempfile.gettempdir()
pairs_path = os.path.join(temp_dir, "dummy_pairs.jsonl")
esco_path = os.path.join(temp_dir, "dummy_esco.jsonl")

with open(pairs_path, "w") as f:
    f.write('{"skill": "python", "job": "Data Scientist"}\n')

with open(esco_path, "w") as f:
    f.write('{"title": "Data Scientist"}\n')

dummy_data_files = (pairs_path, esco_path)

def create_base_config(dummy_data_files):
    """A base config that can be modified by other fixtures."""
    pairs_path, esco_path = dummy_data_files
    return Config(
        seed=42,
        device="cpu",
        model=ModelConfig(
            hf_id="sentence-transformers/all-MiniLM-L6-v2",
            proj_dim=None,
            asymmetric=False,
            normalize_output=False,
        ),
        data=DataConfig(pairs_path=pairs_path, esco_titles_path=esco_path),
        infer=InferConfig(batch_size=32, topk=5),
        artifacts=ArtifactsConfig(run_dir="runs/test"),
    )

base_conf = create_base_config(dummy_data_files)



In [8]:
test_normalization_flag(base_conf)

AssertionError: Embeddings should not be normalized

In [None]:
from model import BiEncoder 
import numpy as np

def test_normalization_flag(base_config):
    """Test that the normalize_output flag works correctly."""
    # Test with normalization ON
    base_config.model.proj_dim = None # No projection for simplicity
    model_norm = BiEncoder(base_config.model, base_config.device)
    
    job_titles = ["Data Scientist"]
    job_embs_norm = model_norm.encode_job(job_titles, normalize=True)
    
    # Check that the L2 norm is close to 1
    norm = np.linalg.norm(job_embs_norm, axis=1)
    assert np.allclose(norm, 1.0), "Embeddings should be normalized"

    # Test with normalization OFF
    model_no_norm = BiEncoder(base_config.model, base_config.device)

    job_embs_no_norm = model_no_norm.encode_job(job_titles, normalize=False)

    # Check that the L2 norm is NOT 1
    norm_unnormalized = np.linalg.norm(job_embs_no_norm, axis=1)
    assert not np.allclose(norm_unnormalized, 1.0), "Embeddings should not be normalized"

In [18]:

base_conf.model.proj_dim = None # No projection for simplicity
model_norm = BiEncoder(base_conf.model, base_conf.device)

In [19]:
job_titles = ["Data Scientist"]
job_embs_norm = model_norm.encode_job(job_titles, normalize=True)

In [20]:
norm = np.linalg.norm(job_embs_norm, axis=1)
assert np.allclose(norm, 1.0), "Embeddings should be normalized"

In [21]:
model_no_norm = BiEncoder(base_conf.model, base_conf.device)

job_embs_no_norm = model_no_norm.encode_job(job_titles, normalize=False)

In [22]:
model_no_norm

BiEncoder(
  (st_model): SentenceTransformer(
    (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
    (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
    (2): Normalize()
  )
)

In [25]:
# Check that the L2 norm is NOT 1
norm_unnormalized = np.linalg.norm(job_embs_no_norm, axis=1)
assert not np.allclose(norm_unnormalized, 1.0), "Embeddings should not be normalized"

AssertionError: Embeddings should not be normalized

In [28]:
print(SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2"))

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)


In [29]:
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Drop the last module (Normalize)
model._modules.pop(str(len(model._modules)-1))

print(model)

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)


In [31]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load a pretrained model (with Normalize layer in its architecture)
model =  SentenceTransformer("all-mpnet-base-v2")

texts = ["This is a test sentence.", "Another sentence to encode."]

# Encode with normalization
emb_norm = model.encode(texts, normalize_embeddings=True)
# Encode without normalization
emb_raw = model.encode(texts, normalize_embeddings=False)

# Check L2 norms
print("With normalization:")
for i, e in enumerate(emb_norm):
    print(f"  Text {i}: norm = {np.linalg.norm(e):.4f}")

print("\nWithout normalization:")
for i, e in enumerate(emb_raw):
    print(f"  Text {i}: norm = {np.linalg.norm(e):.4f}")


With normalization:
  Text 0: norm = 1.0000
  Text 1: norm = 1.0000

Without normalization:
  Text 0: norm = 1.0000
  Text 1: norm = 1.0000


In [32]:
print(model)

SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False, 'architecture': 'MPNetModel'})
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)


In [10]:
from sentence_transformers import SentenceTransformer

# Load the JobGTE model from HuggingFace
jobgte_model = SentenceTransformer("pj-mathematician/JobGTE-multilingual-base-v2", trust_remote_code=True)

# Print the model structure
print(jobgte_model)


No sentence-transformers model found with name pj-mathematician/JobGTE-multilingual-base-v2. Creating a new one with mean pooling.


ValueError: Unrecognized model in pj-mathematician/JobGTE-multilingual-base-v2. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: aimv2, aimv2_vision_model, albert, align, altclip, apertus, arcee, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, blt, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, cohere2_vision, colpali, colqwen2, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v2, deepseek_v3, deepseek_vl, deepseek_vl_hybrid, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dia, diffllama, dinat, dinov2, dinov2_with_registers, dinov3_convnext, dinov3_vit, distilbert, doge, donut-swin, dots1, dpr, dpt, edgetam, edgetam_video, edgetam_vision_model, efficientformer, efficientloftr, efficientnet, electra, emu3, encodec, encoder-decoder, eomt, ernie, ernie4_5, ernie4_5_moe, ernie_m, esm, evolla, exaone4, falcon, falcon_h1, falcon_mamba, fastspeech2_conformer, fastspeech2_conformer_with_hifigan, flaubert, flava, flex_olmo, florence2, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, gemma3n, gemma3n_audio, gemma3n_text, gemma3n_vision, git, glm, glm4, glm4_moe, glm4v, glm4v_moe, glm4v_moe_text, glm4v_text, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gpt_oss, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, hunyuan_v1_dense, hunyuan_v1_moe, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, kosmos-2.5, kyutai_speech_to_text, layoutlm, layoutlmv2, layoutlmv3, led, levit, lfm2, lfm2_vl, lightglue, lilt, llama, llama4, llama4_text, llava, llava_next, llava_next_video, llava_onevision, longcat_flash, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, metaclip_2, mgp-str, mimi, minimax, ministral, mistral, mistral3, mixtral, mlcd, mllama, mm-grounding-dino, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, modernbert-decoder, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmo3, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, ovis2, owlv2, owlvit, paligemma, parakeet, parakeet_ctc, parakeet_encoder, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, perception_encoder, perception_lm, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, qwen3_next, qwen3_omni_moe, qwen3_vl, qwen3_vl_moe, qwen3_vl_moe_text, qwen3_vl_text, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam2, sam2_hiera_det_model, sam2_video, sam2_vision_model, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, seed_oss, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip2_vision_model, siglip_vision_model, smollm3, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, t5gemma, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, vaultgemma, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, vjepa2, voxtral, voxtral_encoder, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xcodec, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xlstm, xmod, yolos, yoso, zamba, zamba2, zoedepth

In [9]:
SentenceTransformer()

SentenceTransformer(
  (0): None
)

In [11]:
from sentence_transformers import SentenceTransformer

# Download from the ðŸ¤— Hub
model = SentenceTransformer("pj-mathematician/JobGTE-multilingual-base-v2")
# Run inference
sentences = [
    'Volksvertreter',
    'Parlamentarier',
    'OberbÃ¼rgermeister',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]


No sentence-transformers model found with name pj-mathematician/JobGTE-multilingual-base-v2. Creating a new one with mean pooling.


ValueError: Unrecognized model in pj-mathematician/JobGTE-multilingual-base-v2. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: aimv2, aimv2_vision_model, albert, align, altclip, apertus, arcee, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, blt, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, cohere2_vision, colpali, colqwen2, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v2, deepseek_v3, deepseek_vl, deepseek_vl_hybrid, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dia, diffllama, dinat, dinov2, dinov2_with_registers, dinov3_convnext, dinov3_vit, distilbert, doge, donut-swin, dots1, dpr, dpt, edgetam, edgetam_video, edgetam_vision_model, efficientformer, efficientloftr, efficientnet, electra, emu3, encodec, encoder-decoder, eomt, ernie, ernie4_5, ernie4_5_moe, ernie_m, esm, evolla, exaone4, falcon, falcon_h1, falcon_mamba, fastspeech2_conformer, fastspeech2_conformer_with_hifigan, flaubert, flava, flex_olmo, florence2, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, gemma3n, gemma3n_audio, gemma3n_text, gemma3n_vision, git, glm, glm4, glm4_moe, glm4v, glm4v_moe, glm4v_moe_text, glm4v_text, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gpt_oss, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, hunyuan_v1_dense, hunyuan_v1_moe, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, kosmos-2.5, kyutai_speech_to_text, layoutlm, layoutlmv2, layoutlmv3, led, levit, lfm2, lfm2_vl, lightglue, lilt, llama, llama4, llama4_text, llava, llava_next, llava_next_video, llava_onevision, longcat_flash, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, metaclip_2, mgp-str, mimi, minimax, ministral, mistral, mistral3, mixtral, mlcd, mllama, mm-grounding-dino, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, modernbert-decoder, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmo3, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, ovis2, owlv2, owlvit, paligemma, parakeet, parakeet_ctc, parakeet_encoder, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, perception_encoder, perception_lm, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, qwen3_next, qwen3_omni_moe, qwen3_vl, qwen3_vl_moe, qwen3_vl_moe_text, qwen3_vl_text, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam2, sam2_hiera_det_model, sam2_video, sam2_vision_model, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, seed_oss, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip2_vision_model, siglip_vision_model, smollm3, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, t5gemma, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, vaultgemma, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, vjepa2, voxtral, voxtral_encoder, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xcodec, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xlstm, xmod, yolos, yoso, zamba, zamba2, zoedepth

In [12]:
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModel
from safetensors.torch import load_file
import os, json, torch

# 1) Instantiate the base GTE (has custom code + proper auto_map)
base_id = "Alibaba-NLP/gte-multilingual-base"
tok = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
model = AutoModel.from_pretrained(base_id, trust_remote_code=True)

# 2) Download the pj checkpoint locally
repo_id = "pj-mathematician/JobGTE-multilingual-base-v2"
ckpt_dir = snapshot_download(repo_id, allow_patterns=["checkpoint-400/*"])
ckpt = os.path.join(ckpt_dir, "checkpoint-400")

# 3) Load sharded safetensors (handles both single and multi-shard)
index_path = os.path.join(ckpt, "model.safetensors.index.json")
if os.path.exists(index_path):
    index = json.load(open(index_path))
    state_dict = {}
    for shard in sorted(set(index["weight_map"].values())):
        state_dict.update(load_file(os.path.join(ckpt, shard)))
else:
    # fallback if single shard name differs
    state_dict = load_file(os.path.join(ckpt, "model.safetensors"))

# 4) Put weights into the base model
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("Missing keys:", len(missing), "Unexpected keys:", len(unexpected))

# 5) Use it
inputs = tok(["hello world"], return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():
    out = model(**inputs).last_hidden_state
emb = (out * inputs["attention_mask"].unsqueeze(-1)).sum(1) / inputs["attention_mask"].sum(1)
print(emb.shape)


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/611M [00:00<?, ?B/s]

Some weights of the model checkpoint at Alibaba-NLP/gte-multilingual-base were not used when initializing NewModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

checkpoint-400/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

checkpoint-400/optimizer.pt:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

checkpoint-400/rng_state_0.pth:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/205 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

checkpoint-400/scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

checkpoint-400/tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

checkpoint-400/scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

trainer_state.json: 0.00B [00:00, ?B/s]

checkpoint-400/training_args.bin:   0%|          | 0.00/5.62k [00:00<?, ?B/s]

: 