In [1]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')
def clean_text(text: str, lang: str = 'english') -> str:
    # lowercase
    text = text.lower()

    # tokenize
    tokens = word_tokenize(text)

    # get stopword list for language
    stop_words = set(stopwords.words(lang))

    # filter stopwords
    tokens = [t for t in tokens if t not in stop_words]

    # join back to string
    return " ".join(tokens)


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import torch
device='cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
import pandas as pd

splits = {'train': 'data/train-00000-of-00001.parquet', 'validation': 'data/validation-00000-of-00001.parquet', 'test': 'data/test-00000-of-00001.parquet'}
df = pd.read_parquet("hf://datasets/cfilt/iitb-english-hindi/" + splits["train"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
len(df)

1659083

In [5]:
# df=df[:1000]

In [6]:
df

Unnamed: 0,translation
0,{'en': 'Give your application an accessibility...
1,"{'en': 'Accerciser Accessibility Explorer', 'h..."
2,{'en': 'The default plugin layout for the bott...
3,{'en': 'The default plugin layout for the top ...
4,{'en': 'A list of plugins that are disabled by...
...,...
1659078,"{'en': 'The Prime Minister, Shri Narendra Modi..."
1659079,"{'en': 'In a tweet, the Prime Minister said, c..."
1659080,{'en': 'I also congratulate all those who took...
1659081,{'en': 'The NDA family will work together for ...


In [7]:
df['English']=df['translation'].apply(lambda x:x['en'])
df['Hindi']=df['translation'].apply(lambda x:x['hi'])
df.drop(columns=['translation'],inplace=True)
df


Unnamed: 0,English,Hindi
0,Give your application an accessibility workout,अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें
1,Accerciser Accessibility Explorer,एक्सेर्साइसर पहुंचनीयता अन्वेषक
2,The default plugin layout for the bottom panel,निचले पटल के लिए डिफोल्ट प्लग-इन खाका
3,The default plugin layout for the top panel,ऊपरी पटल के लिए डिफोल्ट प्लग-इन खाका
4,A list of plugins that are disabled by default,उन प्लग-इनों की सूची जिन्हें डिफोल्ट रूप से नि...
...,...,...
1659078,"The Prime Minister, Shri Narendra Modi has con...",प्रधानमंत्री श्री नरेन्द्र मोदी ने बिहार के मु...
1659079,"In a tweet, the Prime Minister said, congratul...","एक ट्वीट में प्रधानमंत्री ने कहा, बिहार के मुख..."
1659080,I also congratulate all those who took oath as...,"मैं उन सभी को भी बधाई देता हूं, जिन्होंने बिहा..."
1659081,The NDA family will work together for the prog...,एनडीए परिवार बिहार की प्रगति के लिए साथ मिलकर ...


In [8]:
import os
if not os.path.exists("data.csv"):
  df['English']=df['English'].apply(lambda x:clean_text(x))
  df['Hindi']=df['Hindi'].apply(lambda x:clean_text(x))
  df
else:
  df=pd.read_csv("data.csv")

In [9]:
df.to_csv('data.csv',index=False)

In [10]:
from typing import List
from collections import Counter
def build_vocabs(sentences:List[str]):
  vocab=Counter(' '.join(sentences).split())
  vocab={k:i+3 for i,(k,v) in enumerate(vocab.items())}
  vocab['<pad>']=0
  vocab['<pos>']=1
  vocab['<eos>']=2
  return vocab

In [11]:
en_vocab=build_vocabs(df['English'].astype(str).fillna(''))
hi_vocab=build_vocabs(df['Hindi'].astype(str).fillna(''))

In [12]:
def sent_tokens(sentence:str,vocab):
  tokens=[vocab['<pos>']]
  tokens+=[vocab[w] for w in sentence.split()]
  tokens+=[vocab['<eos>']]
  return tokens

In [13]:
sent_tokens("give application accessibility workout",en_vocab)

[1, 3, 4, 5, 6, 2]

In [14]:
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn.utils.rnn as rnn_utils


In [15]:
import numpy as np

def create_memmap(df, en_vocab, hi_vocab):

    n = len(df)

    max_en = max(len(sent_tokens(s, en_vocab)) for s in df['English'])
    max_hi = max(len(sent_tokens(s, hi_vocab)) for s in df['Hindi'])

    en_mem = np.memmap("en.dat", dtype='int32', mode='w+', shape=(n, max_en))
    hi_mem = np.memmap("hi.dat", dtype='int32', mode='w+', shape=(n, max_hi))

    en_mem[:] = en_vocab['<pad>']
    hi_mem[:] = hi_vocab['<pad>']

    for i in range(n):

        en_tokens = sent_tokens(df['English'].iloc[i], en_vocab)
        hi_tokens = sent_tokens(df['Hindi'].iloc[i], hi_vocab)

        en_mem[i, :len(en_tokens)] = en_tokens
        hi_mem[i, :len(hi_tokens)] = hi_tokens

    en_mem.flush()
    hi_mem.flush()

    return (n, max_en), (n, max_hi)

In [16]:
if not os.path.exists("en.dat") or not os.path.exists("hi.dat"):
  print("Creating memmaps...")
  cleaned_df = df.copy()
  cleaned_df['English'] = cleaned_df['English'].astype(str).fillna('')
  cleaned_df['Hindi'] = cleaned_df['Hindi'].astype(str).fillna('')

  en_shape, hi_shape = create_memmap(cleaned_df, en_vocab, hi_vocab)

Creating memmaps...


In [17]:
import torch
from torch.utils.data import Dataset

class Sent_data_loader(Dataset):
    def __init__(self, en_shape, hi_shape):
        self.en = np.memmap("en.dat", dtype="int32", mode="r", shape=en_shape)
        self.hi = np.memmap("hi.dat", dtype="int32", mode="r", shape=hi_shape)

    def __len__(self):
        return self.en.shape[0]

    def __getitem__(self, idx):
        return torch.from_numpy(self.en[idx]).long(), \
               torch.from_numpy(self.hi[idx]).long()

In [18]:
test=Sent_data_loader(en_shape,hi_shape)

test[0]

  return torch.from_numpy(self.en[idx]).long(), \


(tensor([1, 3, 4, 5, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([ 1,  3,  4,  5,  6,  7,  8,  9, 10,  2,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]))

In [19]:
dataset=Sent_data_loader(en_shape,hi_shape)
data_loader=DataLoader(dataset=dataset,batch_size=8,shuffle=False)

In [20]:
for _ in data_loader:
  print(_)
  break

[tensor([[ 1,  3,  4,  5,  6,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1,  7,  5,  8,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1,  9, 10, 11, 12, 13,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1,  9, 10, 11, 14, 13,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1, 15, 16, 17,  9,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1, 18, 19,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1, 19, 18, 20, 21, 22, 23,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0],
        [ 1, 18, 24, 25,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0]]), tensor([[ 1,  3,  4,  5,  6,  7,  8,  9, 10,  2,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 1, 11,  6, 12,  2,  0,  0,  0,  0,  0,  0,  0,  

In [21]:
import torch.nn as nn

In [22]:
class Encoder(nn.Module):
  def __init__(self,input_size,embed_size,hidden_size):
    super().__init__()
    self.embedding=nn.Embedding(input_size,embed_size)
    self.rnn=nn.GRU(embed_size,hidden_size,batch_first=True)
  def forward(self,x):
    embedding=self.embedding(x)
    outputs, hidden = self.rnn(embedding)
    return hidden


class Decoder(nn.Module):
  def __init__(self,output_size,embed_size,hidden_size):
    super().__init__()
    self.embedding=nn.Embedding(output_size,embed_size)
    self.rnn=nn.GRU(embed_size,hidden_size,batch_first=True)
    self.ff=nn.Linear(hidden_size,output_size)
  def forward(self,x,hidden):

    embedded = self.embedding(x).unsqueeze(1)
    output, hidden = self.rnn(embedded, hidden)

    prediction = self.ff(output.squeeze(1))
    return prediction, hidden


class seq2seq(nn.Module):
  def __init__(self,encoder,decoder):
    super().__init__()
    self.encoder=encoder
    self.decoder=decoder

  def forward(self, src, trg, teacher_forcing_ratio=0.5):

        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        trg_vocab_size = self.decoder.ff.out_features
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(trg.device)

        encoder_hidden = self.encoder(src)

        input = trg[:, 0]

        for t in range(1, trg_len):

            output, encoder_hidden = self.decoder(input, encoder_hidden)

            outputs[:, t, :] = output

            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)

            input = trg[:, t] if teacher_force else top1


        return outputs[:, 1:, :]

In [23]:
input_size_en=len(en_vocab)
output_size_hi=len(hi_vocab)

In [24]:
encoder = Encoder(input_size=input_size_en, embed_size=256, hidden_size=512)
decoder = Decoder(output_size=output_size_hi, embed_size=256, hidden_size=512)

model = seq2seq(encoder, decoder)

# Move model to device BEFORE initializing the optimizer
model.to(device)

criterion = nn.CrossEntropyLoss(ignore_index=en_vocab["<pad>"])
optimizer = torch.optim.Adam(model.parameters())

In [25]:
device='cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [26]:
import tqdm
for epoch in range(100):
  model.train()
  total_loss=0
  for i,batch in tqdm.tqdm(enumerate(data_loader)):
    src_tensor=batch[0] # Shape: (batch_size, src_seq_len)
    trg_tensor=batch[1] # Shape: (batch_size, trg_seq_len)
    src_tensor=src_tensor.to(device)
    trg_tensor=trg_tensor.to(device)
    optimizer.zero_grad()

    output = model(src_tensor, trg_tensor)

    trg_target_for_loss = trg_tensor[:, 1:]

    output_dim = output.shape[-1]
    reshaped_output = output.reshape(-1, output_dim)

    reshaped_trg_target = trg_target_for_loss.reshape(-1)

    loss = criterion(reshaped_output, reshaped_trg_target)

    loss.backward()
    optimizer.step()

    total_loss+=loss.item()
  avg_loss=total_loss/len(data_loader)
  print(f"Epoch: {epoch+1},Loss: {avg_loss:.4f}")

125it [00:06, 20.48it/s]


Epoch: 1,Loss: 4.4307


125it [00:05, 24.16it/s]


Epoch: 2,Loss: 2.5995


125it [00:08, 15.54it/s]


Epoch: 3,Loss: 0.9380


125it [00:05, 24.37it/s]


Epoch: 4,Loss: 0.3596


125it [00:05, 21.53it/s]


Epoch: 5,Loss: 0.2339


125it [00:05, 23.99it/s]


Epoch: 6,Loss: 0.1630


125it [00:05, 22.16it/s]


Epoch: 7,Loss: 0.1223


125it [00:05, 23.38it/s]


Epoch: 8,Loss: 0.1161


125it [00:05, 24.54it/s]


Epoch: 9,Loss: 0.0978


125it [00:05, 21.88it/s]


Epoch: 10,Loss: 0.1059


125it [00:05, 24.80it/s]


Epoch: 11,Loss: 0.0978


125it [00:05, 22.15it/s]


Epoch: 12,Loss: 0.1136


125it [00:04, 25.33it/s]


Epoch: 13,Loss: 0.1035


125it [00:07, 17.70it/s]


Epoch: 14,Loss: 0.1158


125it [00:05, 24.77it/s]


Epoch: 15,Loss: 0.0896


125it [00:05, 21.75it/s]


Epoch: 16,Loss: 0.0952


125it [00:05, 24.23it/s]


Epoch: 17,Loss: 0.0856


125it [00:05, 23.11it/s]


Epoch: 18,Loss: 0.0808


125it [00:05, 23.02it/s]


Epoch: 19,Loss: 0.0753


125it [00:04, 25.10it/s]


Epoch: 20,Loss: 0.0834


125it [00:05, 22.08it/s]


Epoch: 21,Loss: 0.0855


125it [00:04, 25.01it/s]


Epoch: 22,Loss: 0.0836


125it [00:05, 21.36it/s]


Epoch: 23,Loss: 0.0811


125it [00:04, 25.08it/s]


Epoch: 24,Loss: 0.0886


125it [00:05, 22.24it/s]


Epoch: 25,Loss: 0.0718


125it [00:05, 24.97it/s]


Epoch: 26,Loss: 0.0817


125it [00:05, 23.88it/s]


Epoch: 27,Loss: 0.0640


125it [00:05, 23.36it/s]


Epoch: 28,Loss: 0.0878


125it [00:05, 24.96it/s]


Epoch: 29,Loss: 0.0902


125it [00:05, 22.07it/s]


Epoch: 30,Loss: 0.0961


125it [00:04, 25.42it/s]


Epoch: 31,Loss: 0.0846


125it [00:05, 22.05it/s]


Epoch: 32,Loss: 0.0765


125it [00:04, 25.19it/s]


Epoch: 33,Loss: 0.0799


125it [00:05, 22.24it/s]


Epoch: 34,Loss: 0.0898


125it [00:05, 24.47it/s]


Epoch: 35,Loss: 0.0782


125it [00:04, 25.00it/s]


Epoch: 36,Loss: 0.0805


125it [00:05, 22.05it/s]


Epoch: 37,Loss: 0.0762


125it [00:04, 25.07it/s]


Epoch: 38,Loss: 0.0887


125it [00:07, 17.25it/s]


Epoch: 39,Loss: 0.0835


125it [00:04, 25.43it/s]


Epoch: 40,Loss: 0.0757


125it [00:05, 21.74it/s]


Epoch: 41,Loss: 0.0873


125it [00:04, 25.27it/s]


Epoch: 42,Loss: 0.1228


125it [00:05, 22.49it/s]


Epoch: 43,Loss: 0.1669


125it [00:05, 24.96it/s]


Epoch: 44,Loss: 0.3121


125it [00:05, 24.28it/s]


Epoch: 45,Loss: 0.1480


125it [00:05, 22.57it/s]


Epoch: 46,Loss: 0.0841


125it [00:04, 25.15it/s]


Epoch: 47,Loss: 0.0814


125it [00:05, 22.15it/s]


Epoch: 48,Loss: 0.0813


125it [00:05, 24.81it/s]


Epoch: 49,Loss: 0.0694


125it [00:06, 20.52it/s]


Epoch: 50,Loss: 0.0729


125it [00:05, 24.94it/s]


Epoch: 51,Loss: 0.0751


125it [00:05, 22.23it/s]


Epoch: 52,Loss: 0.0815


125it [00:04, 25.27it/s]


Epoch: 53,Loss: 0.0672


125it [00:05, 24.55it/s]


Epoch: 54,Loss: 0.0737


125it [00:05, 22.56it/s]


Epoch: 55,Loss: 0.0649


125it [00:04, 25.13it/s]


Epoch: 56,Loss: 0.0666


125it [00:05, 22.07it/s]


Epoch: 57,Loss: 0.0643


125it [00:04, 25.22it/s]


Epoch: 58,Loss: 0.0771


125it [00:05, 22.01it/s]


Epoch: 59,Loss: 0.0759


125it [00:04, 25.34it/s]


Epoch: 60,Loss: 0.0894


125it [00:05, 22.53it/s]


Epoch: 61,Loss: 0.0747


125it [00:05, 23.28it/s]


Epoch: 62,Loss: 0.0626


125it [00:04, 25.03it/s]


Epoch: 63,Loss: 0.0698


125it [00:05, 21.80it/s]


Epoch: 64,Loss: 0.0839


125it [00:04, 25.15it/s]


Epoch: 65,Loss: 0.0723


125it [00:05, 22.32it/s]


Epoch: 66,Loss: 0.0815


125it [00:04, 25.35it/s]


Epoch: 67,Loss: 0.0800


125it [00:05, 22.23it/s]


Epoch: 68,Loss: 0.0769


125it [00:04, 25.41it/s]


Epoch: 69,Loss: 0.0811


125it [00:05, 24.14it/s]


Epoch: 70,Loss: 0.0831


125it [00:05, 22.83it/s]


Epoch: 71,Loss: 0.0730


125it [00:05, 24.92it/s]


Epoch: 72,Loss: 0.0652


125it [00:05, 21.67it/s]


Epoch: 73,Loss: 0.0718


125it [00:05, 24.40it/s]


Epoch: 74,Loss: 0.0790


125it [00:05, 22.08it/s]


Epoch: 75,Loss: 0.0740


125it [00:04, 25.33it/s]


Epoch: 76,Loss: 0.0789


125it [00:05, 22.19it/s]


Epoch: 77,Loss: 0.0634


125it [00:05, 24.83it/s]


Epoch: 78,Loss: 0.0810


125it [00:04, 25.24it/s]


Epoch: 79,Loss: 0.0841


125it [00:05, 22.18it/s]


Epoch: 80,Loss: 0.0660


125it [00:05, 24.90it/s]


Epoch: 81,Loss: 0.0786


125it [00:05, 22.10it/s]


Epoch: 82,Loss: 0.0681


125it [00:04, 25.01it/s]


Epoch: 83,Loss: 0.0752


125it [00:05, 21.70it/s]


Epoch: 84,Loss: 0.0678


125it [00:05, 24.74it/s]


Epoch: 85,Loss: 0.0643


125it [00:05, 22.41it/s]


Epoch: 86,Loss: 0.0604


125it [00:05, 24.03it/s]


Epoch: 87,Loss: 0.0654


125it [00:05, 24.89it/s]


Epoch: 88,Loss: 0.0726


125it [00:05, 21.96it/s]


Epoch: 89,Loss: 0.0785


125it [00:05, 24.89it/s]


Epoch: 90,Loss: 0.0782


125it [00:05, 21.61it/s]


Epoch: 91,Loss: 0.0734


125it [00:04, 25.02it/s]


Epoch: 92,Loss: 0.0756


125it [00:05, 21.72it/s]


Epoch: 93,Loss: 0.0721


125it [00:05, 24.91it/s]


Epoch: 94,Loss: 0.0811


125it [00:05, 22.74it/s]


Epoch: 95,Loss: 0.0659


125it [00:05, 23.77it/s]


Epoch: 96,Loss: 0.0626


125it [00:05, 24.68it/s]


Epoch: 97,Loss: 0.0661


125it [00:05, 22.24it/s]


Epoch: 98,Loss: 0.0703


125it [00:04, 25.09it/s]


Epoch: 99,Loss: 0.0644


125it [00:05, 21.45it/s]

Epoch: 100,Loss: 0.0649





In [27]:
import torch
import numpy as np

def predict(sentence, model, en_vocab, hi_vocab, max_len=50, device='cpu'):
  model.eval()

  hi_idx_to_word = {v: k for k, v in hi_vocab.items()}

  tokenized_en_sent = sent_tokens(sentence, en_vocab)
  src_tensor = torch.tensor(tokenized_en_sent, dtype=torch.long).unsqueeze(0).to(device)

  with torch.no_grad():
    encoder_hidden = model.encoder(src_tensor)

    decoder_input = torch.tensor([hi_vocab['<pos>']], dtype=torch.long).to(device)
    predicted_hi_tokens = []

    for _ in range(max_len):
      output, encoder_hidden = model.decoder(decoder_input, encoder_hidden)

      predicted_token_id = output.argmax(1).item()
      predicted_hi_tokens.append(predicted_token_id)

      if predicted_token_id == hi_vocab['<eos>']:
        break

      decoder_input = torch.tensor([predicted_token_id], dtype=torch.long).to(device)


  if predicted_hi_tokens and predicted_hi_tokens[0] == hi_vocab['<pos>']:
      predicted_hi_tokens = predicted_hi_tokens[1:]
  if predicted_hi_tokens and predicted_hi_tokens[-1] == hi_vocab['<eos>']:
      predicted_hi_tokens = predicted_hi_tokens[:-1]

  predicted_sentence = ' '.join([hi_idx_to_word[token_id] for token_id in predicted_hi_tokens if token_id in hi_idx_to_word])
  return predicted_sentence

In [28]:
predict("give application accessibility workout",model=model,en_vocab=en_vocab,hi_vocab=hi_vocab,device=device)

'अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें'

In [29]:
torch.save(model.state_dict(), 'model.pth')

In [30]:
from google.colab import files
files.download("/content/model.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [31]:
from google.colab import files
files.download("/content/en.dat")

from google.colab import files
files.download("/content/hi.dat")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
torch.save(en_vocab,"en_vocab.pth")
torch.save(hi_vocab,"hi_vocab.pth")

In [34]:
from google.colab import files
files.download("/content/en_vocab.pth")
files.download("/content/hi_vocab.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [35]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-3.10.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.10.0 (from mlflow)
  Downloading mlflow_skinny-3.10.0-py3-none-any.whl.metadata (32 kB)
Collecting mlflow-tracing==3.10.0 (from mlflow)
  Downloading mlflow_tracing-3.10.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<26 (from mlflow)
  Downloading gunicorn-25.1.0-py3-none-any.whl.metadata (5.5 kB)
Collecting huey<3,>=2.5.4 (from mlflow)
  Downloading huey-2.6.0-py3-none-any.whl.metadata (4.3 kB)
Collecting skops<1 (from mlflow)
  Downloading skops-0.13.0-py3-none-any.whl.metadata (5.6 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.10.0->mlflow)
  D

In [37]:
!pip install dagshub
import dagshub
dagshub.init(repo_owner='vanshsharma7832', repo_name='Sentence-Translator', mlflow=True)





Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=7122499f-4a30-48f5-bb81-cebd5dbeb6eb&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=d69209eee37bc01e33d0d52592ae719b884f0b18c66d6e90dc33f13164ded3ba




In [42]:
import mlflow
with mlflow.start_run():
  mlflow.log_metric('loss', 0.0644)
  # mlflow.log_artifact('model.pth',model)

🏃 View run enthused-donkey-213 at: https://dagshub.com/vanshsharma7832/Sentence-Translator.mlflow/#/experiments/0/runs/6f6da30dc49d45ae97b252a9788c132c
🧪 View experiment at: https://dagshub.com/vanshsharma7832/Sentence-Translator.mlflow/#/experiments/0


8.11653953244604