# Setup

This notebook requires an environment built with the following custom Dockerfile instructions:
```
RUN pip install mlflow==2.11.3 transformers datasets ipywidgets torch torchvision --upgrade

```

In [None]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [None]:
import os

import numpy as np
import pandas as pd
from datasets import load_dataset, load_metric
from huggingface_hub import notebook_login
from matplotlib import pyplot as plt
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

import mlflow

## Huggingface login
NOTE: You need to create a Hugging Face account if you don't already have one, then use it to generate an access token and enter that token in the next cell.

In [None]:
# Prompt for a Hugging Face access token and log this notebook session in to the Hub.
notebook_login()

In [None]:
# Configure git's "store" credential helper globally so credentials are remembered
# between git commands.
# NOTE(review): the store helper keeps credentials unencrypted on disk — confirm
# that is acceptable for this environment.
!git config --global credential.helper store

In [None]:
# Load the pretrained financial-news sentiment classifier, identified by its
# Hugging Face Hub checkpoint id, as a sequence-classification model.
model = AutoModelForSequenceClassification.from_pretrained(
    "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
)


In [None]:
from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
from mlflow import MlflowClient

client = MlflowClient()

model_name = "financial-news-sentiment-analysis"

# create_model_version() needs the registered model to exist already. Create it
# on first use so the notebook works against a fresh registry; on later runs the
# search finds it and only a new version is added.
if not client.search_registered_models(filter_string="name='{}'".format(model_name)):
    client.create_registered_model(model_name)

with mlflow.start_run() as run:
    # Log the raw PyTorch model as a run artifact under the "model" path.
    model_info = mlflow.pytorch.log_model(model, "model")

    runs_uri = model_info.model_uri

    # Register the artifact logged by this run as a new version of the
    # registered sentiment-analysis model.
    desc = "Pretrained Sentiment Analysis model from mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
    # Resolve the runs:/ URI to the underlying artifact location before registering.
    model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)
    mv = client.create_model_version(model_name, model_src, run.info.run_id, description=desc)
    print("Name: {}".format(mv.name))
    print("Version: {}".format(mv.version))
    print("Description: {}".format(mv.description))
    print("Status: {}".format(mv.status))
    print("Stage: {}".format(mv.current_stage))

In [None]:
# Show the full model-version entity returned by the registry for the version just created.
print(mv)

In [None]:
from transformers import RobertaTokenizer

# Load the tokenizer from the same checkpoint `model` was loaded from instead of
# a hard-coded base model, so the tokenizer that is later packaged with the
# model always matches it.
tokenizer = AutoTokenizer.from_pretrained(model.name_or_path)
text = "I am very excited today."
# Tokenize the sample sentence into PyTorch tensors and run one forward pass as a smoke test.
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

print(output)

In [None]:
from transformers import pipeline

# Wrap the model and tokenizer in a text-classification pipeline so that the
# logged artifact accepts raw strings.
classification = pipeline('text-classification', model=model, tokenizer=tokenizer)

model_name = "financial-news-sentiment-analysis-classification"

# create_model_version() needs the registered model to exist already. Create it
# on first use so the notebook works against a fresh registry; on later runs the
# search finds it and only a new version is added.
if not client.search_registered_models(filter_string="name='{}'".format(model_name)):
    client.create_registered_model(model_name)

with mlflow.start_run() as run:
    # Log the whole pipeline with the MLflow transformers flavor under "model".
    model_info = mlflow.transformers.log_model(classification, "model")

    runs_uri = model_info.model_uri

    # Register the artifact logged by this run as a new version of the
    # registered text-classification model.
    desc = "Pretrained Sentiment Analysis model from mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis -- text classifier"
    # Resolve the runs:/ URI to the underlying artifact location before registering.
    model_src = RunsArtifactRepository.get_underlying_uri(runs_uri)
    mv = client.create_model_version(model_name, model_src, run.info.run_id, description=desc)
    print("Name: {}".format(mv.name))
    print("Version: {}".format(mv.version))
    print("Description: {}".format(mv.description))
    print("Status: {}".format(mv.status))
    print("Stage: {}".format(mv.current_stage))


In [None]:
# Show the full model-version entity returned by the registry for the version just created.
print(mv)

In [None]:
# Sanity check: run the text-classification pipeline on the sample sentence;
# the prediction is displayed as the cell's output.
classification(text)