## Install Required Libraries:

In [40]:
!pip install transformers torch google-cloud-storage google-api-python-client google-cloud-aiplatform tqdm



## Download the Hugging Face Model

In [17]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Specify the Hugging Face model ID
model_id = "ealvaradob/bert-finetuned-phishing"

# Download and save the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Download and save the model
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# Save the tokenizer and model to the local directory
tokenizer.save_pretrained("./huggingface_model")
model.save_pretrained("./huggingface_model")

## Convert to tensorflow

In [18]:
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# Load the Hugging Face model and tokenizer
model_id = "ealvaradob/bert-finetuned-phishing"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = TFAutoModelForSequenceClassification.from_pretrained(model_id)

# Save the model as a TensorFlow SavedModel
model.save_pretrained("./huggingface_model/tf_model")
tokenizer.save_pretrained("./huggingface_model/tf_model")


ImportError: 
TFAutoModelForSequenceClassification requires the TensorFlow library but it was not found in your environment.
However, we were able to find a PyTorch installation. PyTorch classes do not begin
with "TF", but are otherwise identically named to our TF classes.
If you want to use PyTorch, please use those classes instead!

If you really do want to use TensorFlow, please follow the instructions on the
installation page https://www.tensorflow.org/install that match your environment.


## Compress the Model Files:

In [2]:
import os, tarfile

def compress_folder(input_dir, output_file):
    with tarfile.open(output_file, "w:gz") as tar:
        for root, _, files in os.walk(input_dir):
            for file in files:
                tar.add(os.path.join(root, file), arcname=os.path.relpath(os.path.join(root, file), input_dir))

compress_folder("./huggingface_model/tf_model", "model.tar.gz")



Compressing: 100%|██████████| 6/6 [01:17<00:00, 12.83s/file]


## Set Up a Google Cloud Project and Bucket:

In [3]:
!gcloud config set project tidy-reporter-417118
!gsutil mb gs://tidy-reporter-model-bucket/

Updated property [core/project].
Creating gs://tidy-reporter-model-bucket/...
ServiceException: 409 A Cloud Storage bucket named 'tidy-reporter-model-bucket' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


## Upload the Model to Google Cloud Storage:

In [4]:
!gsutil cp model.tar.gz gs://tidy-reporter-model-bucket/
!gsutil acl ch -u AllUsers:R gs://tidy-reporter-model-bucket/model.tar.gz

Copying file://model.tar.gz [Content-Type=application/x-tar]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

- [1 files][  1.2 GiB/  1.2 GiB]                                                
Operation completed over 1 objects/1.2 GiB.                                      
Updated ACL on gs://tidy-reporter-model-bucket/model.tar.gz


## Initialize the Vertex AI SDK

In [5]:
!gcloud services enable aiplatform.googleapis.com
from google.cloud import aiplatform

aiplatform.init(
    project="tidy-reporter-417118",
    location="us-central1",
    staging_bucket="gs://tidy-reporter-model-bucket/"
)

## Create and Deploy the Model with Vertex AI:

In [19]:
# Install necessary libraries
!pip install transformers torch google-cloud-aiplatform tqdm git-lfs

# Import necessary libraries
import os
import tarfile
from google.cloud import aiplatform

# Set Google Cloud settings
PROJECT_ID = "tidy-reporter-417118"
REGION = "us-central1"
REPOSITORY = "huggingface-models"
IMAGE = "bert-phishing"
TAG = "latest"

# Authenticate gcloud and configure Docker non-interactively
!gcloud config set project {PROJECT_ID}
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet

# Install Git LFS
!sudo apt-get install git-lfs
!git lfs install --skip-smudge

# Clone the Hugging Face model repository
!rm -rf huggingface_model
!git clone https://huggingface.co/ealvaradob/bert-finetuned-phishing huggingface_model

# Change directory to where the model is cloned
os.chdir('huggingface_model')

# Prepare the Dockerfile to include the model in the image
dockerfile_contents = """
FROM python:3.8-slim
WORKDIR /
COPY . /
RUN pip install transformers==4.38.1 torch==2.2.0
RUN python -c "from transformers import pipeline; classifier = pipeline('text-classification', model='./model')"
ENTRYPOINT ["python", "predictor.py"]
"""

# Write the Dockerfile
with open("Dockerfile", "w") as f:
    f.write(dockerfile_contents)

# Build and push the Docker image with the model included
image_uri = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}:{TAG}"
!docker build -t {image_uri} .
!docker push {image_uri}

# Initialize AI platform
aiplatform.init(project=PROJECT_ID, location=REGION)

# Upload and deploy the model directly using the image
model = aiplatform.Model.upload(
    display_name="bert-phishing",
    serving_container_image_uri=image_uri
)

endpoint = model.deploy(
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1
)

print("Model deployed. Endpoint:", endpoint.resource_name)


Updated property [core/project].

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (2.13.2-1+b5).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
Git LFS initialized.
Cloning into 'huggingface_model'...
remote: Enumerating objects: 139, done.[K
remote: Counting objects: 100% (135/135), done.[K
remote: Compressing objects: 100% (135/135), done.[K
remote: Total 139 (delta 48), reused 0 (delta 0), pack-reused 4 (from 1)[K
Receiving objects: 100% (139/139), 332.70 KiB | 4.26 MiB/s, done.
Resolving deltas: 100% (48/48), done.
Sending buil

KeyboardInterrupt: 

In [17]:
# Attempt to re-upload the model.tar.gz file
!gsutil cp model.tar.gz gs://{BUCKET_NAME}/

# List files in the bucket to confirm upload
!gsutil ls gs://{BUCKET_NAME}/
!gsutil ls gs://{BUCKET_NAME}/model.tar.gz


Copying file://model.tar.gz [Content-Type=application/x-tar]...
/ [1 files][316.0 KiB/316.0 KiB]                                                
Operation completed over 1 objects/316.0 KiB.                                    
gs://tidy-reporter-model-bucket/model.tar.gz
gs://tidy-reporter-model-bucket/model.tar.gz


In [18]:
# Initialize AI platform with the correct project and location
aiplatform.init(project=PROJECT_ID, location=REGION)

# Upload and deploy the model
model = aiplatform.Model.upload(
    display_name="bert-phishing",
    artifact_uri=f"gs://{BUCKET_NAME}/model.tar.gz",
    serving_container_image_uri=f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}:{TAG}"
)

endpoint = model.deploy(
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1
)

print("Model deployed. Endpoint:", endpoint.resource_name)


NotFound: 404 There are no files in directory "gs://tidy-reporter-model-bucket/model.tar.gz". Please check if the Cloud Storage URI is correct or copy at least one file to the directory.