### Setup

inputs:

In [1]:
# Read the active gcloud project; the `!` capture returns the command output as a list of lines.
project = !gcloud config get-value project
PROJECT_ID = project[0]  # the first output line is taken as the project id
PROJECT_ID

'osn-smartcapex-404-sbx'

In [2]:
# Region used for Vertex AI and Artifact Registry; DATANAME / NOTEBOOK key the GCS prefix,
# the BigQuery output dataset and the container image name further down.
REGION = 'europe-west1'
DATANAME = 'demo_vertex'
NOTEBOOK = 'traffic_improvement'

# Base image of the custom training container and machine type of the training job.
BASE_IMAGE = 'python:3.7-slim-buster'
TRAIN_COMPUTE = 'n1-standard-8' ## Choose the machine type wisely, depending on the task (e.g. 64 vCPUs, 240 GB RAM)

packages:

In [3]:
from google.cloud import bigquery
from google.cloud import aiplatform

import matplotlib.pyplot as plt
from datetime import datetime
import json

clients:

In [4]:
aiplatform.init(project=PROJECT_ID, location=REGION)
# Bind the BigQuery client to the same project as Vertex AI (and as the client created
# in src/main.py) instead of relying on whatever default project the environment infers.
bq = bigquery.Client(project=PROJECT_ID)

# helper function for queries
def bq_runner(query):
    """Submit ``query`` through the notebook-level BigQuery client.

    Args:
        query: SQL text to run.

    Returns:
        Whatever ``bq.query`` returns for the submitted query.
    """
    return bq.query(query = query)


parameters:

In [5]:
# Run timestamp (naive local time, second resolution); appended to the GCS prefix of the training job.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET = "osn-smartcapex-data-uploaded-sbx"
# GCS prefix under which this notebook's job output is stored.
URI = f"gs://{BUCKET}/{DATANAME}/{NOTEBOOK}"
# Local working directory that receives the generated sources, requirements.txt and Dockerfile.
DIR = f"temp/{NOTEBOOK}"

In [6]:
# Capture the account gcloud is currently configured with; the name is first bound to the
# list of output lines and then rebound to its first line (the account e-mail).
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

'989544951348-compute@developer.gserviceaccount.com'

List the service account's current roles:

In [7]:
# Flatten the project's IAM policy per member, keep the bindings of SERVICE_ACCOUNT, and print only the role column.
!gcloud projects get-iam-policy $PROJECT_ID --filter="bindings.members:$SERVICE_ACCOUNT" --format='table(bindings.role)' --flatten="bindings[].members"

ROLE
roles/aiplatform.admin
roles/bigquery.admin
roles/bigquery.jobUser
roles/dataflow.admin
roles/dataflow.worker
roles/editor
roles/iam.serviceAccountUser
roles/ml.admin
roles/run.admin
roles/storage.admin
roles/storage.objectAdmin
roles/storage.objectViewer
roles/workflows.admin


environment:

In [8]:
#!rm -rf {DIR}
#!mkdir -p {DIR}

### Training

#### Assemble Python File for Training

Create the main Python trainer file as `{DIR}/src/main.py` (it is run inside the container as `python -m src.main`):

In [9]:
# Create the source directory the next cell writes main.py into (-p: no error if it already exists).
!mkdir -p {DIR}/src

In [20]:
%%writefile  {DIR}/src/main.py

from google.cloud import bigquery
import pandas as pd
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import argparse
from src.conf import conf, conf_loader
from src.activation_model import get_affected_cells_with_interactions_between_upgrades
from src.cluster_selection import get_cluster_of_affected_sites
from src.traffic_improvement import get_all_traffic_improvement_features, compute_traffic_after, train_traffic_improvement_model
from src.traffic_improvement_trend import compute_traffic_by_region, train_trend_model_with_linear_regression


conf_loader('OSN')

# import parameters
parser = argparse.ArgumentParser()
parser.add_argument('--PROJECT_ID', dest = 'PROJECT_ID', type = str)
parser.add_argument('--DATANAME', dest = 'DATANAME', type = str)
parser.add_argument('--NOTEBOOK', dest = 'NOTEBOOK', type = str)

args = parser.parse_args()
PROJECT_ID = args.PROJECT_ID
DATANAME = args.DATANAME
NOTEBOOK = args.NOTEBOOK

#my_arg = args.my_arg
print(PROJECT_ID, DATANAME, NOTEBOOK)

# client for BQ
bq = bigquery.Client(project = PROJECT_ID)

df_traffic_weekly_kpis = bq.query(query = f"SELECT * FROM `osn-smartcapex-404-sbx.preprocessing.oss_counter_weekly`" ).to_dataframe()
df_distance = bq.query(query = f"SELECT * FROM `osn-smartcapex-404-sbx.intermediate.df_distance`" ).to_dataframe()
df_sites = bq.query(query = f"SELECT * FROM `osn-smartcapex-404-sbx.preprocessing.df_sites`" ).to_dataframe()



# Script
df_traffic_weekly_kpis = df_traffic_weekly_kpis.replace({'cell_band': {'F2_U2100': 'U2100', 'F3_U2100': 'U2100', 'F1_U900': 'U900',
                                                                           'F1_U2100':'U2100','F4_U2100':'U2100','F2_U900':'U900'}})
    
df_cell_affected = get_affected_cells_with_interactions_between_upgrades(df_traffic_weekly_kpis)

list_of_upgrades, sites_to_remove = get_cluster_of_affected_sites(df_cell_affected,
                                                                    df_distance,
                                                                    max_neighbors=
                                                                    conf['TRAFFIC_IMPROVEMENT'][
                                                                    'MAX_NUMBER_OF_NEIGHBORS'])

df_data_traffic_features = get_all_traffic_improvement_features(df_traffic_weekly_kpis,
                                                                       df_cell_affected,
                                                                       list_of_upgrades,
                                                                       sites_to_remove,
                                                                       upgraded_to_not_consider=[])
df_data_traffic_features = compute_traffic_after(df_data_traffic_features, df_traffic_weekly_kpis, 'total_data_traffic_dl_gb')
df_voice_traffic_features = get_all_traffic_improvement_features(df_traffic_weekly_kpis,
                                                                       df_cell_affected,
                                                                       list_of_upgrades,
                                                                       sites_to_remove,
                                                                       type_of_traffic='voice',
                                                                       kpi_to_compute_upgrade_effect=[
                                                                           "total_voice_traffic_kerlands"],
                                                                       upgraded_to_not_consider=[])
df_voice_traffic_features = compute_traffic_after(df_voice_traffic_features, df_traffic_weekly_kpis,'total_voice_traffic_kerlands')
                                                                    
model_rf_data = train_traffic_improvement_model(df_data_traffic_features,
                                type_of_traffic='data',
                                remove_samples_with_target_variable_lower=True,
                                bands_to_consider=['G900', 'G1800','L2600', 'L1800', 'L800', 'U2100', 'U900'])
model_rf_voice = train_traffic_improvement_model(df_voice_traffic_features,
                                type_of_traffic='voice',
                                remove_samples_with_target_variable_lower=True,
                                bands_to_consider=['G900', 'G1800','L2600', 'L1800', 'L800', 'U2100', 'U900'])

df_traffic_by_region = compute_traffic_by_region(df_sites,
                                                 df_traffic_weekly_kpis,
                                                 kpi_to_compute_trend=[
                                                 'total_data_traffic_dl_gb',
                                                 'total_voice_traffic_kerlands'])

train_trend_model_with_linear_regression(df_traffic_by_region,
                                         variable_to_group_by=['site_region'],
                                         kpi_to_compute_trend=['total_data_traffic_dl_gb'])
train_trend_model_with_linear_regression(df_traffic_by_region,
                                         variable_to_group_by=['site_region'],
                                         kpi_to_compute_trend=['total_voice_traffic_kerlands'])



# output data - to BQ
df_cell_affected.to_gbq(f"{PROJECT_ID}.{DATANAME}.df_cell_affected", f'{PROJECT_ID}', if_exists = 'replace')
list_of_upgrades.to_gbq(f"{PROJECT_ID}.{DATANAME}.list_of_upgrades", f'{PROJECT_ID}', if_exists = 'replace')
print(sites_to_remove)
df_data_traffic_features.to_gbq(f"{PROJECT_ID}.{DATANAME}.df_data_traffic_features", f'{PROJECT_ID}', if_exists = 'replace')
df_voice_traffic_features.to_gbq(f"{PROJECT_ID}.{DATANAME}.df_voice_traffic_features", f'{PROJECT_ID}', if_exists = 'replace')
df_traffic_by_region.to_gbq(f"{PROJECT_ID}.{DATANAME}.df_traffic_by_region", f'{PROJECT_ID}', if_exists = 'replace')



Overwriting temp/traffic_improvement/src/main.py


### Create Custom Container

* https://cloud.google.com/vertex-ai/docs/training/create-custom-container
* https://cloud.google.com/vertex-ai/docs/training/pre-built-containers
* https://cloud.google.com/vertex-ai/docs/general/deep-learning
* https://cloud.google.com/deep-learning-containers/docs/choosing-container

Choose a Base Image

In [21]:
BASE_IMAGE # Defined above in Setup

'python:3.7-slim-buster'

#### Create the Dockerfile

A basic Dockerfile that takes the base image, installs the Python requirements, copies the code in and defines an entrypoint (the Python module to run first). Add RUN entries to install any additional packages.

In [22]:
# Python dependencies installed into the training image, one requirement per entry.
packages = [
    "pandas",
    "pystan==2.19.1.1",
    "holidays==0.24",
    "prophet==1.1.1",
    "pandas-gbq",
    "google-cloud-bigquery",
    "pyarrow==0.13.0",
    "scipy",
    "tqdm",
    "haversine==2.3.0",
    "scikit-learn==0.24.1",
    "sqlalchemy==1.4.2",
]
# Leading and trailing newline are kept so the file content matches the previous layout exactly.
requirements = "\n" + "\n".join(packages) + "\n"

with open(f'{DIR}/requirements.txt', 'w') as requirements_file:
    requirements_file.write(requirements)

In [23]:
# Dockerfile instructions, one per entry; only the FROM line depends on notebook state.
dockerfile_lines = [
    f"FROM {BASE_IMAGE}",
    "WORKDIR /",
    "## Install dependencies",
    "COPY requirements.txt .",
    "RUN pip install -r requirements.txt",
    "COPY src /src",
    "## Sets up the entry point to invoke the trainer",
    'ENTRYPOINT ["python", "-m", "src.main"]',
]
# Leading and trailing newline are kept so the file content matches the previous layout exactly.
dockerfile = "\n" + "\n".join(dockerfile_lines) + "\n"

with open(f'{DIR}/Dockerfile', 'w') as dockerfile_handle:
    dockerfile_handle.write(dockerfile)

#### Setup Artifact Registry

The container image needs to be stored in Artifact Registry, Container Registry or Docker Hub in order to be used by Vertex AI Training jobs. This notebook sets up Artifact Registry and pushes a container image built locally (on this notebook instance) to it.

* https://cloud.google.com/artifact-registry/docs/docker/store-docker-container-images#gcloud

Enable Artifact Registry API:

Check to see if the api is enabled, if not then enable it:

In [24]:
services = !gcloud services list --format="json" --available --filter=name:artifactregistry.googleapis.com
services = json.loads("".join(services))

if (services[0]['config']['name'] == 'artifactregistry.googleapis.com') & (services[0]['state'] == 'ENABLED'):
    print(f"Artifact Registry is Enabled for This Project: {PROJECT_ID}")
else:
    print(f"Enabeling Artifact Registry for this Project: {PROJECT_ID}")
    !gcloud services enable artifactregistry.googleapis.com

Artifact Registry is Enabled for This Project: osn-smartcapex-404-sbx


Create A Repository

Check whether the repository (named after the project) already exists; if not, create it.

In [25]:
check_for_repo = !gcloud artifacts repositories describe {PROJECT_ID} --location={REGION}

if check_for_repo[0].startswith('ERROR'):
    print(f'Creating a repository named {PROJECT_ID}')
    !gcloud  artifacts repositories create {PROJECT_ID} --repository-format=docker --location={REGION} --description="Vertex AI Training Custom Containers"
else:
    print(f'There is already a repository named {PROJECT_ID}')

There is already a repository named osn-smartcapex-404-sbx


Configure Local Docker to Use GCLOUD CLI

In [26]:
# Register gcloud as the Docker credential helper for the regional Artifact Registry host (--quiet: no prompt).
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "europe-west1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: europe-west1-docker.pkg.dev
gcloud credential helpers already registered correctly.


Build The Custom Container (local to notebook)

In [27]:
# Layout: <region>-docker.pkg.dev/<project>/<repository>/<image>:<tag>.
# PROJECT_ID appears twice because the repository is named after the project.
IMAGE_URI=f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{PROJECT_ID}/{NOTEBOOK}:latest"
IMAGE_URI

'europe-west1-docker.pkg.dev/osn-smartcapex-404-sbx/osn-smartcapex-404-sbx/traffic_improvement:latest'

In [28]:
# Build the image from the generated Dockerfile in DIR and tag it with the Artifact Registry path.
!docker build {DIR}/. -t $IMAGE_URI

Sending build context to Docker daemon  285.2kB
Step 1/6 : FROM python:3.7-slim-buster
 ---> 099f4583c701
Step 2/6 : WORKDIR /
 ---> Using cache
 ---> 62707eba35c7
Step 3/6 : COPY requirements.txt .
 ---> Using cache
 ---> 2ce4f97f4d99
Step 4/6 : RUN pip install -r requirements.txt
 ---> Using cache
 ---> 42986c257c72
Step 5/6 : COPY src /src
 ---> 4a311cdbaafa
Step 6/6 : ENTRYPOINT ["python", "-m", "src.main"]
 ---> Running in 9dcadce342d6
Removing intermediate container 9dcadce342d6
 ---> 198a6cea813f
Successfully built 198a6cea813f
Successfully tagged europe-west1-docker.pkg.dev/osn-smartcapex-404-sbx/osn-smartcapex-404-sbx/traffic_improvement:latest


Test The Custom Container (local to notebook)

In [29]:
# Local smoke test: run the image with the same flags src/main.py parses.
!docker run {IMAGE_URI} --PROJECT_ID {PROJECT_ID} --DATANAME {DATANAME} --NOTEBOOK {NOTEBOOK} # add all your argument

osn-smartcapex-404-sbx demo_vertex traffic_improvement
1764
1764
8
44
              site_id  bands_upgraded
0    AUTOROUTE_THICKY               1
1  KEUR_THIEME_SAWARE               1
2         NDIASSANE02               1
3     NDIEYENE_SIRAKH               1
4       THS05_SNT_E10               1
Empty DataFrame
Columns: [cell_id, cell_name, site_id, cell_band, cell_tech, cell_sector, year, week, week_period, total_data_traffic_dl_gb, total_voice_traffic_kerlands, total_data_traffic_ul_gb, average_throughput_user_dl_kbps, average_throughput_user_ul_kbps, average_number_of_users_dl, average_number_of_users_ul, cell_occupation_dl_percentage, cell_occupation_ul_percentage, average_number_of_users_in_queue, tch_congestion, trx_900, trx_1800, U21_nbr_porteuse, U09_nbr_porteuse, total_nbr_porteuse, date, starting_week_site, starting_week_cell, is_upgrade]
Index: []
Index(['cell_id', 'cell_name', 'site_id', 'cell_band', 'cell_tech',
       'cell_sector', 'year', 'week', 'week_period',
       

Push The Custom Container To Artifact Registry

In [19]:
# Upload the locally built image to Artifact Registry so the training job can pull it.
!docker push $IMAGE_URI

The push refers to repository [europe-west1-docker.pkg.dev/osn-smartcapex-404-sbx/osn-smartcapex-404-sbx/traffic_improvement]

[1Bf7edfc46: Preparing 
[1B3b7d092a: Preparing 
[1B5d625801: Preparing 
[1B8d012914: Preparing 
[1Bd30bdfa9: Preparing 
[1B9f968310: Preparing 
[1B55769c5e: Preparing 
[8Bf7edfc46: Pushed lready exists 2kB[8A[2K[7A[2K[2A[2K[1A[2K[8A[2Klatest: digest: sha256:2deaeebeaac3b6a5d947997bc423d2c17eaf333ed2a9eaa4391029be2fcc87d3 size: 1999


#### Setup Training Job

In [20]:
# Flags forwarded to the container entry point; append any extra arguments here.
CMDARGS = [
    f"--PROJECT_ID={PROJECT_ID}",
    f"--DATANAME={DATANAME}",
    f"--NOTEBOOK={NOTEBOOK}",
]

# CPU-only machine for the single worker.
MACHINE_SPEC = dict(machine_type=TRAIN_COMPUTE, accelerator_count=0)

# One replica running the custom image; the empty command keeps the image's own ENTRYPOINT.
WORKER_POOL_SPEC = [
    dict(
        replica_count=1,
        machine_spec=MACHINE_SPEC,
        container_spec=dict(image_uri=IMAGE_URI, command=[], args=CMDARGS),
    )
]

In [21]:
# Define the Vertex AI custom job. The location comes from REGION (the same constant used for
# aiplatform.init and the Artifact Registry host) instead of a second hard-coded copy of the
# region string, so the job cannot drift away from where the image is stored.
customJob = aiplatform.CustomJob(
    location = REGION,
    display_name = f'{NOTEBOOK}_{DATANAME}',
    worker_pool_specs = WORKER_POOL_SPEC,
    # Output and staging share one timestamped GCS prefix.
    base_output_dir = f"{URI}/{TIMESTAMP}",
    staging_bucket = f"{URI}/{TIMESTAMP}",
    labels = {'notebook': NOTEBOOK}
)

#### Run Training Job

In [22]:
# Submit the job under the captured service account; sync = False asks the SDK not to
# block the cell until the job finishes (NOTE(review): confirm against the SDK docs).
customJob.run(
    service_account = SERVICE_ACCOUNT,
    sync = False
)

Creating CustomJob
CustomJob created. Resource name: projects/989544951348/locations/europe-west1/customJobs/4021832389863931904
To use this CustomJob in another session:
custom_job = aiplatform.CustomJob.get('projects/989544951348/locations/europe-west1/customJobs/4021832389863931904')
View Custom Job:
https://console.cloud.google.com/ai/platform/locations/europe-west1/training/4021832389863931904?project=989544951348
CustomJob projects/989544951348/locations/europe-west1/customJobs/4021832389863931904 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/989544951348/locations/europe-west1/customJobs/4021832389863931904 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/989544951348/locations/europe-west1/customJobs/4021832389863931904 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/989544951348/locations/europe-west1/customJobs/4021832389863931904 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/989544951348/locations/europe-west1/customJobs

In [None]:
# Show the display name the job was created with (f'{NOTEBOOK}_{DATANAME}').
customJob.display_name