In [120]:

import os
import pandas as pd

from google.cloud import bigquery
from google.cloud import storage

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pickle

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler, OrdinalEncoder, TargetEncoder


In [74]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "cloud/trial_bigq.json"

## Upload dataset to GCS for Andre Suchitra

In [75]:
project_id = 'dtidsus'
dataset_id = 'capstone'
table_id = 'data_saudi_used_cars'
region = 'us-central1'
bucket_name = 'modul4'
blob_name = 'andre_suchitra/saudi_used_cars.csv'

In [76]:
client = bigquery.Client(project=project_id)
# check list of bucket
storage_client = storage.Client(project=project_id)



In [77]:
try : 
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    data_capstone = bucket.blob('andre_suchitra/saudi_used_cars.csv')
    data_capstone.upload_from_filename('data/data_saudi_used_cars.csv')

    print (f"Uploading dataset \"{blob_name}\" succeeded")
except:
    raise TypeError("An exception occurred")

Uploading dataset "andre_suchitra/saudi_used_cars.csv" succeeded


## Validate Andre Suchitra dataset is in the blob list

In [78]:

def check_blob_list():
    # check list of blob
    bucket = storage_client.get_bucket(bucket_name)
    blobs = bucket.list_blobs()
    for blob in blobs:
        print(blob.name)

check_blob_list()

Bril/data_ecommerce_customer_churn.csv
Bril/model/final_model_XGB.pkl
adji/data_daegu_apartment.csv
afdal/data_customer_lifetime_value.csv
andre_suchitra/model/saudi_cars_price_predictor.pkl
andre_suchitra/saudi_cars_price_predictor.pkl
andre_suchitra/saudi_used_cars.csv
davis/data_ecommerce_customer_churn.csv
davis/df_customer.csv
diah/data_california_house.csv
fahd/data_telco_customer_churn.csv
huwai/data_ecommerce_customer_churn.csv
huwai/model/model.pkl
ilham/data_bank_marketing_campaign.csv
ilham/data_bike_sharing.csv
ilham/data_california_house.csv
ilham/data_customer_lifetime_value.csv
ilham/data_daegu_apartment.csv
ilham/data_ecommerce_customer_churn.csv
ilham/data_hotel_booking_demand.csv
ilham/data_saudi_used_cars.csv
ilham/data_telco_customer_churn.csv
ilham/data_travel_insurance.csv
ilham/german_dataset.csv
ilham/model/model.pkl
ilham/model/modelilham.pkl
ilham/vertex-ai-deployment_old.ipynb
kevin/telco_customer_churn_dataset.csv
m_fahd/model/model.pkl
niken/data_california

In [79]:
client = bigquery.Client(project='dtidsus')

In [80]:
query_job = client.query(f"""select * from {dataset_id}.{table_id}""")

# print the result
print(query_job.result())

<google.cloud.bigquery.table.RowIterator object at 0x3145c09e0>


In [81]:

df = query_job.result().to_dataframe()



In [82]:

result = df
result.head()

Unnamed: 0,Type,Region,Make,Gear_Type,Origin,Options,Year,Engine_Size,Mileage,Negotiable,Price
0,2,Riyadh,Mazda,Automatic,Saudi,Standard,2011,1.5,37000,False,14000
1,3,Al-Medina,Mazda,Automatic,Saudi,Standard,2018,1.8,53000,False,54000
2,3,Al-Medina,Mazda,Automatic,Saudi,Standard,2018,2.0,31000,False,54000
3,3,Al-Medina,Mazda,Automatic,Saudi,Standard,2018,2.0,47000,False,54000
4,3,Riyadh,Mazda,Automatic,Saudi,Semi Full,2016,2.0,104800,False,39000


In [83]:
result.isna().sum()

Type           0
Region         0
Make           0
Gear_Type      0
Origin         0
Options        0
Year           0
Engine_Size    0
Mileage        0
Negotiable     0
Price          0
dtype: int64

In [84]:
result.describe(include='all')

Unnamed: 0,Type,Region,Make,Gear_Type,Origin,Options,Year,Engine_Size,Mileage,Negotiable,Price
count,5624,5624,5624,5624,5624,5624,5624.0,5624.0,5624.0,5624,5624.0
unique,347,27,58,2,4,3,,,,2,
top,Land Cruiser,Riyadh,Toyota,Automatic,Saudi,Full,,,,False,
freq,269,2272,1431,4875,4188,2233,,,,3828,
mean,,,,,,,2014.101885,3.29543,150923.375,,53074.058144
std,,,,,,,5.791606,1.515108,382835.963005,,70155.340614
min,,,,,,,1963.0,1.0,100.0,,0.0
25%,,,,,,,2012.0,2.0,38000.0,,0.0
50%,,,,,,,2016.0,3.0,103000.0,,36500.0
75%,,,,,,,2018.0,4.5,196000.0,,72932.5


## Upload the generated model

From `main.ipynb`, we already completed the model training and generation. The path should be located in `models/saudi_used_cars_predictor.pkl`

We will upload the model to GCS

In [85]:
try : 
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name) 
    blob_model = bucket.blob('andre_suchitra/model/model.pkl')
    blob_model.upload_from_filename('models/saudi_cars_price_predictor.pkl')

    print (f"Uploading model succeeded")
except:
    raise TypeError("An exception occurred")

Uploading model succeeded


In [108]:

# print folders in bucket
bucket = storage_client.get_bucket(bucket_name)
blobs = bucket.list_blobs()
for blob in blobs:
    print(blob.name)


Bril/data_ecommerce_customer_churn.csv
Bril/model/final_model_XGB.pkl
adji/data_daegu_apartment.csv
afdal/data_customer_lifetime_value.csv
afdal/model.pkl
andre_suchitra/model/model.pkl
andre_suchitra/model/saudi_cars_price_predictor.pkl
andre_suchitra/saudi_cars_price_predictor.pkl
andre_suchitra/saudi_used_cars.csv
davis/data_ecommerce_customer_churn.csv
davis/df_customer.csv
diah/data_california_house.csv
fahd/data_telco_customer_churn.csv
huwai/data_ecommerce_customer_churn.csv
huwai/model/model.pkl
ilham/data_bank_marketing_campaign.csv
ilham/data_bike_sharing.csv
ilham/data_california_house.csv
ilham/data_customer_lifetime_value.csv
ilham/data_daegu_apartment.csv
ilham/data_ecommerce_customer_churn.csv
ilham/data_hotel_booking_demand.csv
ilham/data_saudi_used_cars.csv
ilham/data_telco_customer_churn.csv
ilham/data_travel_insurance.csv
ilham/german_dataset.csv
ilham/model/model.pkl
ilham/model/modelilham.pkl
ilham/vertex-ai-deployment_old.ipynb
katriel/data_bank_marketing_campaign

In [95]:
from google.cloud import aiplatform

In [111]:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "cloud/dev_trial.json"

In [113]:
aiplatform.init(project='dtidsus', location='us-central1')

blob = bucket.blob('andre_suchitra/model/model.pkl')
print(f"Model exists in GCS: {blob.exists()}")

model = aiplatform.Model.upload(
    display_name='andre_suchitra_model',
    artifact_uri=f"gs://{bucket_name}/andre_suchitra/model",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest",
)

model.wait()

Model exists in GCS: True
Creating Model
Create Model backing LRO: projects/41965541199/locations/us-central1/models/889695122382389248/operations/5427069994611507200
Model created. Resource name: projects/41965541199/locations/us-central1/models/889695122382389248@1
To use this Model in another session:
model = aiplatform.Model('projects/41965541199/locations/us-central1/models/889695122382389248@1')


In [114]:
endpoint = aiplatform.Endpoint.create(
    display_name="andre-suchitra-endpoint-000",
    project='dtidsus',
    location='us-central1',
)

Creating Endpoint
Create Endpoint backing LRO: projects/41965541199/locations/us-central1/endpoints/3101336373374746624/operations/7588797815749345280
Endpoint created. Resource name: projects/41965541199/locations/us-central1/endpoints/3101336373374746624
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/41965541199/locations/us-central1/endpoints/3101336373374746624')


In [115]:
min_replica_count: int = 1
max_replica_count: int = 1

In [119]:
try:
    endpoint.deploy( 
        model=model,
        deployed_model_display_name='andre_suchitra_model',
        machine_type='e2-standard-2',
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        traffic_split={"0": 100},
        sync=True, 
    )
except Exception as e:
    print(f"\nDeployment failed with error: {str(e)}")

Deploying Model projects/41965541199/locations/us-central1/models/889695122382389248 to Endpoint : projects/41965541199/locations/us-central1/endpoints/3101336373374746624
Deploy Endpoint model backing LRO: projects/41965541199/locations/us-central1/endpoints/3101336373374746624/operations/889693370035732480

Deployment failed with error: 400 Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%223101336373374746624%22%0Aresource.labels.location%3D%22us-central1%22. 9: Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%2231013

## Using endpoint

In [117]:
# model endpoint data

PROJECT_ID = 'dtidsus'
ENDPOINT_ID = "projects/41965541199/locations/us-central1/endpoints/3101336373374746624"
REGION = 'us-central1'



In [127]:
numerical_features = ['Year', 'Mileage', 'Engine_Size']
categorical_features = ['Make', 'Model', 'Type', 'Origin', 'Gear_Type', 'Options', 'Region']
options = ['Standard', 'Semi Full', 'Full']

def generate_preprocessor():
    numeric_features = ['Mileage', 'Engine_Size']
    ordinal_feature = ['Options']
    binary_features = ['Gear_Type']
    low_card_features = ['Origin']
    high_card_features = ['Make', 'Type', 'Region']

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])

    year_transformer = Pipeline(steps=[
        ('scaler', MinMaxScaler())
    ])

    ordinal_transformer = Pipeline(steps=[
        ('ordinal', OrdinalEncoder(categories=[options]))
    ])

    binary_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(drop='first', sparse_output=False))
    ])

    low_card_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(sparse_output=False))
    ])

    high_card_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
    ])

    # Combine transformers
    preprocessor = ColumnTransformer(
        transformers=[
            ('year', year_transformer, ['Year']),
            ('num', numeric_transformer, numeric_features),
            ('ord', ordinal_transformer, ordinal_feature),
            ('binary', binary_transformer, binary_features),
            ('low_card', low_card_transformer, low_card_features),
            ('high_card', high_card_transformer, high_card_features)
        ],
        remainder='drop'
    )

    return preprocessor

In [None]:
aiplatform.init(project=PROJECT_ID, location=REGION)
endpoint = aiplatform.Endpoint(ENDPOINT_ID)

preprocessor = generate_preprocessor()

# Create pandas DataFrame with correct Saudi used cars data:
new_car_data = pd.DataFrame({
    'Type': ['Land Cruiser'],  # Example car type
    'Region': ['Riyadh'],      # One of the Saudi regions
    'Make': ['Toyota'],        # Car manufacturer
    'Gear_Type': ['Automatic'], # 'Automatic' or 'Manual'
    'Origin': ['Saudi'],       # Car origin
    'Options': ['Full'],       # 'Standard', 'Semi Full', or 'Full'
    'Year': [2020],           # Car year
    'Engine_Size': [4.0],      # Engine size in liters
    'Mileage': [50000],       # Mileage in kilometers
    'Negotiable': [False]      # Whether price is negotiable
})

X_processed = preprocessor.fit_transform(new_car_data)
print("X_processed => ", X_processed)

prediction = endpoint.predict(instances=X_processed.tolist())
print("PREDICTION: ", prediction)