In [1]:
# Import library 
import os
import pandas as pd
import numpy as np

# Import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import OneHotEncoder
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks
import category_encoders as ce
import joblib

In [2]:
# Authenticate with a service-account key file.
# setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# exported in the environment, so the notebook also runs on machines where the
# key file lives elsewhere; the path below is only the fallback.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    "/home/ayunouvalina14/yuna-019/sa-development.json",
)

In [3]:
# Two hand-written bookings used to try the model on unseen input.
# Each tuple is one booking; values follow the order of `feature_names`.
feature_names = [
    'market_segment',
    'previous_cancellations',
    'booking_changes',
    'deposit_type',
    'customer_type',
    'reserved_room_type',
    'required_car_parking_spaces',
]
sample_rows = [
    ('Aviation', 13, 1, 'Non Refund', 'Transient-Party', 'C', 0),
    ('Direct', 21, 14, 'Non Refund', 'Contract', 'H', 2),
]
new_data = pd.DataFrame(sample_rows, columns=feature_names)

new_data

Unnamed: 0,market_segment,previous_cancellations,booking_changes,deposit_type,customer_type,reserved_room_type,required_car_parking_spaces
0,Aviation,13,1,Non Refund,Transient-Party,C,0
1,Direct,21,14,Non Refund,Contract,H,2


In [4]:
# Notebook configuration, grouped by the service each value belongs to.

# Google Cloud project and region
project_id, region = 'dti-ds', 'us-central1'

# BigQuery dataset and source table
dataset_id, table_id = 'yuna_dataset_019', 'tespredict'

# Cloud Storage bucket, data blob path and model file name
bucket_name, blob_name = 'yuna_gcs_019', 'data/tespredict.csv'
model_name = 'logreg_model.pkl'

In [5]:
# Retrieve the model from Google Cloud Storage.
# The object model/<model_name> in <bucket_name> is saved locally as
# 'logreg_model.pkl', the file name the model-loading cell reads.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f'model/{model_name}')
    blob_model.download_to_filename('logreg_model.pkl')
except Exception as exc:
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
    # propagate, and chain the cause so the real failure stays visible in the
    # traceback instead of being replaced by an unrelated error.
    raise RuntimeError(
        f"Could not download gs://{bucket_name}/model/{model_name}: {exc}"
    ) from exc
else:
    print("Read Model Succeeded")

Read Model Succeeded


In [6]:
# Load Data From BigQuery

# Client bound to the configured project
client = bigquery.Client(project=project_id)

# Select every column of the source table
source_query = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(source_query)

# Wait for the query to finish, then turn the result rows into a DataFrame
result_rows = query_job.result()
df = result_rows.to_dataframe()
df



Unnamed: 0,market_segment,previous_cancellations,booking_changes,deposit_type,customer_type,reserved_room_type,required_car_parking_spaces
0,Direct,0,0,No Deposit,Group,C,0
1,Direct,0,0,No Deposit,Group,D,1
2,Direct,0,0,No Deposit,Group,F,0
3,Direct,0,4,No Deposit,Group,A,0
4,Groups,0,0,No Deposit,Group,A,0
...,...,...,...,...,...,...,...
195,Offline TA/TO,0,0,No Deposit,Transient-Party,E,0
196,Offline TA/TO,0,4,No Deposit,Transient-Party,D,0
197,Offline TA/TO,0,7,No Deposit,Transient-Party,A,0
198,Offline TA/TO,0,1,No Deposit,Transient-Party,F,0


In [7]:
# Restore the model from the local file written by the download cell.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code, so only load artifacts that come from a trusted bucket.
model_path = 'logreg_model.pkl'
loaded_model = joblib.load(model_path)

# Predict on the hand-written sample rows with the restored model
predictions = loaded_model.predict(new_data)
predictions

array([1, 0])

In [8]:
# Attach the predictions to a copy of the sample rows; new_data itself is not modified
new_test_unseen = new_data.assign(Prediction=predictions)
new_test_unseen

Unnamed: 0,market_segment,previous_cancellations,booking_changes,deposit_type,customer_type,reserved_room_type,required_car_parking_spaces,Prediction
0,Aviation,13,1,Non Refund,Transient-Party,C,0,1
1,Direct,21,14,Non Refund,Contract,H,2,0


In [9]:
# Bulk Prediction

# Score only the input columns. Dropping any output columns written by an
# earlier run keeps this cell safe to re-run: otherwise a second execution
# would pass `Prediction` and `Probabilities` back to the model as inputs.
output_columns = ['Prediction', 'Probabilities']
features = df.drop(columns=output_columns, errors='ignore')

bulk_prediction = loaded_model.predict(features)
# Column 1 of predict_proba is used as the positive-class probability
bulk_probabilities = loaded_model.predict_proba(features)[:, 1]

# Store both outputs next to the inputs they were computed from
df['Prediction'] = bulk_prediction
df['Probabilities'] = bulk_probabilities
df

Unnamed: 0,market_segment,previous_cancellations,booking_changes,deposit_type,customer_type,reserved_room_type,required_car_parking_spaces,Prediction,Probabilities
0,Direct,0,0,No Deposit,Group,C,0,0,0.421260
1,Direct,0,0,No Deposit,Group,D,1,0,0.070430
2,Direct,0,0,No Deposit,Group,F,0,0,0.483107
3,Direct,0,4,No Deposit,Group,A,0,0,0.215526
4,Groups,0,0,No Deposit,Group,A,0,1,0.573495
...,...,...,...,...,...,...,...,...,...
195,Offline TA/TO,0,0,No Deposit,Transient-Party,E,0,1,0.702997
196,Offline TA/TO,0,4,No Deposit,Transient-Party,D,0,1,0.511446
197,Offline TA/TO,0,7,No Deposit,Transient-Party,A,0,0,0.276836
198,Offline TA/TO,0,1,No Deposit,Transient-Party,F,0,1,0.705000


In [10]:
# Upload to BigQuery Dataset as a Table

# Destination table for the scored rows. It gets its own variable so the source
# `table_id` from the configuration cell is not overwritten; otherwise
# re-running the cell that queries {dataset_id}.{table_id} would read the
# prediction table instead of the source table.
output_table_id = 'test_predicted'

# Bind the client to the configured project, as the query cell does, rather
# than relying on the project resolved from the ambient credentials.
client = bigquery.Client(project=project_id)

# Define the Full Table ID
table_id_full = f"{client.project}.{dataset_id}.{output_table_id}"

# Column names and BigQuery types for the destination table
schema = [
    bigquery.SchemaField("market_segment", "STRING"),
    bigquery.SchemaField("previous_cancellations", "INTEGER"),
    bigquery.SchemaField("booking_changes", "INTEGER"),
    bigquery.SchemaField("deposit_type", "STRING"),
    bigquery.SchemaField("customer_type", "STRING"),
    bigquery.SchemaField("reserved_room_type", "STRING"),
    bigquery.SchemaField("required_car_parking_spaces", "INTEGER"),
    bigquery.SchemaField("Prediction", "INTEGER"),
    bigquery.SchemaField("Probabilities", "FLOAT")
]

# Replace spaces in column names with underscores
df.columns = [col.replace(' ', '_') for col in df.columns]

In [11]:
# Load DataFrame into the BigQuery table.
# The explicit `schema` is passed through the job config so column types come
# from the declared fields rather than being inferred from DataFrame dtypes.
# NOTE(review): no write_disposition is set, so re-running this cell may append
# the same rows again -- confirm whether append or overwrite is intended.
job_config = bigquery.LoadJobConfig(schema=schema)
job = client.load_table_from_dataframe(df, table_id_full, job_config=job_config)

# Wait for the job to complete
job.result()
print(f"Loaded {job.output_rows} rows into {table_id_full}")

Loaded 200 rows into dti-ds.yuna_dataset_019.test_predicted


THANK YOU!