## Note: please use Python 3.10 or 3.11 (or newer) to run all of the code below

# Installations

In [24]:
## import library 
import os
import pandas as pd
import numpy as np

# import Google Cloud libraries
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform
from support_functions import missing_value, fill_missing, list_dtypes

## sklearn module
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import pickle


In [25]:
# Export the service-account key path through the environment variable
# GOOGLE_APPLICATION_CREDENTIALS for the rest of this process.
os.environ.update(
    {"GOOGLE_APPLICATION_CREDENTIALS": "/home/ariqulfikri/afif-036/sa-development.json"}
)

In [26]:
# --- Google Cloud project and region ---
project_id = "dti-ds"
region = "us-central1"

# --- BigQuery dataset and table queried for bulk prediction ---
dataset_id = "afif_dataset_036"
table_id = "bank_marketing_campaign_forecast"

# --- Cloud Storage bucket and object path ---
bucket_name = "afif_gcs_036"
blob_name = "data/data_bank_marketing_campaign.csv"

In [27]:
import os

# Local destination for the model file downloaded from Cloud Storage.
model_name = "best_model.pkl"
local_model_dir = "models"
local_model_path = os.path.join(local_model_dir, model_name)

# Ensure the local directory exists before the download writes into it.
os.makedirs(local_model_dir, exist_ok=True)

# Download the object "model/<model_name>" from the configured bucket.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f"model/{model_name}")
    blob_model.download_to_filename(local_model_path)
except Exception as e:
    # Failures here (credentials, missing bucket/object, network, disk) are
    # operational errors rather than type errors, so raise RuntimeError and
    # chain the original exception to keep its type and traceback visible.
    raise RuntimeError(f"An error occurred: {e}") from e
else:
    # Only reached when every step in the try body succeeded.
    print("Retrieve model succeeded")

Retrieve model succeeded


In [28]:
import pickle

model_path = 'models/best_model.pkl'

# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load model files that come from a trusted source.
try:
    with open(model_path, 'rb') as file:
        model = pickle.load(file)
except Exception as e:
    print(f"Error loading model: {e}")
    # Re-raise instead of continuing: if loading failed, `model` was never
    # bound, and carrying on would only surface later as a NameError that
    # hides the real cause.
    raise
else:
    print("Model loaded successfully!")

Model loaded successfully!


# Model Prediction

In [33]:
import pickle
import pandas as pd

# Where the pickled pipeline lives on local disk.
model_name = "best_model.pkl"
model_path = "models/" + model_name

# Deserialize the saved pipeline from disk.
with open(model_path, mode='rb') as fh:
    pipeline = pickle.load(fh)

# Feature names, in the same order as the values in each sample row below.
columns = [
    'age', 'job', 'balance', 'housing', 'loan', 'contact',
    'month', 'campaign', 'pdays', 'poutcome', 'age_range',
]

# Two hand-written sample rows used as a quick smoke test of the pipeline.
test_data = [
    [55, 'admin.', 1662, 'no', 'no', 'cellular', 'jun', 2, -1, 'unknown', '56+'],
    [39, 'self-employed', -3058, 'yes', 'yes', 'cellular', 'apr', 3, -1, 'unknown', '36-45'],
]

# Wrap the rows in a DataFrame so each value is labelled with its column name.
df = pd.DataFrame(data=test_data, columns=columns)

# Run the loaded pipeline on the sample frame and show its output.
predictions = pipeline.predict(df)
print(predictions)


[1 0]


In [19]:
import os

# Sanity check: report whether the model file is present on local disk.
model_path = f"models/{model_name}"
print(
    f"File exists at {model_path}"
    if os.path.exists(model_path)
    else f"File does not exist at {model_path}"
)


File exists at models/best_model.pkl


In [35]:
# Not referenced directly in this cell; presumably imported so that
# to_dataframe() can map BigQuery column types — TODO confirm.
import db_dtypes

# BigQuery client bound to the configured project.
client = bigquery.Client(project=project_id)

# Select every column and row of the configured table, wait for the job to
# finish, and materialize the result as a pandas DataFrame.
query_job = client.query(
    query=f"""select * from {dataset_id}.{table_id}"""
)
test_df = query_job.result().to_dataframe()

# Last expression of the cell: displays the frame in the notebook.
test_df



Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome,deposit
0,18,student,108,False,False,cellular,aug,1,-1,unknown,True
1,18,student,108,False,False,cellular,feb,1,183,success,True
2,18,student,348,False,False,cellular,may,4,-1,unknown,True
3,18,student,108,False,False,cellular,sep,1,-1,unknown,True
4,19,student,779,False,False,cellular,apr,4,-1,unknown,True
...,...,...,...,...,...,...,...,...,...,...,...
3057,87,retired,2190,False,False,telephone,jan,2,-1,unknown,True
3058,88,retired,433,False,False,telephone,sep,1,274,failure,False
3059,90,retired,1,False,False,cellular,feb,3,-1,unknown,True
3060,90,retired,712,False,False,telephone,mar,1,-1,unknown,True


In [36]:
# Alias (not a copy): both names refer to the same DataFrame object, so any
# in-place change made through one name is visible through the other.
bulk_predict_df = test_df

In [37]:
# NOTE(review): `loaded_model` is not assigned in the cells visible in this
# notebook (they bind `model` and `pipeline`) — confirm where it is defined
# before running this cell in a fresh kernel.
y_pred = loaded_model.predict(bulk_predict_df)

# Wrap the predictions in a one-column frame so they can be joined to the inputs.
y_pred_df = pd.DataFrame(data=y_pred, columns=['SubscriptionPrediction'])

# Put inputs and predictions side by side; both indexes are reset first so the
# rows are aligned by position rather than by whatever index the inputs carry.
result_df = pd.concat(
    objs=(
        bulk_predict_df.reset_index(drop=True),
        y_pred_df.reset_index(drop=True),
    ),
    axis="columns",
)

# Last expression of the cell: displays the combined frame in the notebook.
result_df

Unnamed: 0,age,job,balance,housing,loan,contact,month,campaign,pdays,poutcome,deposit,SubscriptionPrediction
0,18,student,108,False,False,cellular,aug,1,-1,unknown,True,1
1,18,student,108,False,False,cellular,feb,1,183,success,True,1
2,18,student,348,False,False,cellular,may,4,-1,unknown,True,1
3,18,student,108,False,False,cellular,sep,1,-1,unknown,True,1
4,19,student,779,False,False,cellular,apr,4,-1,unknown,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...
3057,87,retired,2190,False,False,telephone,jan,2,-1,unknown,True,1
3058,88,retired,433,False,False,telephone,sep,1,274,failure,False,0
3059,90,retired,1,False,False,cellular,feb,3,-1,unknown,True,1
3060,90,retired,712,False,False,telephone,mar,1,-1,unknown,True,1
