In [1]:
# Run locally & on the cloud
import pandas as pd
import numpy as np
import pickle
from xgboost import XGBRegressor
import os

In [2]:
# Two hand-written sample rows used to try out the model's predict() call.
# NOTE(review): 'temp' and 'hr' are strings here, whereas the BigQuery table
# loaded later in this notebook reports them as float64 / Int64 -- confirm
# that the model really expects text for these two columns.
sample_rows = [
    {
        'hum': 0.78, 'weathersit': 'Mist', 'holiday': 'Yes', 'season': 'Spring',
        'temp': '0.66', 'hr': '10', 'day': 4, 'quarter': 2, 'rush_hour': 0,
    },
    {
        'hum': 0.56, 'weathersit': 'Clear', 'holiday': 'No', 'season': 'Winter',
        'temp': '0.24', 'hr': '7', 'day': 6, 'quarter': 1, 'rush_hour': 1,
    },
]
new_test_data = pd.DataFrame(sample_rows)

new_test_data

Unnamed: 0,hum,weathersit,holiday,season,temp,hr,day,quarter,rush_hour
0,0.78,Mist,Yes,Spring,0.66,10,4,2,0
1,0.56,Clear,No,Winter,0.24,7,6,1,1



# **Variables Needed for Google Cloud**

In [3]:
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

In [4]:

# --- Authentication: service-account key file ---
import os

# Alternative key file kept for reference: "../vertex-ai-ml/dti-ds-31329ac0651d.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "sa-development.json"

# --- Project and BigQuery source table ---
project_id, region = 'dti-ds', 'us-central1'
dataset_id, table_id = 'ahjussi_dataset_011', 'bike_test'

# --- Cloud Storage bucket and object names ---
bucket_name, blob_name = 'ahjussi_gcs_011', 'data/bike_test.csv'

# File name of the pickled model (read from the bucket's model/ folder below)
model_name = 'xgb_model_final.pkl'

In [5]:
# Download the pickled model from Google Cloud Storage into the working directory.
# The local file name is the one opened later when the model is loaded.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(f'model/{model_name}')
    blob_model.download_to_filename('xgb_model_final.pkl')
except Exception as exc:
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
    # propagate, and chain the original error so the real cause (credentials,
    # missing bucket, missing object, ...) stays visible in the traceback.
    raise RuntimeError(
        f"Failed to download gs://{bucket_name}/model/{model_name}: {exc}"
    ) from exc
else:
    print ("Read model succeeded")

Read model succeeded


In [6]:
# Pull the whole test table from BigQuery into a pandas DataFrame.
from google.cloud import bigquery

client = bigquery.Client(project=project_id)

sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(sql)
df = query_job.result().to_dataframe()

# Cleansing: discard 'int64_field_0' before scoring
# (presumably a row-index column created when the table was loaded -- verify).
result = df.drop(columns=['int64_field_0'])
result



Unnamed: 0,hum,weathersit,holiday,season,temp,hr,day,quarter,rush_hour
0,0.77,Clear,False,Fall,0.50,8,5,4,1
1,0.45,Clear,False,Fall,0.50,14,2,4,0
2,0.31,Clear,False,Fall,0.50,20,6,4,0
3,0.45,Clear,False,Fall,0.50,11,4,4,0
4,0.63,Clear,False,Fall,0.50,9,5,3,0
...,...,...,...,...,...,...,...,...,...
2358,0.40,Clear,False,Winter,0.56,18,2,1,1
2359,0.21,Clear,False,Winter,0.56,15,0,1,0
2360,0.64,Clear,False,Winter,0.56,19,6,1,0
2361,0.37,Mist,False,Winter,0.56,16,5,1,0


In [7]:
# Deserialize the downloaded model file and score the hand-written sample rows.
# NOTE: pickle.load can execute code embedded in the file -- only load model
# artifacts that come from a trusted location.
with open('xgb_model_final.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

y_pred_cloud_new_data = loaded_model.predict(new_test_data)
y_pred_cloud_new_data

array([194.23055 ,   8.019369], dtype=float32)

In [8]:
# Build a copy of the sample rows with the predictions attached as a new column;
# new_test_data itself is left untouched.
new_test_cloud = new_test_data.assign(**{'Demand Prediction': y_pred_cloud_new_data})
new_test_cloud

Unnamed: 0,hum,weathersit,holiday,season,temp,hr,day,quarter,rush_hour,Demand Prediction
0,0.78,Mist,Yes,Spring,0.66,10,4,2,0,194.230545
1,0.56,Clear,No,Winter,0.24,7,6,1,1,8.019369


In [9]:
# Inspect the column dtypes of the BigQuery result before 'holiday' is recoded.
print(result.dtypes)


hum           float64
weathersit     object
holiday       boolean
season         object
temp          float64
hr              Int64
day             Int64
quarter         Int64
rush_hour       Int64
dtype: object


In [10]:
# Recode the boolean 'holiday' flag into the 'Yes'/'No' text labels
# (the same labels the hand-written sample rows use).
holiday_labels = {'True': 'Yes', 'False': 'No'}
holiday_as_text = result['holiday'].astype(str)
result['holiday'] = holiday_as_text.replace(holiday_labels)

In [11]:
# Display `result` after the 'holiday' recoding.
result

Unnamed: 0,hum,weathersit,holiday,season,temp,hr,day,quarter,rush_hour
0,0.77,Clear,No,Fall,0.50,8,5,4,1
1,0.45,Clear,No,Fall,0.50,14,2,4,0
2,0.31,Clear,No,Fall,0.50,20,6,4,0
3,0.45,Clear,No,Fall,0.50,11,4,4,0
4,0.63,Clear,No,Fall,0.50,9,5,3,0
...,...,...,...,...,...,...,...,...,...
2358,0.40,Clear,No,Winter,0.56,18,2,1,1
2359,0.21,Clear,No,Winter,0.56,15,0,1,0
2360,0.64,Clear,No,Winter,0.56,19,6,1,0
2361,0.37,Mist,No,Winter,0.56,16,5,1,0


In [12]:
# NOTE(review): astype() returns a new object and nothing here is assigned back,
# so `result` keeps its original dtypes; this cell only *displays* the 'day'
# column as a category. If the model is meant to receive categorical dtypes,
# the cast has to be stored -- confirm the intent before changing it.
categorical_columns = ['quarter', 'rush_hour', 'holiday', 'season', 'weathersit', 'day']
result.astype({column: 'category' for column in categorical_columns})['day']

0       5
1       2
2       6
3       4
4       5
       ..
2358    2
2359    0
2360    6
2361    5
2362    2
Name: day, Length: 2363, dtype: category
Categories (7, Int64): [0, 1, 2, 3, 4, 5, 6]

In [13]:
# Score the rows loaded from BigQuery.
# A later cell writes the predictions back onto `result` as 'Demand Prediction'.
# Dropping that column here (when it exists) keeps this cell re-runnable: the
# model is never fed its own previous output as an extra feature. On a fresh
# top-to-bottom run the column is absent and the input is unchanged.
features = result.drop(columns=['Demand Prediction'], errors='ignore')
y_pred_file_cloud = loaded_model.predict(features)
y_pred_file_cloud[:13]  # preview the first 13 predictions

array([186.12625  , 219.20619  , 179.98456  , 185.88463  , 291.2779   ,
        14.774804 ,   3.3664975,  94.96565  ,   3.9352245, 229.72456  ,
       199.30913  , 355.08508  , 354.70297  ], dtype=float32)

In [14]:
# Store the model output next to its input rows, then display the frame.
prediction_column = 'Demand Prediction'
result[prediction_column] = y_pred_file_cloud
result

Unnamed: 0,hum,weathersit,holiday,season,temp,hr,day,quarter,rush_hour,Demand Prediction
0,0.77,Clear,No,Fall,0.50,8,5,4,1,186.126251
1,0.45,Clear,No,Fall,0.50,14,2,4,0,219.206192
2,0.31,Clear,No,Fall,0.50,20,6,4,0,179.984558
3,0.45,Clear,No,Fall,0.50,11,4,4,0,185.884628
4,0.63,Clear,No,Fall,0.50,9,5,3,0,291.277893
...,...,...,...,...,...,...,...,...,...,...
2358,0.40,Clear,No,Winter,0.56,18,2,1,1,485.738953
2359,0.21,Clear,No,Winter,0.56,15,0,1,0,160.346832
2360,0.64,Clear,No,Winter,0.56,19,6,1,0,277.710541
2361,0.37,Mist,No,Winter,0.56,16,5,1,0,365.304962


In [15]:
# Destination of the predictions file in Cloud Storage
project_id = 'dti-ds'
bucket_name = 'ahjussi_gcs_011'
model_folder = 'data'  # folder (object-name prefix) inside the bucket
result_file_name = 'demand_bike_predictions.csv'
blob_name = '/'.join((model_folder, result_file_name))

# Write `result` (with its 'Demand Prediction' column) to a local CSV file,
# leaving out the DataFrame index.
local_csv_path = result_file_name
result.to_csv(local_csv_path, index=False)

In [16]:
# Push the local CSV to the bucket. A failure is reported on stdout but does
# not stop the notebook.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(local_csv_path)
except Exception as upload_error:
    print(f"An error occurred while uploading to GCS: {upload_error}")
else:
    print("File uploaded to GCS successfully.")

File uploaded to GCS successfully.
