In [2]:
import pandas as pd
import numpy as np
import pickle
import os

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.compose import TransformedTargetRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from google.cloud import storage, bigquery

In [3]:
# Point the Google client libraries at the service-account key file used for
# the BigQuery / Cloud Storage calls in the following cells.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run elsewhere unless the key file exists at exactly this location.
bigquery_key_file = r'C:\Users\diyah_l0lko0w\OneDrive\Documents\Capstone Project 3\trial_bigq.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = bigquery_key_file

In [4]:
# Shared Google Cloud configuration for this notebook.
project_id = 'dtidsus'                        # GCP project used by every client
dataset_id = 'capstone'                       # BigQuery dataset queried below
table_id = 'data_california_house'            # BigQuery table queried below
region = 'us-central1'
bucket_name = 'modul4'                        # Cloud Storage bucket for uploads
blob_name = 'diah/data_california_house.csv'  # object path of the dataset CSV

# Build the BigQuery client from the constant above rather than repeating the
# project literal, so changing `project_id` is enough to retarget the notebook.
client = bigquery.Client(project=project_id)

In [5]:
# Upload the cleaned dataset CSV to Cloud Storage, using the project, bucket
# and blob path defined in the configuration cell.
# NOTE(review): the local source path is machine-specific.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    data_capstone = bucket.blob(blob_name)
    data_capstone.upload_from_filename(r'C:\Users\diyah_l0lko0w\OneDrive\Documents\Capstone Project 3\cleaned_data_california.csv')

    # NOTE(review): this cell uploads the dataset, not a model; the message is
    # left unchanged so it still matches the recorded output of this cell.
    print ("Uploading model succeeded")
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not converted into a TypeError, and
    # chain the original error explicitly so its traceback is preserved.
    raise TypeError("An exception occurred") from exc

Uploading model succeeded


In [6]:
# Submit a query that selects every column of the configured table.
sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(sql)

In [7]:
# Wait for the query job to finish, then materialise its rows as a DataFrame.
query_rows = query_job.result()
df = query_rows.to_dataframe()



In [8]:
# Show the queried DataFrame as this cell's output (a bare last expression is
# rendered by the notebook).
df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-117.50,34.00,15.0,1929.0,317.0,1237.0,316.0,4.4063,INLAND,128500.0
1,-118.25,34.00,32.0,1218.0,342.0,1292.0,304.0,1.5781,<1H OCEAN,102900.0
2,-118.25,34.00,36.0,1176.0,309.0,1267.0,292.0,1.6382,<1H OCEAN,105000.0
3,-118.25,34.00,34.0,1905.0,552.0,2194.0,521.0,1.4792,<1H OCEAN,95800.0
4,-118.25,34.00,41.0,1768.0,475.0,1721.0,474.0,1.3030,<1H OCEAN,90400.0
...,...,...,...,...,...,...,...,...,...,...
14443,-122.52,37.92,47.0,793.0,163.0,334.0,151.0,5.8509,NEAR BAY,317800.0
14444,-120.84,37.92,27.0,471.0,84.0,195.0,72.0,3.3333,INLAND,208300.0
14445,-122.19,39.92,20.0,2563.0,658.0,1363.0,611.0,1.0230,INLAND,54200.0
14446,-120.09,39.92,19.0,2335.0,518.0,1028.0,383.0,1.7267,INLAND,60700.0


In [9]:
# Count the missing values in each column and show the result.
missing_per_column = df.isna().sum()
missing_per_column

longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        137
population              0
households              0
median_income           0
ocean_proximity         0
median_house_value      0
dtype: int64

In [10]:
# Drop every row that has at least one missing value (pandas defaults made
# explicit). Note that `df` is rebound, so the unfiltered frame is no longer
# reachable under this name.
df = df.dropna(axis=0, how='any')

In [11]:
# Feature columns that are standardised before modelling.
numerical_columns = [
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
]

# Feature columns that are one-hot encoded before modelling.
categorical_columns = [
    'ocean_proximity',
]

In [12]:
# Separate the target column from the feature columns.
target_column = 'median_house_value'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# One-hot encode the categorical columns. The encoder is fitted on the training
# split only and then applied unchanged to the test split. The first category
# is dropped and a dense array is returned.
encoder = OneHotEncoder(sparse_output=False, drop='first')
train_categoricals = X_train[categorical_columns]
test_categoricals = X_test[categorical_columns]
X_train_encoded = encoder.fit_transform(train_categoricals)
X_test_encoded = encoder.transform(test_categoricals)

In [14]:
# Standardise the numerical columns. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
train_numericals = X_train[numerical_columns]
test_numericals = X_test[numerical_columns]
X_train_scaled = scaler.fit_transform(train_numericals)
X_test_scaled = scaler.transform(test_numericals)

In [15]:
# Join the scaled numerical block and the encoded categorical block
# column-wise, numerical columns first, for each split.
X_train_processed = np.concatenate([X_train_scaled, X_train_encoded], axis=1)
X_test_processed = np.concatenate([X_test_scaled, X_test_encoded], axis=1)

In [16]:
# Random forest regressor wrapped so that the target is passed through log1p
# before fitting and predictions are passed through expm1 on the way out.
base_forest = RandomForestRegressor(
    n_estimators=100,
    max_depth=3,
    random_state=42,
)

model_rf = TransformedTargetRegressor(
    regressor=base_forest,
    func=np.log1p,
    inverse_func=np.expm1,
)

In [17]:
# Fit the wrapped regressor on the already-processed training features.
model_rf.fit(X=X_train_processed, y=y_train)

In [18]:
# Predict house values for the processed held-out features.
y_pred = model_rf.predict(X=X_test_processed)

In [19]:
# Score the held-out predictions against the true target values.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

In [20]:
# Print each evaluation metric on its own line, label first.
for metric_label, metric_value in (('RMSE: ', rmse), ('MAE: ', mae), ('R2: ', r2)):
    print(metric_label, metric_value)

RMSE:  73785.71595565505
MAE:  51776.952172141304
R2:  0.5757845655859628


In [21]:
# Save model
# Serialise the fitted model to a local pickle file. The `with` block closes
# (and therefore flushes) the file deterministically, so the complete file is
# on disk before a later cell uploads it.
# NOTE(review): only `model_rf` is pickled. The encoder and scaler are fitted
# separately and are not part of this artifact, so whoever loads it must supply
# inputs that have already been scaled and encoded the same way.
model_filename = 'model.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(model_rf, model_file)

In [22]:
# Upload to Google Cloud Platform
# Copy the local pickle file to the bucket path that the Vertex AI model
# registration reads its artifact from.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob('ilham/model/model.pkl')
    blob_model.upload_from_filename(model_filename)

    print("Uploading model succeeded")
except Exception as e:
    print("An exception occurred while uploading the model:", e)
    # Re-raise after reporting: the following cells register and deploy this
    # artifact, so continuing after a failed upload would use a missing or
    # stale file. This also matches the dataset-upload cell, which raises.
    raise

Uploading model succeeded


In [26]:
from google.cloud import aiplatform

In [29]:
# Switch the Google client libraries to a different service-account key file
# (given as a relative path) before the Vertex AI calls that follow.
vertex_key_file = "dev_trial.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = vertex_key_file

In [30]:
# Initialise the Vertex AI SDK with the project and region from the
# configuration cell instead of repeating the literals.
aiplatform.init(project=project_id, location=region)

# Register the pickled model stored under the bucket's `ilham/model/` prefix.
# NOTE(review): the serving image is the scikit-learn 1.2 prebuilt container;
# presumably the scikit-learn version used to pickle the model has to match it.
# Confirm against the training environment.
model = aiplatform.Model.upload(
    display_name='diah_model_000',
    artifact_uri=f"gs://{bucket_name}/ilham/model/",
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-2:latest",
)

# Block until the model resource has been created.
model.wait()

Creating Model
Create Model backing LRO: projects/41965541199/locations/us-central1/models/7103536708246831104/operations/4959258583318396928
Model created. Resource name: projects/41965541199/locations/us-central1/models/7103536708246831104@1
To use this Model in another session:
model = aiplatform.Model('projects/41965541199/locations/us-central1/models/7103536708246831104@1')


In [32]:
# Create the Vertex AI endpoint that the model is deployed to, using the
# project and region from the configuration cell instead of repeating the
# literals.
endpoint = aiplatform.Endpoint.create(
    display_name="diah-endpoint-000",
    project=project_id,
    location=region,
)

Creating Endpoint
Create Endpoint backing LRO: projects/41965541199/locations/us-central1/endpoints/6990194651609169920/operations/5928658403109896192
Endpoint created. Resource name: projects/41965541199/locations/us-central1/endpoints/6990194651609169920
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/41965541199/locations/us-central1/endpoints/6990194651609169920')


In [33]:
# Replica bounds passed to `endpoint.deploy()` below. Both are 1, so the
# minimum and maximum replica counts requested are the same.
min_replica_count: int = 1
max_replica_count: int = 1

In [34]:
# Deploy the registered model to the endpoint synchronously.
# NOTE(review): the recorded run of this cell failed with
# "FailedPrecondition: 400 Model server exited unexpectedly". That is
# presumably a mismatch between the pickled model and the serving container;
# check the model server logs linked in the error to confirm.
deploy_options = dict(
    model=model,
    deployed_model_display_name='diah_model_000',
    machine_type='e2-standard-2',
    min_replica_count=min_replica_count,
    max_replica_count=max_replica_count,
    sync=True,
)
endpoint.deploy(**deploy_options)

Deploying Model projects/41965541199/locations/us-central1/models/7103536708246831104 to Endpoint : projects/41965541199/locations/us-central1/endpoints/6990194651609169920
Deploy Endpoint model backing LRO: projects/41965541199/locations/us-central1/endpoints/6990194651609169920/operations/3246201875057344512


FailedPrecondition: 400 Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%226990194651609169920%22%0Aresource.labels.location%3D%22us-central1%22. 9: Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=41965541199&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%226990194651609169920%22%0Aresource.labels.location%3D%22us-central1%22.