In [None]:
import pandas as pd
import numpy as np
import pickle
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.compose import TransformedTargetRegressor
from xgboost import XGBRegressor
from google.cloud import storage, bigquery

In [2]:
# Point the Google Cloud client libraries at a service-account key file.
# setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# present in the environment, so the notebook can run on machines where this
# local Windows path does not exist; the hardcoded path is only a fallback.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    r'C:\Users\Asus\Desktop\DS Purwa\Module_3\Capstone Module 3\trial_bigq.json',
)

In [3]:
# Google Cloud configuration shared by the cells below.
project_id = 'dtidsus'                       # GCP project used for BigQuery and Cloud Storage
dataset_id = 'capstone'                      # BigQuery dataset holding the source table
table_id = 'data_daegu_apartment'            # BigQuery table queried for training data
region = 'us-central1'                       # not referenced by any other visible cell
bucket_name = 'modul4'                       # Cloud Storage bucket for the dataset and the model
blob_name = 'adji/data_daegu_apartment.csv'  # object path of the dataset CSV inside the bucket

# Build the BigQuery client from project_id instead of repeating the literal,
# so changing the project above is enough to retarget the notebook.
client = bigquery.Client(project=project_id)

In [4]:
# Upload the cleaned dataset CSV from local disk to Cloud Storage.
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # blob_name comes from the configuration cell (same object path as before).
    data_capstone = bucket.blob(blob_name)
    data_capstone.upload_from_filename(r'C:\Users\Asus\Desktop\DS Purwa\Module_3\Capstone Module 3\Daegu_Cleaned.csv')

    # NOTE(review): the message says "model" although this cell uploads the
    # dataset; the text is left unchanged so it matches the recorded output.
    print ("Uploading model succeeded")
except Exception as exc:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit are not converted into a TypeError. Chain the original error
    # explicitly so the real cause stays visible in the traceback.
    raise TypeError("An exception occurred") from exc

Uploading model succeeded


In [5]:
# Re-create the BigQuery client for the configured project. The configuration
# cell already builds one; use the project_id keyword here for consistency
# instead of a positional project literal.
client = bigquery.Client(project=project_id)

In [6]:
# Submit a query that selects every row and column of the source table.
sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(sql)

In [7]:
# Fetch the query result and materialise it as a pandas DataFrame.
rows = query_job.result()
df = rows.to_dataframe()



In [8]:
# Display the loaded frame as the cell output (the recorded output below shows
# an elided head/tail view, including an 'Unnamed: 0' column).
df

Unnamed: 0,HallwayType,TimeToSubway,SubwayStation,N_FacilitiesNearBy,N_FacilitiesNearBy_4,N_SchoolNearBy,N_Parkinglot,YearBuilt,N_FacilitiesInApt,Size,SalePrice
0,terraced,10min~15min,Kyungbuk_uni_hospital,1,5,1,0,1986,4,1796,267256
1,terraced,10min~15min,Kyungbuk_uni_hospital,1,5,1,0,1986,4,1796,221238
2,terraced,10min~15min,Kyungbuk_uni_hospital,1,5,1,0,1986,4,1796,409734
3,terraced,10min~15min,Kyungbuk_uni_hospital,1,5,1,0,1986,4,1796,300884
4,terraced,10min~15min,Kyungbuk_uni_hospital,1,5,1,0,1986,4,1796,371681
...,...,...,...,...,...,...,...,...,...,...,...
4118,corridor,5min~10min,Sin-nam,1,5,2,18,1986,3,508,105309
4119,corridor,5min~10min,Sin-nam,1,5,2,18,1986,3,508,64601
4120,corridor,5min~10min,Sin-nam,1,5,2,18,1986,3,508,53097
4121,corridor,5min~10min,Sin-nam,1,5,2,18,1986,3,508,63716


In [22]:
# Feature groups, one list per preprocessing treatment applied in the
# ColumnTransformer cell.
# NOTE(review): 'N_FacilitiesInApt' and 'Unnamed: 0' appear in the displayed
# frame but are in none of these lists; confirm the exclusion is intentional.

# Numeric features scaled with StandardScaler.
numerical_col_standard = [
    'N_FacilitiesNearBy',
    'N_FacilitiesNearBy_4',
    'N_SchoolNearBy',
    'YearBuilt',
    'Size',
]

# Numeric feature scaled with RobustScaler.
numerical_col_robust = [
    'N_Parkinglot',
]

# Categorical features that are one-hot encoded.
categorical_col = [
    'HallwayType',
    'TimeToSubway',
    'SubwayStation',
]


In [23]:
# Separate the target column from the predictors.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Column-wise preprocessing: each step is (name, transformer, columns).
# Columns that are not listed in any step are not passed through, because
# ColumnTransformer's default is remainder='drop'.
standard_step = ('Standard', StandardScaler(), numerical_col_standard)
robust_step = ('Robuts', RobustScaler(), numerical_col_robust)
# handle_unknown='ignore' lets categories unseen during fit be encoded
# without raising at predict time.
onehot_step = ('OneHot', OneHotEncoder(handle_unknown='ignore'), categorical_col)

ct = ColumnTransformer(transformers=[standard_step, robust_step, onehot_step])

In [25]:
# Gradient-boosted regressor with fixed hyperparameters.
xgb_regressor = XGBRegressor(
    colsample_bytree=0.8,
    gamma=0,
    learning_rate=0.2,
    max_depth=3,
    n_estimators=100,
    subsample=0.8,
    random_state=42,
)

# Fit on log1p(target) and map predictions back with expm1, so callers of
# predict() receive values on the original SalePrice scale.
model = TransformedTargetRegressor(
    regressor=xgb_regressor,
    func=np.log1p,
    inverse_func=np.expm1,
)

# Full pipeline: preprocessing followed by the target-transformed regressor.
pipeline = Pipeline(steps=[
    ('preprocessor', ct),
    ('model', model),
])

In [27]:
# Train on the training split.
pipeline.fit(X_train, y_train)

# Predict on the held-out split and compute the evaluation metrics.
y_pred = pipeline.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root of the MSE

In [28]:
# Report the held-out metrics, one per line, in the order RMSE, MAE, R2.
for label, value in (('RMSE: ', rmse), ('MAE: ', mae), ('R2: ', r2)):
    print(label, value)

RMSE:  42483.838010404885
MAE:  32882.53477746212
R2:  0.8375235795974731


In [29]:
# Save model
# Persist the whole fitted pipeline (preprocessor + regressor) rather than the
# bare `model` step. The regressor alone expects already-transformed features,
# and the fitted ColumnTransformer is not saved anywhere else, so an artifact
# without it cannot score raw rows.
model_filename = 'model.pkl'
# The context manager guarantees the file is flushed and closed before the
# upload cell reads it.
with open(model_filename, 'wb') as model_file:
    pickle.dump(pipeline, model_file)

In [30]:
# Push the pickled model file to Cloud Storage.
# Failures are reported on stdout instead of being re-raised, so this cell
# finishes even when the upload does not succeed.
model_blob_path = 'ilham/model/model.pkl'

try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    blob_model = bucket.blob(model_blob_path)
    blob_model.upload_from_filename(model_filename)

    print("Uploading model succeeded")
except Exception as upload_error:
    print("An exception occurred while uploading the model:", upload_error)

Uploading model succeeded
