Reading data from BigQuery

In [66]:
from google.cloud import bigquery

# Client used for every BigQuery call in this cell.
bqclient = bigquery.Client()

# Reference to the table that is downloaded below.
table = bigquery.TableReference.from_string("mlops2-350118.dia.dia-table")

# No column restriction is applied, so every field of the table is fetched.
# To download only some columns, pass e.g.
#   selected_fields=[bigquery.SchemaField("Age", "INTEGER"),
#                    bigquery.SchemaField("Outcome", "INTEGER")]
# as an extra argument to list_rows().
rows = bqclient.list_rows(table)

# Convert the rows to a pandas DataFrame. The BigQuery Storage API is
# requested explicitly; as of google-cloud-bigquery 1.26.0 it is already the
# default, so the flag only documents the intent.
dataframe = rows.to_dataframe(create_bqstorage_client=True)

print(dataframe.head())


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            0      125             96              0        0  22.5   
1            0      100             70             26       50  30.8   
2            0      102             75             23        0   0.0   
3            0      113             80             16        0  31.0   
4            0      177             60             29      478  34.6   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.262   21        0  
1                     0.597   21        0  
2                     0.572   21        0  
3                     0.874   21        0  
4                     1.072   21        1  


In [50]:
import pandas as pd
# Read the CSV straight from the GCS bucket; malformed lines are skipped
# instead of raising a parser error.
df = pd.read_csv(
    "gs://mlops-bucket-may14/diabetes.csv",
    on_bad_lines="skip",
)

# Same frame without the "Outcome" column.
df_x = df.drop(columns="Outcome")

df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [46]:
from google.cloud import storage
import pickle
from io import BytesIO
import xgboost
# from sklearn.externals import joblib
from tempfile import TemporaryFile
# Download the pickled classifier from GCS and deserialise it.
storage_client = storage.Client()
bucket_name = "mlops-bucket-may14"
model_bucket = 'dia_classifier.pkl'  # object name of the pickled model inside the bucket

bucket = storage_client.get_bucket(bucket_name)
# Use the variable defined above instead of repeating the literal object name.
blob = bucket.blob(model_bucket)
# download_as_bytes() is the supported replacement for the deprecated
# download_as_string() alias; both return the object's content as bytes.
pickle_in = blob.download_as_bytes()
# NOTE(review): pickle.loads executes arbitrary code embedded in the payload.
# Only load objects from a bucket whose writers are trusted.
# Despite its name, `my_dictionary` holds the unpickled object whose
# .predict() method is called further down in the notebook.
my_dictionary = pickle.loads(pickle_in)

In [71]:
# Build the model input from `dataframe`:
#  - "Outcome" is dropped exactly as before (a KeyError is still raised if the
#    column is missing);
#  - "Outcome_Predicted" is dropped only if present. A later cell writes the
#    predictions back into `dataframe` under that name, so without this guard
#    re-running this cell would pass an extra column to predict().
features = (
    dataframe
    .drop(columns="Outcome")
    .drop(columns="Outcome_Predicted", errors="ignore")
)
outcome_arr = my_dictionary.predict(features)
outcome_arr.shape

(768,)

In [72]:
# Store the predictions on `dataframe` itself under "Outcome_Predicted".
# This mutates the frame in place; re-running the cell simply overwrites the
# column with the current contents of `outcome_arr`.
dataframe['Outcome_Predicted'] = outcome_arr
# Last expression of the cell: display the updated frame.
dataframe

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Outcome_Predicted
0,0,125,96,0,0,22.5,0.262,21,0,0
1,0,100,70,26,50,30.8,0.597,21,0,0
2,0,102,75,23,0,0.0,0.572,21,0,0
3,0,113,80,16,0,31.0,0.874,21,0,0
4,0,177,60,29,478,34.6,1.072,21,1,1
...,...,...,...,...,...,...,...,...,...,...
763,13,145,82,19,110,22.2,0.245,57,0,0
764,14,175,62,30,0,33.6,0.212,38,1,1
765,14,100,78,25,184,36.6,0.412,46,1,1
766,15,136,70,32,110,37.1,0.153,43,1,1


In [73]:
# Write the frame (including the predicted column, if it has been added) to a
# CSV file at a path relative to the kernel's working directory.
dataframe.to_csv(path_or_buf="predicted_outcome.csv")

In [41]:
from google.cloud import storage

# Print the API path of every object stored in the bucket.
# (Client variable was previously misspelled as `sttorage_client`.)
storage_client = storage.Client()
blobs = storage_client.list_blobs('mlops-bucket-may14')

# The loop variable is deliberately not called `blob`, so it does not
# overwrite the notebook-level `blob` created by the model-loading cell.
for listed_blob in blobs:
    print(listed_blob.path)

/b/mlops-bucket-may14/o/dia_classifier.pkl
/b/mlops-bucket-may14/o/diabetes.csv
/b/mlops-bucket-may14/o/shakespeare.csv
/b/mlops-bucket-may14/o/xgb_classify.pkl


In [69]:
from google.cloud import storage


def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: ID of the GCS bucket, e.g. "your-bucket-name".
        source_file_name: Path of the file to upload,
            e.g. "local/path/to/file".
        destination_blob_name: ID of the GCS object to write,
            e.g. "storage-object-name".

    A confirmation line is printed after the upload call returns.
    """
    # Resolve client -> bucket -> blob in one chain, then push the file.
    target_blob = (
        storage.Client()
        .bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target_blob.upload_from_filename(source_file_name)

    confirmation = "File {} uploaded to {}.".format(
        source_file_name, destination_blob_name
    )
    print(confirmation)

In [74]:
# Upload the locally written predictions file to the bucket under the same name.
upload_blob(
    bucket_name="mlops-bucket-may14",
    source_file_name="predicted_outcome.csv",
    destination_blob_name="predicted_outcome.csv",
)

File predicted_outcome.csv uploaded to predicted_outcome.csv.
