# Project 2: Serverless Machine Learning Model Deployment | Google Cloud
- Trained a machine learning model and chained all preprocessing steps into a single pipeline
- Serialized the fitted pipeline (preprocessing + model) into a binary joblib file
- Deployed the model as an endpoint using Google Cloud Functions
- Tested the model by submitting requests from different endpoints

In [17]:
#The training dataset is read from the CSV file sitting next to this notebook
import pandas as pd
import seaborn as sns
#Empty frame created up front; it is not used by any other cell visible in this notebook
resulting_metrics = pd.DataFrame()
df = pd.read_csv(filepath_or_buffer='SpotifyMexScored.csv')
#The last five rows are displayed as a quick sanity check of the load
df.tail(n=5)

Unnamed: 0,cancion,artista,playlist,track_id,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,Lista Rep,liked
988,No Es Por Acá,Carin Leon,3IyNJEsknaSFoUIn8qf1Lr,3bvJftZKZe5QKz433NczyV,0.746,0.37,2,-6.309,0.0306,0.456,0.0,0.106,0.595,Banda 2022,0
989,Solo Un Dia (Ahora Te Amo),Adan Romero,3IyNJEsknaSFoUIn8qf1Lr,51RcAoNIqe0G3284yxT8G0,0.692,0.377,5,-7.128,0.0497,0.522,0.0,0.0965,0.939,Banda 2022,0
990,La Buena y la Mala,Banda Tierra Sagrada,3IyNJEsknaSFoUIn8qf1Lr,4y0n8xKuEKE0J2sThzswhg,0.805,0.716,0,-4.498,0.039,0.527,0.0,0.247,0.905,Banda 2022,0
991,Que Te Vaya Bien,Julión Álvarez y su Norteño Banda,3IyNJEsknaSFoUIn8qf1Lr,1kJXYVVUu7o3B9gaJpoxjm,0.665,0.469,4,-5.809,0.028,0.23,0.0,0.234,0.383,Banda 2022,0
992,Dueño de Ti,"Sergio Vega ""El Shaka""",3IyNJEsknaSFoUIn8qf1Lr,7j9iuJvGg8Ve2O20CO8Oho,0.734,0.676,10,-0.007,0.047,0.314,0.0,0.0646,0.774,Banda 2022,0


In [2]:
#sklearn preprocessing / model-selection helpers used by the following cells
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

#Feature matrix: the nine columns listed below are taken from the loaded dataset
X = df.loc[:, [
    'danceability',
    'energy',
    'key',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
]]
#Label vector: the 'liked' column
y = df.loc[:, 'liked']



In [3]:
#The columns to be scaled are listed, then registered in a ColumnTransformer
numerical_columns = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
]
#A single transformation: StandardScaler applied to every listed column
transformations = [
    ('numerical', StandardScaler(), numerical_columns),
]
#remainder='passthrough' lets any column not listed above through without transformation
column_transformer = ColumnTransformer(
    transformers=transformations,
    remainder='passthrough',
)

In [4]:
#A gradient boosted tree classifier with 100 estimators is instantiated
#(it is not trained here; fitting happens when the pipeline is fit)
from sklearn.ensemble import GradientBoostingClassifier
#random_state is pinned so that re-running the notebook rebuilds the same model
model = GradientBoostingClassifier(n_estimators=100, random_state=42)

In [7]:
#Preprocessing and model are chained so a single object handles raw features end to end
from sklearn.pipeline import Pipeline

final_pipeline = Pipeline(steps=[
    ('preprocessing', column_transformer),
    ('model', model),
])

#The pipeline is fit on all the data (model comparison and evaluation are covered in a
#separate project; for this project's scope we can safely go with gradient boosted trees)
final_pipeline.fit(X, y)

In [34]:
#A single mock observation, with values in the same order as the feature columns
test = [[0.665,0.469,4,-5.809,0.0280,0.230,0.0,0.2340,0.383]]

#It is wrapped in a named-column dataframe (this is required because the column
#transformer selects its inputs by column name)
test_df = pd.DataFrame(
    test,
    columns=[
        'danceability',
        'energy',
        'key',
        'loudness',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
    ],
)
test_df

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,0.665,0.469,4,-5.809,0.028,0.23,0.0,0.234,0.383


In [35]:
#The fitted pipeline predicts the label for the mock observation
result = final_pipeline.predict(X=test_df)
#Only one row was submitted, so the first element is the prediction of interest
result[0]

0

In [36]:
#As well as the associated probability (the highest class probability for the sample)
import numpy as np
#predict_proba is evaluated once and reused instead of running the pipeline twice
class_probabilities = final_pipeline.predict_proba(test_df)[0]
np.max(class_probabilities)

0.9774234712705926

In [21]:
#Since we are happy with this, the fitted pipeline is persisted to disk as a joblib file
from joblib import dump, load
dump(value=final_pipeline, filename='SpotifyModel.joblib')

['SpotifyModel.joblib']