# ML Models on Google Cloud Serverless Compute

In [1]:
import sklearn
import pickle
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split

In [2]:
# Read the labelled review dataset; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv('sentiment_analysis.csv')

# Preview the first five rows (5 is head()'s default, spelled out here).
df.head(n=5)

Unnamed: 0,text,label
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [3]:
# Display the (rows, columns) tuple of the loaded frame as a quick size check.
df.shape

(7100, 2)

In [4]:
# Model input is the raw review text; the target is the `label` column
# (0/1 in the preview of the frame).
x = df['text']
y = df['label']

# Hold out 20% of the rows for evaluation. random_state is pinned so that the
# split -- and therefore every accuracy score computed from it -- is identical
# after a kernel restart instead of changing on every run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [5]:
# TF-IDF text vectorizer shared by every pipeline in this notebook.
# max_features=15 restricts the vocabulary to 15 terms.
# NOTE(review): the accuracies recorded in this notebook are only ~0.56-0.59;
# such a small vocabulary is a likely cause -- confirm whether 15 is deliberate
# (e.g. to keep the pickled model small) before raising it.
tfidf_vector = TfidfVectorizer(max_features=15)

In [6]:
# Logistic-regression estimator using the liblinear solver; every other
# hyperparameter is left at the library default.
logistic_classifier = LogisticRegression(solver='liblinear')

In [7]:
# Chain the shared TF-IDF vectorizer with the logistic-regression estimator so
# raw text goes in and class predictions come out.
classification_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vector),
        ('classifier', logistic_classifier),
    ]
)

# Fit on the training split and keep the fitted pipeline under the name that
# the model-saving cell reads.
pipeline_model = classification_pipeline.fit(xtrain, ytrain)

# Score on the held-out split; the accuracy is the cell's displayed value.
ypred = pipeline_model.predict(xtest)
accuracy_score(ytest, ypred)

0.5852112676056338

In [8]:
# Serialize the pipeline currently bound to `pipeline_model` (the one fitted in
# the preceding cell when the notebook is run top to bottom).
# The context manager closes the file handle deterministically, so the pickle
# is fully flushed to disk before it is uploaded anywhere.
# The target directory must already exist: open() does not create it.
with open('models/logistic_classifier/logistic_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [9]:
# Decision tree model
#
# max_depth=10 caps how deep the tree may grow.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at each split, so an unseeded tree can differ between runs.
decision_tree_classifier = DecisionTreeClassifier(max_depth=10, random_state=42)

In [10]:
# Same two-step layout as the other models: shared TF-IDF vectorizer followed
# by the decision-tree estimator.
classification_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vector),
        ('classifier', decision_tree_classifier),
    ]
)

# Fitting rebinds `pipeline_model`, so from here on it refers to the tree model.
pipeline_model = classification_pipeline.fit(xtrain, ytrain)

# Held-out accuracy is the cell's displayed value.
ypred = pipeline_model.predict(xtest)
accuracy_score(ytest, ypred)

0.5647887323943662

In [11]:
# Serialize the pipeline currently bound to `pipeline_model` (the decision-tree
# pipeline when the notebook is run top to bottom).
# The context manager closes the file handle deterministically, so the pickle
# is fully flushed to disk before it is uploaded anywhere.
# The target directory must already exist: open() does not create it.
with open('models/decision_classifier/decision_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [12]:
# Linear support vector classifier
#
# random_state pins the data shuffling the solver may perform, so repeated
# runs give the same fitted model.
# NOTE(review): max_iter=100 is well below the library default of 1000; watch
# for a ConvergenceWarning when fitting and raise it if one appears.
linear_svc = LinearSVC(C=1.0, max_iter=100, random_state=42)

In [13]:
# Shared TF-IDF vectorizer followed by the linear support vector classifier.
classification_pipeline = Pipeline(
    steps=[
        ('tfidf_vect', tfidf_vector),
        ('classifier', linear_svc),
    ]
)

# Fitting rebinds `pipeline_model`, so from here on it refers to the SVC model.
pipeline_model = classification_pipeline.fit(xtrain, ytrain)

# Held-out accuracy is the cell's displayed value.
ypred = pipeline_model.predict(xtest)
accuracy_score(ytest, ypred)

0.5859154929577465

In [14]:
# Serialize the pipeline currently bound to `pipeline_model` (the LinearSVC
# pipeline when the notebook is run top to bottom).
# The context manager closes the file handle deterministically, so the pickle
# is fully flushed to disk before it is uploaded anywhere.
# The target directory must already exist: open() does not create it.
with open('models/support_classifier/support_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline_model, model_file)

In [None]:
# GCP ML model uploads


# - Go to Cloud Storage and upload the saved model files to your buckets.

In [15]:
# Shell out to print the version of the `python` executable found on PATH.
# NOTE(review): that executable is presumably the same interpreter the kernel
# runs on, but this is not guaranteed -- confirm before pinning a runtime.
!python --version

Python 3.9.12


In [17]:
# `scikit-learn` is a package name, not an executable, so it cannot be invoked
# from the shell. Read the version from the module already imported by the
# kernel instead; this is the version the pickled pipelines were built with and
# the one the serving environment should match.
sklearn.__version__
