In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import GridSearchCV
import functools
import json

In [2]:
# Read and prepare data
df_cancer = pd.read_csv("https://storage.googleapis.com/tutorial-datasets/data.csv")
df_cancer = df_cancer.drop(['id'], axis = 1)
df_cancer.drop("Unnamed: 32",axis=1,inplace=True)

# Define X and y
X = df_cancer.drop(['diagnosis'], axis = 1)
pd.set_option('display.max_columns', None)
df_cancer['diagnosis'] = df_cancer['diagnosis'].replace({'M': 0.0, 'B': 1.0})
y = df_cancer['diagnosis']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 20)

# Scale data
X_train_min = X_train.min()
X_train_max = X_train.max()
X_train_range = (X_train_max- X_train_min)
X_train_scaled = (X_train - X_train_min)/(X_train_range)
X_test_min = X_test.min()
X_test_range = (X_test - X_test_min).max()
X_test_scaled = (X_test - X_test_min)/X_test_range  

svc_model = SVC()
y_predict =  None

print("Dataset successfully updated")

Dataset successfully updated


In [3]:
def update_data():
  # Read and prepare data
  df_cancer = pd.read_csv("https://storage.googleapis.com/tutorial-datasets/data.csv")
  df_cancer = df_cancer.drop(['id'], axis = 1)
  df_cancer.drop("Unnamed: 32",axis=1,inplace=True)

  # Define X and y
  X = df_cancer.drop(['diagnosis'], axis = 1)
  pd.set_option('display.max_columns', None)
  df_cancer['diagnosis'] = df_cancer['diagnosis'].replace({'M': 0.0, 'B': 1.0})
  y = df_cancer['diagnosis']

  # Split dataset
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 20)

  # Scale data
  X_train_min = X_train.min()
  X_train_max = X_train.max()
  X_train_range = (X_train_max- X_train_min)
  X_train_scaled = (X_train - X_train_min)/(X_train_range)
  X_test_min = X_test.min()
  X_test_range = (X_test - X_test_min).max()
  X_test_scaled = (X_test - X_test_min)/X_test_range  

  return ("Dataset successfully updated")

In [4]:
def endpoint(fn):
  @functools.wraps(fn)
  def wrapper(*args, **kwds):
    req = args[0] if len(args) > 0 else '{}'
    request = json.loads(req)
    args = request.get('args', {})
    body = request.get('body', {})
    return fn(*(args, body), **kwds)
  return wrapper

In [5]:
@endpoint
def train_svm(args, body):
  type = args.get('param', args.get('basic', None))
    
  if type[0] == "basic":
    svc_model.fit(X_train, y_train)
    y_predict = svc_model.predict(X_test)
  
  if type[0] == "normalized":
    svc_model.fit(X_train_scaled, y_train)
    y_predict = svc_model.predict(X_test_scaled)
    
  if type[0] == "gridsearch":
    param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf']} 
    grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=4)
    grid.fit(X_train_scaled,y_train)
    y_predict = grid.predict(X_test_scaled)
    
  return y_predict, svc_model

In [6]:
@endpoint
def prediction(args, body):
  if (svc_model.predict(body.get('data', []))[0] == 1.0):
    return "Benign"
  elif (svc_model.predict(body.get('data', []))[0] == 0.0):
    return "Malign"

In [7]:
def get_recall():
  if y_predict is not None:
    precision,recall,fscore,support=score(y_test,y_predict,average='macro')
    return recall
  else:
    return "Model not trained yet, make a call to /train_svm to train it"

In [8]:
def show_performance():
  if y_predict is not None:
    return classification_report(y_test, y_predict)
  else:
    return "Model not trained yet, make a call to /train_svm to train it"

In [9]:
# Mock request object for local API testing
args = {
    'param': ['normalized']
}

body = {
"data": [[0.43379,0.576674,0.145236,0.079089,0.391132,0.378409,0.250831,0.149869,0.261526,0.389277,0.172851,0.429501,0.184958,0.058061,0.320735,0.429637,0.714606,0.629737,0.346465,0.291192,0.112803,0.462482,0.111893,0.056220,0.393047,0.253766,0.344997,0.296245,0.181283,0.243578]]
}

REQUEST = json.dumps({ 'args': args, 'body': body })

In [10]:
# GET /update_data
print(update_data())

Dataset successfully updated


In [11]:
# POST /train_svm
y_predict, svc_model=train_svm(REQUEST)
print("SVM trained!")

SVM trained!




In [12]:
# POST /predict
print(prediction(REQUEST))

Benign


In [13]:
# GET /show_performance
print(show_performance())

              precision    recall  f1-score   support

         0.0       1.00      0.98      0.99        48
         1.0       0.99      1.00      0.99        66

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114



In [14]:
# GET /get_recall
print(get_recall())

0.9895833333333333
