# **Neutral Classes**

### ***Loading Libraries***

In [3]:
# Operating Systems
import os
import shutil

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# SciPy
import scipy
from scipy import stats

# Data Visualization
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

# BigQuery
from google.cloud import bigquery
from google.colab import auth

# Scikit-Learn
from sklearn import linear_model
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MultiLabelBinarizer

# Extreme Gradient Boosting
import xgboost as xgb

# TensorFlow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow_hub import KerasLayer
from tensorflow import feature_column as fc
from tensorflow.keras.preprocessing import text
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks, layers, models, utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten, Conv2D, MaxPooling2D

In [4]:
# User Authentication
# Runs the Colab authentication helper imported from google.colab.
# NOTE(review): presumably this supplies the credentials that the BigQuery
# client and the %%bigquery cells below rely on — confirm for non-Colab runtimes.
auth.authenticate_user()

# BigQuery Library
# Optional install step, intentionally left disabled; uncomment it only if the
# google-cloud-bigquery package is missing or outdated in this runtime.
# !pip install --upgrade google-cloud-bigquery

In [5]:
# Google Cloud project used for every BigQuery call in this notebook.
# A GOOGLE_CLOUD_PROJECT value that is already set (and non-empty) in the
# environment takes precedence, so the notebook can run against another
# project without editing this cell; otherwise the original project is used.
DEFAULT_PROJECT_ID = 'core-catalyst-425922-v9'
project_id = os.environ.get('GOOGLE_CLOUD_PROJECT') or DEFAULT_PROJECT_ID

# Export the resolved value so code that reads the environment variable
# sees the same project as the explicit client below.
os.environ['GOOGLE_CLOUD_PROJECT'] = project_id

# BigQuery Client Config
client = bigquery.Client(project=project_id)

### ***On Synthetic Dataset***

In [6]:
def create_synthetic_dataset(N, shuffle, seed=None):
    """Build a toy prescription dataset with and without a neutral class.

    The first ``N // 10`` rows are patients with jaundice, who are always
    given ibuprofen. The rows from ``(9 * N) // 10`` onward are patients with
    ulcers, who are always given acetominophen. For every other row the
    ``prescription`` column holds an arbitrary (randomly shuffled) choice
    between the two drugs, while ``prescription_with_neutral`` holds the
    label ``'Neutral'``.

    Parameters
    ----------
    N : int
        Number of rows to generate.
    shuffle : bool
        If true, return the rows in random order with a fresh ``0..N-1``
        index; otherwise keep the construction order (jaundice rows first,
        ulcer rows last).
    seed : int, optional
        Seed for a private random generator, making the result reproducible
        without touching NumPy's global random state. When ``None`` (the
        default) the global NumPy random state is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``jaundice`` and ``ulcers`` (bool), ``prescription`` and
        ``prescription_with_neutral`` (str).
    """
    # ``np.random`` (the module) exposes the same ``shuffle`` method as a
    # RandomState instance, so one code path serves both cases.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Slice bounds of the two special patient groups.
    liver_end = N // 10
    stomach_start = (9 * N) // 10

    # Random Array: half ibuprofen, half acetominophen, in random order.
    # (The drug-name spelling is kept as-is: it is the label value that
    # downstream cells and recorded outputs rely on.)
    prescription = np.full(N, fill_value='acetominophen', dtype='U20')
    prescription[:N // 2] = 'ibuprofen'
    rng.shuffle(prescription)

    # Neutral Class: the default label wherever the choice is arbitrary.
    p_neutral = np.full(N, fill_value='Neutral', dtype='U20')

    # 10% is Patients with History of Liver Disease
    jaundice = np.zeros(N, dtype=bool)
    jaundice[:liver_end] = True
    prescription[:liver_end] = 'ibuprofen'
    p_neutral[:liver_end] = 'ibuprofen'

    # 10% is Patients with History of Stomach Problems
    ulcers = np.zeros(N, dtype=bool)
    ulcers[stomach_start:] = True
    prescription[stomach_start:] = 'acetominophen'
    p_neutral[stomach_start:] = 'acetominophen'

    df = pd.DataFrame.from_dict({
        'jaundice': jaundice,
        'ulcers': ulcers,
        'prescription': prescription,
        'prescription_with_neutral': p_neutral
    })

    if not shuffle:
        return df

    # random_state=None makes pandas fall back to the global NumPy state.
    row_rng = None if seed is None else rng
    return df.sample(frac=1, random_state=row_rng).reset_index(drop=True)

# Preview a ten-row, unshuffled sample so the group layout is visible.
create_synthetic_dataset(N=10, shuffle=False)

Unnamed: 0,jaundice,ulcers,prescription,prescription_with_neutral
0,True,False,ibuprofen,ibuprofen
1,False,False,ibuprofen,Neutral
2,False,False,ibuprofen,Neutral
3,False,False,acetominophen,Neutral
4,False,False,acetominophen,Neutral
5,False,False,ibuprofen,Neutral
6,False,False,ibuprofen,Neutral
7,False,False,acetominophen,Neutral
8,False,False,acetominophen,Neutral
9,False,True,acetominophen,acetominophen


In [7]:
# Shuffled synthetic data: the first 80% of the rows train the model and the
# remaining 20% are held out for scoring.
df = create_synthetic_dataset(1000, shuffle=True)
ntrain = (8 * len(df)) // 10

# The feature split is the same for both label columns, so compute it once.
features = df[['jaundice', 'ulcers']]
X_train, X_test = features.iloc[:ntrain], features.iloc[ntrain:]

# Fit the same logistic regression against each labelling scheme and report
# its accuracy on the held-out rows.
for label in ('prescription', 'prescription_with_neutral'):
    y_train, y_test = df[label].iloc[:ntrain], df[label].iloc[ntrain:]
    lm = linear_model.LogisticRegression().fit(X_train, y_train)
    acc = lm.score(X_test, y_test)
    print('label={} accuracy={}'.format(label, acc))

label=prescription accuracy=0.55
label=prescription_with_neutral accuracy=1.0


### On Natality Data

In [9]:
%%bigquery
-- Two-class baseline: a 1-minute Apgar score of 9 or more is labelled
-- 'Healthy'; every other retained row is labelled 'NeedsAttention'.
CREATE OR REPLACE MODEL mlpatterns.neutral_2classes
OPTIONS (
  model_type = 'logistic_reg',
  input_label_cols = ['health']
) AS

SELECT
  CASE
    WHEN apgar_1min >= 9 THEN 'Healthy'
    ELSE 'NeedsAttention'
  END AS health,
  plurality,
  mother_age,
  gestation_weeks,
  ever_born
FROM `bigquery-public-data.samples.natality`
-- Keep only rows whose score is at most 10; rows with a NULL score fail the
-- comparison and are dropped as well.
WHERE apgar_1min <= 10

In [10]:
%%bigquery
-- Evaluation results for the two-class model.
SELECT
  *
FROM
  ML.EVALUATE(MODEL mlpatterns.neutral_2classes)

In [11]:
%%bigquery
-- Three-class variant: a score of exactly 10 is 'Healthy', scores of 8 or 9
-- form the 'Neutral' class, and everything lower is 'NeedsAttention'.
CREATE OR REPLACE MODEL mlpatterns.neutral_3classes
OPTIONS (
  model_type = 'logistic_reg',
  input_label_cols = ['health']
) AS

SELECT
  CASE
    WHEN apgar_1min = 10 THEN 'Healthy'
    WHEN apgar_1min >= 8 THEN 'Neutral'
    ELSE 'NeedsAttention'
  END AS health,
  plurality,
  mother_age,
  gestation_weeks,
  ever_born
FROM `bigquery-public-data.samples.natality`
-- Keep only rows whose score is at most 10; rows with a NULL score fail the
-- comparison and are dropped as well.
WHERE apgar_1min <= 10

In [12]:
%%bigquery
-- Evaluation results for the three-class (with Neutral) model.
SELECT
  *
FROM
  ML.EVALUATE(MODEL mlpatterns.neutral_3classes)