In [2]:
# import packages
import numpy as np
import matplotlib.pyplot as plt
import scipy

%matplotlib inline

In [3]:
# activation function: ReLU
def ReLU(z):
    """Rectified linear unit.

    Returns the element-wise maximum of ``0`` and ``z``; ``z`` may be a
    scalar or anything NumPy can broadcast against a scalar.
    """
    floor = 0
    rectified = np.maximum(floor, z)
    return rectified

In [115]:
# activation function: sigmoid
def sigmoid(mat):
    """Logistic sigmoid ``1 / (1 + exp(-mat))``, evaluated without overflow.

    The textbook formula calls ``exp(-mat)``, which overflows (and emits a
    RuntimeWarning) for large negative inputs. This version only ever
    exponentiates a non-positive number, so no intermediate value can overflow.

    Parameters
    ----------
    mat : scalar or array_like
        Input value(s).

    Returns
    -------
    Same shape as ``mat``, with every entry in ``[0, 1]``.
    """
    mat = np.asarray(mat)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(mat))
    # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)
    # Both branches share the denominator (1 + e^-|x|).
    numerator = np.where(mat >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

In [7]:
# softmax
def softmax(mat):
    """Softmax along axis 1 (row-wise for a 2-D score matrix).

    Each row's maximum is subtracted before exponentiating. Softmax is
    invariant to adding a constant to a row, so the result is unchanged
    mathematically, but ``exp`` now only sees values <= 0 and cannot overflow
    into ``inf / inf = nan``.

    Parameters
    ----------
    mat : array_like with at least 2 dimensions
        Raw scores; normalisation runs over axis 1.

    Returns
    -------
    ndarray of the same shape whose entries along axis 1 sum to 1.
    """
    scores = np.asarray(mat)
    if scores.ndim > 1 and scores.shape[1] == 0:
        # Nothing to normalise; np.max would reject the empty axis.
        return np.exp(scores)
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    mat_exp = np.exp(shifted)
    return mat_exp / np.sum(mat_exp, axis=1, keepdims=True)

In [116]:
# initializing parameters: w, b
# z = w.T * x + b
def init_params(dim):
    """Create the starting parameters for ``z = w.T * x + b``.

    Parameters
    ----------
    dim : int
        Number of input features.

    Returns
    -------
    (w, b) : a ``(dim, 1)`` array of zeros and the scalar bias ``0``.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0
    return weights, bias

In [117]:
# calculate gradients and cost
def calc_grads_and_cost(w, b, X, Y):
    """One forward/backward pass of logistic regression.

    Parameters
    ----------
    w : ndarray
        Weight column vector (as produced by ``init_params``).
    b : scalar
        Bias.
    X : ndarray
        Inputs with one example per column (``X.shape[1]`` examples).
    Y : ndarray
        Target values, broadcastable against the ``(1, m)`` activations.

    Returns
    -------
    grads : dict
        ``{"dw": ..., "db": ...}`` -- gradients of the cost w.r.t. ``w`` and ``b``.
    cost : scalar
        Mean cross-entropy over the examples.
    """
    # number of examples
    m = X.shape[1]

    # z-value (logits) and y-hat vector
    z = np.dot(w.T, X) + b
    A = sigmoid(z)

    # Cross-entropy expressed through the logits:
    #   -[Y*log(A) + (1-Y)*log(1-A)]  ==  log(1 + exp(z)) - Y*z
    # np.logaddexp(0, z) evaluates log(1 + exp(z)) without overflow, so the
    # cost stays finite even when the sigmoid saturates to exactly 0 or 1
    # (where the log form would produce -inf / nan).
    cost = np.sum(np.logaddexp(0, z) - Y * z) / m

    # gradients
    dz = A - Y
    dw = 1 / m * np.dot(X, dz.T)
    db = 1 / m * np.sum(dz)
    cost = np.squeeze(cost)

    # store gradients
    grads = {"dw": dw, "db": db}
    return grads, cost

In [118]:
# gradient descent
def grad_desc(w, b, X, Y, num_iter, learning_rate, print_cost=True, record_every=100):
    """Fit ``w`` and ``b`` by plain batch gradient descent.

    Parameters
    ----------
    w, b : initial weights and bias.
    X, Y : training inputs and targets, passed through to
        ``calc_grads_and_cost``.
    num_iter : int
        Number of update steps. ``0`` (or less) performs no update.
    learning_rate : float
        Step size applied to both gradients.
    print_cost : bool, optional
        Print the cost each time it is recorded (default ``True``).
    record_every : int, optional
        Record the cost every this many iterations (default ``100``);
        must be positive.

    Returns
    -------
    params : dict ``{"w", "b"}`` -- the parameters after the last step.
    grads : dict ``{"dw", "db"}`` -- the gradients used in the last step
        (both ``None`` if no step was taken).
    costs : list of the recorded costs. Each cost is evaluated *before* the
        update of its iteration.
    """
    costs = []
    # Defined up front so the returned dict is well-formed even when the loop
    # body never runs; otherwise num_iter <= 0 ends in UnboundLocalError.
    dw, db = None, None

    for i in range(num_iter):
        grads, cost = calc_grads_and_cost(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # update (rebinding, so the caller's arrays are not modified in place)
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # record costs
        if i % record_every == 0:
            costs.append(cost)
            if print_cost:
                print("cost after iteration %i: %f" %(i, cost))

    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs

In [135]:
# classification
def predict(w, b, X):
    """Map sigmoid activations onto the three label values 0, 0.5 and 1.

    Parameters
    ----------
    w : ndarray
        Weights; reshaped to a ``(X.shape[0], 1)`` column before use.
    b : scalar
        Bias.
    X : ndarray
        Inputs with one example per column.

    Returns
    -------
    ndarray of shape ``(1, X.shape[1])``: ``0`` where the activation is
    ``<= 0.3333``, ``0.5`` where it is ``<= 0.6667``, otherwise ``1``.
    """
    m = X.shape[1]
    column_w = w.reshape(X.shape[0], 1)

    A = sigmoid(np.dot(column_w.T, X) + b)
    scores = A[0, :]

    # Nested np.where mirrors the if / elif / else ladder, bucket by bucket.
    Y_pred = np.zeros((1, m))
    Y_pred[0, :] = np.where(scores <= 0.3333, 0,
                            np.where(scores <= 0.6667, 0.5, 1))
    return Y_pred

In [150]:
# model
def model(X_train, Y_train, X_test, Y_test, num_iter, learning_rate):
    """Train the logistic-regression model and report how it scores.

    Initialises the parameters, runs ``grad_desc`` on the training split,
    predicts both splits and prints a score for each. The printed "accuracy"
    is ``100 - 100 * mean(|prediction - target|)``.

    Returns
    -------
    dict with the recorded costs, the predictions for both splits, the
    learned ``w`` and ``b``, and the hyper-parameters that were used.
    """
    def _score(predicted, target):
        # 100 minus the mean absolute error, expressed in percent.
        return 100 - np.mean(np.abs(predicted - target)) * 100

    w_init, b_init = init_params(X_train.shape[0])
    params, _, costs = grad_desc(w_init, b_init, X_train, Y_train, num_iter, learning_rate)
    w, b = params["w"], params["b"]

    Y_pred_test = predict(w, b, X_test)
    Y_pred_train = predict(w, b, X_train)

    # Print train/test scores
    print("train accuracy: {} %".format(_score(Y_pred_train, Y_train)))
    print("test accuracy: {} %".format(_score(Y_pred_test, Y_test)))

    return {
        "costs": costs,
        "Y_prediction_test": Y_pred_test,
        "Y_prediction_train": Y_pred_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iter,
    }

In [151]:
# Toy sanity check: 2 features x 5 examples per split.
# Rows are consecutive integers, so arange + reshape gives the same matrices
# as writing them out: [[1..5], [6..10]] and [[21..25], [26..30]].
xtr = np.arange(1, 11).reshape(2, 5)
ytr = np.asarray([0, 1, 0.5, 1, 0.5])
xte = np.arange(21, 31).reshape(2, 5)
yte = np.asarray([1, 0.5, 0, 0.5, 1])
model(xtr, ytr, xte, yte, 2000, 0.01)

cost after iteration 0: 0.693147
cost after iteration 100: 0.652004
cost after iteration 200: 0.645979
cost after iteration 300: 0.641644
cost after iteration 400: 0.638521
cost after iteration 500: 0.636265
cost after iteration 600: 0.634632
cost after iteration 700: 0.633447
cost after iteration 800: 0.632585
cost after iteration 900: 0.631958
cost after iteration 1000: 0.631499
cost after iteration 1100: 0.631164
cost after iteration 1200: 0.630918
cost after iteration 1300: 0.630738
cost after iteration 1400: 0.630606
cost after iteration 1500: 0.630509
cost after iteration 1600: 0.630437
cost after iteration 1700: 0.630385
cost after iteration 1800: 0.630346
cost after iteration 1900: 0.630317
train accuracy: 70.0 %
test accuracy: 60.0 %


{'costs': [0.6931471805599454,
  0.6520042926152847,
  0.6459787941788377,
  0.6416442205910403,
  0.6385206458377501,
  0.6362648331503258,
  0.6346318853506802,
  0.6334470512930337,
  0.6325854371654551,
  0.6319575686581291,
  0.6314991681905127,
  0.6311639257584329,
  0.6309183815909325,
  0.6307382952390465,
  0.6306060614609168,
  0.6305088648928132,
  0.6304373577389095,
  0.6303847090479975,
  0.6303459189743581,
  0.6303173227020841],
 'Y_prediction_test': array([[1., 1., 1., 1., 1.]]),
 'Y_prediction_train': array([[0.5, 0.5, 0.5, 1. , 1. ]]),
 'w': array([[ 0.55956054],
        [-0.13674106]]),
 'b': -0.1392603212506096,
 'learning_rate': 0.01,
 'num_iterations': 2000}

In [128]:
from itertools import cycle, islice
# Training sentences, one string per example (27 in total).
# Their targets live in `test_label`; judging by the text, label 0 marks
# romanized Japanese, 0.5 pinyin Chinese and 1 English -- TODO confirm.
test_str = ["Watashi no namae ha neko fubuki desu.",
            "Kimi no koto ga daisuki da.",
            "Watashi to tsukiatte kudasai!",
            "Neko ha sonnani baka janai.",
            "Aishiteru yo, oniisan!",
            "Ore ga chuugokujin da kara, ramen ga suki da.",
            "Wo de mingzi shi Ran Yuxiao.",
            "Zuotian wo qule yitang bianlidian.",
            "Meiyou shenme nenggou zudang wo de jiaobu.",
            "Machunqing jiu shige da huaidan!",
            "Zheshi diwu ju zhongwen le.",
            "I don't like speaking English because of its difficulty",
            "Logistic regression with a Neural Network mindset",
            "There are only four English sentences in the training example.",
            "The last one. Here we go.",
           "Wo you kaishi shuo riyu le.",
           "Raner wo shuodeshi zhongwen, hahaha.",
           "Tianshang de xingxing bu shuohua, dishang de wawa jiao mama.",
           "Kapai dashi Cuisite shi wo zuixihuan de yingxiong.",
           "Kaado Masuta ha watashi no ichiban suki na chanpion desu.",
           "Mou sugoshi dake de ii, mou sugoshi dake te ii~",
           "Gakkou ni itte, jyoushi kousei wo mimashita.",
           "First come, first served.",
           "This sentence should not be terminated with a period",
           "League of Legends is one of the most popular MOBA games in the world.",
           "Star Guardian Ahri is one of my favorite skins in LoL!",
           "Thanks for your generous help, StackOverflow!"]
# Encode every sentence as a fixed-length row of 100 character codes:
# the sentence's code points are repeated cyclically (or cut off) until
# exactly 100 values have been taken. Building all rows first and converting
# once yields the (n_sentences, 100) matrix directly.
ascii_list_orig = np.asarray(
    [list(islice(cycle(map(ord, sentence)), 100)) for sentence in test_str]
)
ascii_list_orig.shape

(27, 100)

In [129]:
# Targets for the 27 training sentences, written as consecutive runs:
# six 0s, five 0.5s, four 1s, four 0.5s, three 0s, five 1s.
test_label = np.repeat(
    [0, 0.5, 1, 0.5, 0, 1],   # label value of each run
    [6, 5, 4, 4, 3, 5],       # how many sentences share that label
).reshape(1, -1)
test_label.shape

(1, 27)

In [152]:
# Train on the encoded sentences (features as rows, one sentence per column).
# NOTE: the same matrix and labels are passed as both the training and the
# "test" split, so the reported test accuracy is not a held-out score.
d = model(
    X_train=ascii_list_orig.T,
    Y_train=test_label,
    X_test=ascii_list_orig.T,
    Y_test=test_label,
    num_iter=1000,
    learning_rate=0.00005,
)

cost after iteration 0: 0.693147
cost after iteration 100: 0.736976
cost after iteration 200: 0.622312
cost after iteration 300: 0.608742
cost after iteration 400: 0.606088
cost after iteration 500: 0.605370
cost after iteration 600: 0.605013
cost after iteration 700: 0.604687
cost after iteration 800: 0.604334
cost after iteration 900: 0.603959
train accuracy: 83.33333333333334 %
test accuracy: 83.33333333333334 %


In [169]:
# Extra sentences for spot-checking the trained model.
# The trailing comment on each line is the label the author expects
# (same 0 / 0.5 / 1 scale as `test_label`).
my_test_items = ["Nani wo itteru no? Zenzen wakanai yo!", # expected 0
                "Wo buzhidao ni zai shuo shenme dongxi!", # expected 0.5
                "First come, first served.", # expected 1
                "That's unbelievable!", # expected 1
                "Kore ha nan desuka?", # expected 0
                "Kotori ha umi chan no koto ga daisuke desu!", # expected 0
                 "Wo shuode dou dui ba?", # expected 0.5
                 "Oi, omae tachi, nani wo itteru no?", # expected 0
                 "Kore kara mo yoroshiku ne~", # expected 0
                 "Going off work!" # expected 1
                ]
def run_test(my_test):
    """Encode one sentence like the training rows and print its predicted label.

    The sentence's character codes are repeated cyclically (or truncated) to
    the number of features the trained model expects, then classified with
    the parameters stored in the module-level result dict ``d``.

    Parameters
    ----------
    my_test : str
        Sentence to classify; must be non-empty.

    Raises
    ------
    ValueError
        If ``my_test`` is empty (there are no characters to cycle through).
    """
    if not my_test:
        raise ValueError("run_test needs a non-empty sentence to encode")

    # Take the feature count from the trained weights rather than repeating
    # the literal 100 used when the training matrix was built, so the two
    # cannot drift apart.
    n_features = np.size(d["w"])
    codes = list(islice(cycle(map(ord, my_test)), n_features))
    my_ascii_list = np.reshape(np.asarray(codes), (1, -1))

    # predict() expects one example per column, hence the transpose.
    my_pred = predict(d["w"], d["b"], my_ascii_list.T)
    print("y = " + str(np.squeeze(my_pred)))

In [170]:
# `my_ascii_list` is not assigned at module level by any visible cell (it only
# exists as a local inside run_test), so this cell fails with NameError on a
# fresh kernel. Build the encoded sample explicitly instead.
# NOTE(review): the sentence originally used here is unknown; my_test_items[2]
# is a stand-in -- swap in the intended sample if it differs.
my_ascii_list = np.reshape(
    np.asarray(list(islice(cycle(map(ord, my_test_items[2])), 100))),
    (1, -1),
)
my_pred = predict(d["w"], d["b"], my_ascii_list.T)
print("y = " + str(np.squeeze(my_pred)))

y = 1.0


In [171]:
# Print the predicted label for every spot-check sentence, in list order;
# compare against the expected labels noted next to each entry of my_test_items.
for my_test in my_test_items:
    run_test(my_test)

y = 0.0
y = 1.0
y = 1.0
y = 0.5
y = 0.0
y = 0.5
y = 0.0
y = 0.0
y = 0.0
y = 0.5
