In [1]:
import databricks.koalas as ks

import ray
from ray.util.sgd.torch.torch_trainer import TorchTrainer

import raydp.spark.context as context
from raydp.spark.torch_sgd import TorchEstimator
from raydp.spark.utils import random_split

import torch
import torch.nn as nn
import torch.nn.functional as F

from typing import Dict

import os

In [2]:
# Number of bytes in one binary gigabyte; used when sizing Spark executor memory.
GB = 1024 ** 3

# Point Spark at its installation directory.
# NOTE(review): this is a machine-specific absolute path — adjust per host.
os.environ["SPARK_HOME"] = "/home/xianyang/sw/spark-3.0.0-preview2-bin-hadoop2.7"

In [3]:
# Attach this notebook to the already-running Ray cluster.
# NOTE(review): the Redis password is hard-coded; consider sourcing it from the
# environment before sharing this notebook.
ray.init(
    address="sr233:6379",
    node_ip_address="sr233",
    redis_password="123",
)

# Start Spark through raydp: 2 executors, 10 cores and 40 GB of memory each.
context.init_spark(
    app_name="Burger King",
    num_executors=2,
    executor_cores=10,
    executor_memory=40 * GB,  # GB is an int, so the product is already an int
)



[2m[36m(pid=142408)[0m starting org.apache.spark.deploy.master.Master, logging to /home/xianyang/sw/spark-3.0.0-preview2-bin-hadoop2.7/logs/spark-xianyang-org.apache.spark.deploy.master.Master-1-sr233.out
[2m[36m(pid=144105)[0m starting org.apache.spark.deploy.worker.Worker, logging to /home/xianyang/sw/spark-3.0.0-preview2-bin-hadoop2.7/logs/spark-xianyang-org.apache.spark.deploy.worker.Worker-1-sr233.out
[2m[36m(pid=99181, ip=10.0.0.134)[0m starting org.apache.spark.deploy.worker.Worker, logging to /home/xianyang/sw/spark-3.0.0-preview2-bin-hadoop2.7/logs/spark-xianyang-org.apache.spark.deploy.worker.Worker-1-sr234.out


In [4]:
# Data processing with Koalas: read the JSON dataset from HDFS, then split it
# into train/test partitions using the weights [0.7, 0.3].
data_path = "hdfs://sr233:9000/data_10000"

df: ks.DataFrame = ks.read_json(data_path)
# NOTE(review): no seed is passed here, so the split is presumably not
# reproducible across runs — confirm whether random_split accepts one.
train_df, test_df = random_split(df, [0.7, 0.3])

In [5]:
# Number of rows in each embedding table (one row per categorical id).
n_plus = 522
n_time = 167
n_bkids = 126
n_weather = 35
n_feels = 20

# Bidirectional recurrent neural network (many-to-one)
class BiRNN(nn.Module):
    """Bidirectional GRU with single-hop attention, gated by context embeddings.

    The ``pluids`` sequence is embedded and fed through a bidirectional GRU.
    A one-hop attention layer pools the GRU outputs over the sequence axis,
    the pooled vector is multiplied element-wise by ``1 + sum(context
    embeddings)``, passed through ReLU and dropout, and finally projected by
    a linear layer.

    Args:
        input_size: Feature size of the GRU input; also the width of the
            ``pluids`` embedding that feeds the GRU.
        hidden_size: GRU hidden size per direction. The GRU output (and the
            context embeddings that gate it) are ``2 * hidden_size`` wide.
        num_layers: Number of stacked GRU layers.
        fcn_input_size: Input width of the final linear layer. It must equal
            ``2 * hidden_size`` because the attention uses a single hop.
        fcn_output_size: Output width of the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, fcn_input_size, fcn_output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # A bidirectional GRU concatenates both directions on the last axis.
        gru_out_size = 2 * hidden_size

        # The sequence embedding must match the GRU input width; the context
        # embeddings must match the GRU output width so they can gate it.
        self.embeds_pluids = nn.Embedding(n_plus, input_size)
        self.embeds_bkidx = nn.Embedding(n_bkids, gru_out_size)
        self.embeds_timeidx = nn.Embedding(n_time, gru_out_size)
        self.embeds_feelsBucket = nn.Embedding(n_feels, gru_out_size)
        self.embeds_weather = nn.Embedding(n_weather, gru_out_size)

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)

        # Attention scorer: GRU output -> 100 hidden units -> 1 score (one hop).
        self.hidden1 = nn.Linear(gru_out_size, 100)
        self.hidden2 = nn.Linear(100, 1)
        self.flatten = nn.Flatten()

        self.drop_layer = nn.Dropout(p=0.3)
        self.fc = nn.Linear(fcn_input_size, fcn_output_size)

    def forward(self, pluids, timeidx, bkidx, weatheridx, feelsBucket):
        """Compute the output of the final linear layer for a batch.

        Args:
            pluids: Integer ids indexing ``embeds_pluids``; assumed to be
                shaped [batch_size, seq_len] — TODO confirm against the
                estimator's ``feature_shapes``.
            timeidx, bkidx, weatheridx, feelsBucket: Integer ids for the
                context embeddings; assumed to be shaped [batch_size] —
                TODO confirm.

        Returns:
            Tensor whose last dimension is ``fcn_output_size``.
        """
        plu_embed = self.embeds_pluids(pluids)
        bkidx_embed = self.embeds_bkidx(bkidx)
        time_embed = self.embeds_timeidx(timeidx)
        weather_embed = self.embeds_weather(weatheridx)
        feels_embed = self.embeds_feelsBucket(feelsBucket)

        x = plu_embed

        # Initial hidden state (2 for bidirection), allocated with the same
        # device and dtype as the input so the module also works off-CPU.
        h0 = x.new_zeros(self.num_layers * 2, x.size(0), self.hidden_size)
        # Forward propagate gru
        gru_out, _ = self.gru(x, h0)
        ut = torch.tanh(self.hidden1(gru_out))
        # et shape: [batch_size, seq_len, att_hops]
        et = self.hidden2(ut)

        # att shape: [batch_size, att_hops, seq_len]
        # Normalise over seq_len explicitly: without `dim`, softmax on a 3-D
        # tensor falls back to dim=0 and would normalise across the batch.
        att = F.softmax(torch.transpose(et, 2, 1), dim=-1)

        # output shape [batch_size, att_hops, embedding_width]
        output = torch.matmul(att, gru_out)

        # flatten the output
        attention_output = self.flatten(output)
        # Gate the pooled sequence representation with the summed context.
        context_features = torch.mul(
            attention_output,
            (1 + bkidx_embed + time_embed + weather_embed + feels_embed),
        )
        ac1 = F.relu(context_features)

        dropout = self.drop_layer(ac1)
        output = self.fc(dropout)
        return output

In [6]:
# Distributed training through raydp's TorchEstimator (the optimizer is Adam).
model = BiRNN(
    input_size=50,
    hidden_size=50,
    num_layers=5,
    fcn_input_size=100,
    fcn_output_size=2,
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss = nn.CrossEntropyLoss()

estimator = TorchEstimator(
    num_workers=2,
    model=model,
    optimizer=optimizer,
    loss=loss,
    # Column order matches the positional arguments of BiRNN.forward.
    feature_columns=["pluids", "timeidx", "bkidx", "weatheridx", "feelsBucket"],
    # presumably 5 is the pluids sequence length and 0 marks a scalar column —
    # TODO confirm against TorchEstimator
    feature_shapes=[5, 0, 0, 0, 0],
    # Every feature is an integer id consumed by an embedding lookup.
    feature_types=[torch.long] * 5,
    label_column="label",
    label_type=torch.long,
    batch_size=100,
    num_epochs=10,
)

estimator.fit(train_df)



[2m[36m(pid=99179, ip=10.0.0.134)[0m   parser.add_argument(




Epoch-0: {'num_samples': 140068, 'epoch': 1, 'batch_count': 701, 'train_loss': 0.6943321002558821, 'last_train_loss': 0.6958527565002441}
Epoch-1: {'num_samples': 140068, 'epoch': 2, 'batch_count': 701, 'train_loss': 0.6943508767096453, 'last_train_loss': 0.6848205924034119}
Epoch-2: {'num_samples': 140068, 'epoch': 3, 'batch_count': 701, 'train_loss': 0.6931239345679874, 'last_train_loss': 0.6537535786628723}
Epoch-3: {'num_samples': 140068, 'epoch': 4, 'batch_count': 701, 'train_loss': 0.6907080163565417, 'last_train_loss': 0.6253771781921387}
Epoch-4: {'num_samples': 140068, 'epoch': 5, 'batch_count': 701, 'train_loss': 0.6880078527841406, 'last_train_loss': 0.5378180146217346}
Epoch-5: {'num_samples': 140068, 'epoch': 6, 'batch_count': 701, 'train_loss': 0.6852638408370186, 'last_train_loss': 0.6295316219329834}
Epoch-6: {'num_samples': 140068, 'epoch': 7, 'batch_count': 701, 'train_loss': 0.6817466152246421, 'last_train_loss': 0.50005704164505}
Epoch-7: {'num_samples': 140068, 'ep

In [None]:
# Run the fitted estimator's evaluation on test_df, the second partition
# returned by random_split.
estimator.evaluate(test_df)

In [None]:
# Print whatever model object the estimator returns from get_model().
print(estimator.get_model())

In [None]:
# Tear everything down in the reverse order of start-up:
# estimator first, then Spark, then the Ray connection.
estimator.shutdown()
context.stop_spark()
ray.shutdown()