In [1]:
!pip install streamlit



In [2]:
!pip install pyngrok==4.1.1



In [3]:
!pip install scikit-tensor-py3



In [4]:
!ngrok

NAME:
   ngrok - tunnel local ports to public URLs and inspect traffic

DESCRIPTION:
    ngrok exposes local networked services behinds NATs and firewalls to the
    public internet over a secure tunnel. Share local websites, build/test
    webhook consumers and self-host personal services.
    Detailed help for each command is available with 'ngrok help <command>'.
    Open http://localhost:4040 for ngrok's web interface to inspect traffic.

EXAMPLES:
    ngrok http 80                    # secure public URL for port 80 web server
    ngrok http -subdomain=baz 8080   # port 8080 available at baz.ngrok.io
    ngrok http foo.dev:80            # tunnel to host:port instead of localhost
    ngrok http https://localhost     # expose a local https server
    ngrok tcp 22                     # tunnel arbitrary TCP traffic to port 22
    ngrok tls -hostname=foo.com 443  # TLS traffic for foo.com to port 443
    ngrok start foo bar baz          # start tunnels from the configuration file

VERSI

In [5]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [81]:
%%writefile utils.py
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
import torch.nn as nn
import torch
import joblib
import os
import random

# TensorFlow dependencies
import tensorflow as tf
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense, LSTM,Dropout,Activation,Input,Bidirectional
from tensorflow.keras.optimizers import Adagrad, Adam, SGD, RMSprop
from tensorflow.keras.models import load_model,save_model
from tensorflow.keras.utils import plot_model,to_categorical
from sktensor import dtensor


def load_dataset(df_file_path,route_dir,user_dir):
  """Load the workout table and every pre-fitted artefact used by the demo.

  Args:
    df_file_path: path of the CSV file with the workouts; only the first 500
      rows and the columns listed in `cols` are read.
    route_dir: path of the joblib file holding the route embeddings.
    user_dir: path of the joblib file holding the user embeddings.

  Returns:
    Tuple `(df, user_embed_df, route_embed_df, OneHot_enc, scaler_dic)`:
    the workout DataFrame (with the `*_adjusted` sequence columns renamed),
    the two objects loaded from `user_dir` / `route_dir`, the object loaded
    from `OneHotEncoder.m`, and a dict mapping feature name -> loaded scaler.

  NOTE(security): `joblib.load` unpickles the files, which can execute
  arbitrary code; only point these paths at trusted files.
  """
  # 'distance_adjusted_sum' was listed twice in the original column list.
  cols = ['id', 'userId', 'gender', 'sport',
          'calories', 'Route_id', 'distance_adjusted_sum','altitude_adjusted','distance_adjusted','speed_adjusted','heart_rate_adjusted','derived_speed']
  df = pd.read_csv(df_file_path, usecols=cols, nrows=500)
  df = df.rename(columns={'speed_adjusted': 'speed','altitude_adjusted':'altitude','distance_adjusted':'derived_distance','heart_rate_adjusted':'heart_rate'})

  # Load one fitted scaler per numeric feature (files named scaler_<feature>_2.m).
  features = ['calories','altitude','derived_distance','speed','heart_rate','distance']
  scaler_dir = '/content/gdrive/My Drive/endomondoHR/Data/scaler_model'
  scaler_dic = {
      feature: joblib.load(os.path.join(scaler_dir, 'scaler_'+feature+'_2.m'))
      for feature in features
  }

  # Load onehot encoder for categorical features
  encoder_path = '/content/gdrive/My Drive/endomondoHR/Data/model/Final Model/Distance Prediction/OneHotEncoder.m'
  OneHot_enc = joblib.load(encoder_path)

  # Load user embedding and route embedding
  user_embed_df = joblib.load(user_dir)
  route_embed_df = joblib.load(route_dir)

  return df,user_embed_df,route_embed_df,OneHot_enc,scaler_dic


def data_process(gender, sport, calories, Route_id, distance_adjusted_sum, user_embed, route_embed, scaler_dic,OneHot_enc):
    """Assemble the flat feature vector for the distance model.

    `gender` and `sport` are label-encoded (unrecognised values fall back to
    id 0), one-hot encoded together through `OneHot_enc`, and appended after
    the scaled calories, the scaled total distance and the two embeddings.
    `Route_id` is accepted for interface compatibility but is not used here.

    Returns:
        1-D numpy array laid out as
        [calories, distance, *user_embed, *route_embed, *gender_sport_onehot].
    """
    label_ids = [0, 1, 2]

    def _label_encode(value, names):
        # Position of `value` in `names`; 0 when nothing matches.
        matches = [(value == name) for name in names]
        return np.select(matches, label_ids, default=0)

    def _scale(feature, raw):
        # Scalers work on 2-D columns; keep the first (only) row of the result.
        column = np.array(raw).reshape(-1, 1)
        return scaler_dic[feature].transform(column)[0]

    gender_id = _label_encode(gender, ('male', 'female', 'unknown'))
    sport_id = _label_encode(sport, ('run', 'bike', 'mountain bike'))

    calories_part = _scale('calories', calories)
    distance_part = _scale('distance', distance_adjusted_sum)

    # The encoder receives a single (genderId, sportId) row.
    categorical_row = np.hstack((gender_id, sport_id)).reshape(1, -1)
    onehot_part = OneHot_enc.transform(categorical_row).toarray()[0]

    return np.hstack((calories_part, distance_part,
                      user_embed, route_embed, onehot_part))
def predict_distance(model,data_input,scaler_dic,device):
  """Run the distance model on one feature vector and undo the distance scaling.

  The model is switched to eval mode, evaluated without gradient tracking on
  `data_input` (moved to `device`), and its single scalar output is passed
  through `scaler_dic['distance'].inverse_transform`.

  Returns:
    The inverse-transformed prediction as a scalar (kilometres, per the
    original author's comment).
  """
  model.eval()
  features = torch.Tensor(data_input).to(device)
  with torch.no_grad():
      scaled_pred = model(features).item()

  # The scaler expects a 2-D column; unwrap the single value afterwards.
  scaled_column = np.array(scaled_pred).reshape(-1, 1)
  return scaler_dic['distance'].inverse_transform(scaled_column)[0][0]

def get_stop_point(pred_distance,derived_distance):
  """Find the step at which the running distance first reaches `pred_distance`.

  Returns:
    -1 when `pred_distance` exceeds the total of `derived_distance`;
    otherwise the 0-based index of the first step whose cumulative distance
    is >= `pred_distance`. Falls through to None if no step qualifies
    (e.g. an empty sequence with a non-positive target).
  """
  if pred_distance > sum(derived_distance):
    return -1

  covered = 0
  for step, segment in enumerate(derived_distance):
    covered += segment
    if covered >= pred_distance:
      return step
  return None

def generate_workout(df,pred_distance,workout_id,user_id,gender,calories,sport):
  """Build the per-time-step frame for one workout and locate its stop point.

  The row of `df` whose `id` equals `workout_id` supplies the route and the
  recorded sequences; `user_id`, `gender`, `calories` and `sport` overwrite
  that row's own values.

  Args:
    df: workout table with columns `id`, `Route_id`, and the string-encoded
      sequence columns `speed`, `altitude`, `derived_distance`, `heart_rate`.
    pred_distance: target distance handed to `get_stop_point`.
    workout_id: value matched against `df['id']`.
    user_id, gender, calories, sport: context values written into the row.

  Returns:
    `(df_sub, stop_point)`: `df_sub` is a 499-row DataFrame with columns
    workoutId, userId, genderId, sportId, Route_id, calories, altitude,
    derived_distance, heart_rate, speed; `stop_point` is whatever
    `get_stop_point(pred_distance, distance)` returns.
    A bare `None` is returned when `workout_id` does not match exactly one
    row, so callers that unpack the result must guard against it.
  """
  seq_len = 499  # fixed number of time steps per workout

  # get route
  temp = df[df['id'] == workout_id].copy()
  temp['userId'] = user_id
  temp['gender'] = gender
  temp['calories'] = calories
  temp['sport'] = sport
  temp.reset_index(drop=True,inplace=True)
  if len(temp) != 1:
    return None

  # Sport Gender Encoding (unrecognised labels fall back to id 0)
  choices = [0, 1, 2]
  conditions = [
      (temp['gender'] == 'male'),
      (temp['gender'] == 'female'),
      (temp['gender'] == 'unknown')]
  temp['genderId'] = np.select(conditions, choices, default=0)
  conditions = [
      (temp['sport'] == 'run'),
      (temp['sport'] == 'bike'),
      (temp['sport']  == 'mountain bike')]
  temp['sportId'] = np.select(conditions, choices, default=0)

  # handle contextual data
  context_info = ['id','userId','genderId','calories','sportId','Route_id']
  context_dict = {col:temp[col][0] for col in context_info}

  # handle sequence data
  # NOTE(security): eval() executes whatever expression the CSV cell holds;
  # only use with trusted data (ast.literal_eval is the safer parser if the
  # cells are plain list literals -- TODO confirm before switching).
  speed = list(eval(temp['speed'][0]))
  altitude = list(eval(temp['altitude'][0]))
  distance = list(eval(temp['derived_distance'][0]))
  hr = list(eval(temp['heart_rate'][0]))

  # Pad every channel on its own length. Previously only speed and heart rate
  # were padded (both by len(speed)), so a short altitude/distance/heart-rate
  # list raised IndexError in the comprehension below. Zero padding does not
  # change the cumulative distance used for the stop point.
  for channel in (altitude, distance, hr, speed):
    missing = seq_len - len(channel)
    if missing > 0:
      channel.extend([0]*missing)

  seq = [[altitude[i],distance[i],hr[i],speed[i]] for i in range(seq_len)]
  sequence = np.array(seq)
  context = np.array([[context_dict['id'],context_dict['userId'],context_dict['genderId'],context_dict['sportId'],context_dict['Route_id'],context_dict['calories']]]*seq_len)
  array = np.concatenate((context,
                sequence),axis=1)
  columns=['workoutId','userId','genderId','sportId','Route_id','calories','altitude','derived_distance','heart_rate','speed']
  df_sub = pd.DataFrame(array,columns=columns)
  # get stop point
  stop_point = get_stop_point(pred_distance,distance)
  return df_sub, stop_point

def convert_category(value,length,seq_len=499):
  """One-hot encode `value` and repeat that row once per time step.

  Args:
    value: category id; converted with `int()` and used as the hot index.
    length: number of categories (width of the one-hot row).
    seq_len: number of repeated rows; defaults to the 499 time steps used
      throughout this module.

  Returns:
    A list of `seq_len` rows. Every entry is the *same* list object, so the
    result should be treated as read-only.
  """
  one_hot = [0]*length
  one_hot[int(value)] = 1
  return [one_hot]*seq_len

def generate_scaled(df_sub,user_embed,route_embed,scaler_dic):
  """Turn a workout frame into the scaled input and targets of the second model.

  The input stacks, column-wise and in this order: one-hot gender, one-hot
  sport, route embedding, user embedding, then the scaled calories, altitude
  and derived_distance columns. Scaled speed and heart rate are returned
  separately as targets.

  Returns:
    `(x1, y1, y2)` where `x1` has shape (1, rows, features) and `y1` / `y2`
    are (rows, 1) arrays of scaled speed and scaled heart rate.
  """
  def _scaled(name):
    # Scale one column of df_sub with its matching scaler.
    return scaler_dic[name].transform(df_sub[name].values.reshape(-1,1))

  gender_rows = convert_category(df_sub['genderId'][0],3)
  sport_rows = convert_category(df_sub['sportId'][0],3)
  # Repeat each embedding once per time step.
  route_rows = route_embed.reshape(1,-1).repeat(499,axis=0)
  user_rows = user_embed.reshape(1,-1).repeat(499,axis=0)

  x1 = np.concatenate((gender_rows,sport_rows,route_rows,user_rows),axis=1)
  for name in ('calories','altitude','derived_distance'):
    x1 = np.concatenate((x1,_scaled(name)),axis=1)

  y1 = _scaled('speed')
  y2 = _scaled('heart_rate')
  return x1.reshape(1,x1.shape[0],x1.shape[1]),y1.reshape(y1.shape[0],1),y2.reshape(y2.shape[0],1)

def apply_stop(stop_stamp,seq_len=499):
  """Build a 0/1 mask that keeps the first `stop_stamp` steps.

  Args:
    stop_stamp: number of leading ones; must satisfy 0 <= stop_stamp <= seq_len
      (numpy raises ValueError for negative sizes otherwise).
    seq_len: total mask length; defaults to the 499 time steps used
      throughout this module.

  Returns:
    1-D float array of length `seq_len`: ones followed by zeros.
  """
  return np.append(np.ones(stop_stamp),np.zeros(seq_len-stop_stamp))

def predict_sp_hr(df,user_embed_df,route_embed_df,scaler_dic,OneHot_enc,device,
                  distance_model,SpeedHr_model,
                    workout_id,user_id,gender,calories,sport):
  """Chain the distance model and the speed / heart-rate model for one request.

  Steps: look up the route and its total distance for `workout_id`, fetch the
  user and route embeddings, predict the distance, build the workout frame
  and stop point, then predict speed and heart rate and zero them out after
  the stop point.

  Returns:
    `(pred_sp, pred_hr, pred_dis, stop_point)`. When the stop point is -1
    (predicted distance exceeds the route's distance) the first two entries
    are -1 and the second model is not run.
  """
  # Route id and total route distance of the selected workout.
  workout_rows = df.id == workout_id
  Route_id = df.loc[workout_rows, 'Route_id'].to_numpy()[0]
  route_total = df.loc[workout_rows, 'distance_adjusted_sum'].to_numpy()[0]

  # Embeddings of the selected user and route.
  user_row = user_embed_df[user_embed_df.userId == user_id]
  user_embed = np.array(user_row.userEmbed.values[0])
  route_row = route_embed_df[route_embed_df.Route_id == Route_id]
  route_embed = np.array(route_row.routeEmbed.values[0])

  # Model 1: predicted distance.
  model1_input = data_process(gender,sport,calories,Route_id,route_total,user_embed,route_embed,scaler_dic,OneHot_enc)
  pred_dis = predict_distance(distance_model,model1_input,scaler_dic,device)

  # Workout frame plus the step at which the predicted distance is reached.
  input_df,stop_point = generate_workout(df,pred_distance=pred_dis,workout_id=workout_id,user_id=user_id,gender=gender,sport=sport,calories=calories)
  if stop_point == -1:
    return -1,-1,pred_dis,stop_point

  # Model 2: speed and heart rate (the scaled targets are not needed here).
  model2_input, _, _ = generate_scaled(input_df,user_embed,route_embed,scaler_dic)
  pred_sp,pred_hr = SpeedHr_model(model2_input)

  # Undo the scaling and blank every step after the stop point.
  keep_mask = apply_stop(stop_point)
  pred_sp = scaler_dic['speed'].inverse_transform([np.array(pred_sp).flatten()]) * keep_mask
  pred_hr = scaler_dic['heart_rate'].inverse_transform([np.array(pred_hr).flatten()]) * keep_mask
  return pred_sp,pred_hr,pred_dis,stop_point



def load_model_torch(filename, model, map_location=None):
    """Load the best checkpoint weights for `model` from the shared Drive folder.

    Args:
        filename: checkpoint file name inside the "Distance Prediction" folder.
        model: module whose `load_state_dict` receives the stored weights.
        map_location: forwarded to `torch.load`. When omitted, tensors are
            mapped to the device of the model's first parameter, so a
            checkpoint saved on GPU can still be loaded on a CPU-only runtime.

    Returns:
        The same `model` instance with the checkpoint's
        `'best_model_state_dict'` loaded.
    """
    DATA_PATH = "/content/gdrive/My Drive/endomondoHR/Data/model/Final Model/Distance Prediction/"+filename

    if map_location is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            map_location = first_param.device

    checkpoint = torch.load(DATA_PATH, map_location=map_location)
    model.load_state_dict(checkpoint['best_model_state_dict'])

    return model

def load_distance_model(LOAD_MODEL_NAME,device):
  """Instantiate the distance-regression MLP on `device` and load its weights.

  The network takes 34 input features, has one hidden layer of 16 units with
  dropout 0.2, and ends in a sigmoid. Weights come from
  `load_model_torch(LOAD_MODEL_NAME, ...)`.
  """
  class DisReg_MLP_2Layer(nn.Module):
    """Linear -> ReLU -> Dropout -> Linear -> Sigmoid regressor."""

    def __init__(self, input_dim, hidden_dim_1, p):
        super().__init__()
        # Attribute names must stay as-is: they are the state-dict keys.
        self.fc1 = nn.Linear(input_dim, hidden_dim_1)
        self.fc2 = nn.Linear(hidden_dim_1, 1)
        self.act_1 = nn.ReLU()
        self.act_2 = nn.Sigmoid()
        self.drop_1 = nn.Dropout(p)

    def forward(self, data):
        hidden = self.drop_1(self.act_1(self.fc1(data)))
        return self.act_2(self.fc2(hidden))

  input_dim, hidden_dim, drop_out = 34, 16, 0.2
  network = DisReg_MLP_2Layer(input_dim, hidden_dim, drop_out).to(device)
  return load_model_torch(LOAD_MODEL_NAME, network)

def load_sphr_model(filename):
  """Load the saved speed / heart-rate model stored at `filename`."""
  return load_model(filename)


Overwriting utils.py


In [90]:
%%writefile app.py
import streamlit as st
#plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import joblib
# import torch.nn as nn
import torch
# from sktensor import dtensor
# import pandas as pd
# import os
from utils import predict_sp_hr,load_dataset,load_distance_model,load_sphr_model
# from tensorflow.keras.models import load_model
import numpy as np


def _line_figure(values, title, y_title):
  # Build one 1400x600 line chart of `values` against 1-based time stamps.
  # The x axis is derived from len(values) so both always have the same size
  # (the previous fixed np.arange(1,499) was one point short of 499 values).
  x_axis = np.arange(1, len(values) + 1)
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=x_axis, y=values,
                mode='lines'))
  fig.update_layout(font_size=16,height=600,width=1400,title=title,
  xaxis = dict(title = 'TimeStamp'),
  yaxis = dict(title = y_title))
  return fig


if __name__ == '__main__':
  # Streamlit entry point: load data and models, collect the user's choices in
  # the sidebar, and plot the predicted speed / heart-rate curves on "Run".

  # load data and model
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  # path
  file_path = '/content/gdrive/My Drive/endomondoHR/Data/Dataset/[1109]TestData_adjusted.csv'
  route_dir = '/content/gdrive/My Drive/endomondoHR/Data/embedding/routeEmbed_tensorD_13.m'
  user_dir= '/content/gdrive/My Drive/endomondoHR/Data/embedding/userEmbed_tensorD_13.m'
  LOAD_MODEL_NAME = 'DisReg_MLP_2'
  model2_name = '/content/gdrive/My Drive/endomondoHR/Data/model/Final Model/speed2hr.h5'
  df,user_embed_df,route_embed_df,OneHot_enc,scaler_dic = load_dataset(file_path,route_dir,user_dir)
  Distance_model = load_distance_model(LOAD_MODEL_NAME,device)
  SpeedHr_model = load_sphr_model(model2_name)

  ##simple demo
  # Work on a copy so adding the 'name' column does not write into a slice,
  # and trim the name list in case fewer rows than names were loaded.
  demo_names = ['Bonan','Vincent','Han','Yu','Ma']
  df = df[:len(demo_names)].copy()
  df['name'] = demo_names[:len(df)]

  user_option = st.sidebar.selectbox('Name:',list(df['name']))
  user_row = df[df['name']==user_option]
  user_id = user_row['userId'].values[0]
  gender = user_row['gender'].values[0]

  route_option = st.sidebar.selectbox('Route:',list(df['id']))
  route_row = df[df['id']==route_option]
  recommend_cal = route_row['calories'].values[0]
  total_distance = route_row['distance_adjusted_sum'].values[0]
  st.write('Workout Route Distance:',total_distance)

  # This selector chooses the sport; it was previously labelled 'Route:'.
  sport = st.sidebar.selectbox('Sport:',['run','bike','mountain bike'])
  calories = st.sidebar.slider('Target Calories:',min_value = int(0.5*recommend_cal),max_value = int(1.2*recommend_cal),value=int(recommend_cal),step=1)
  if st.sidebar.button('Run'):
    sp,hr,dis,stop_point = predict_sp_hr(df, user_embed_df, route_embed_df,scaler_dic,OneHot_enc,device,
                    Distance_model, SpeedHr_model,
                      workout_id = route_option ,user_id=user_id,gender=gender,calories=calories,sport=sport)
    if stop_point == -1:
      # The predicted distance exceeds the selected route's total distance.
      st.write('Calories too high')
    else:
      st.write('Predicted distance:',dis)
      st.plotly_chart(_line_figure(sp.flatten(), 'Speed', 'Speed(km/h)'))
      st.plotly_chart(_line_figure(hr.flatten(), 'HeartRate', 'HeartRate(bpm)'))


Overwriting app.py


In [54]:
!streamlit run app.py --server.port 8502 &>/dev/null& 

In [55]:
!pgrep streamlit

1707


In [56]:
from pyngrok import ngrok
public_url = ngrok.connect(port='8502')

t=2020-11-11T13:16:29+0000 lvl=warn msg="failed to start tunnel" pg=/api/tunnels id=998e7fb1cf725cb7 err="Your account may not run more than 2 tunnels over a single ngrok client session.\nThe tunnels already running on this session are:\n[http://e10471b2773e.ngrok.io https://e10471b2773e.ngrok.io]\n\r\n\r\nERR_NGROK_324\r\n"



PyngrokNgrokHTTPError: ignored

In [11]:
# Display the public tunnel URL returned by ngrok.connect in the earlier cell.
public_url

'http://e10471b2773e.ngrok.io'

In [52]:
!kill 1663