In [70]:
%matplotlib inline

import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.automl import H2OAutoML

import matplotlib.pyplot as plt
import numpy as np

In [71]:
# constant random seed 
seed = 3232

# Connect to a pre-existing cluster
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O cluster uptime:,1 day 1 hour 53 mins
H2O cluster timezone:,America/Denver
H2O data parsing timezone:,UTC
H2O cluster version:,3.20.0.8
H2O cluster version age:,24 days
H2O cluster name:,H2O_from_python_pivotal_g3qr87
H2O cluster total nodes:,1
H2O cluster free memory:,1.280 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,8


In [72]:
from h2o.utils.shared_utils import _locate # private function. used to find files within h2o git project directory.

#df = h2o.import_file(path=_locate("storymanagement.csv"))
df = h2o.import_file(path=_locate("infra_old.csv"))
# df = dfi.na_omit()


Parse progress: |█████████████████████████████████████████████████████████| 100%


In [73]:
df.describe()

Rows:13997
Cols:18




Unnamed: 0,Id,Title,Labels,Iteration,Iteration Start,Iteration End,Type,Estimate,Current State,Created at,Accepted at,Deadline,Requested By,Description,URL,Owned By,Owned By 2,Owned By 3
type,int,string,enum,int,enum,enum,enum,int,enum,enum,enum,string,enum,string,string,enum,enum,enum
mins,1.0,,,1.0,,,,0.0,,,,,,,,,,
mean,49031560.6875,,,349.669315801,,,,1.01371769384,,,,0.0,,,,,,
maxs,154943249.0,,,646.0,,,,8.0,,,,,,,,,,
sigma,50950125.2707,,,174.572012043,,,,0.97595403346,,,,-0.0,,,,,,
zeros,0,0,,0,,,,1563,,,,0,,0,0,,,
missing,156,106,6030,156,149,151,152,8967,155,588,154,13983,157,5533,156,1056,11230,13516
0,16.0,Get prioritization working again,,1.0,"Sep 26, 2005","Oct 2, 2005",feature,1.0,accepted,"Sep 12, 2006","Sep 26, 2005",,Nathan Wilmes,,https://www.pivotaltracker.com/story/show/16,,,
1,15.0,"Color-code iterations - grey for past, blue for current, green for future",,1.0,"Sep 26, 2005","Oct 2, 2005",feature,1.0,accepted,,"Sep 26, 2005",,Nathan Wilmes,,https://www.pivotaltracker.com/story/show/15,,,
2,4.0,"Save button on ""edit user"" is incorrectly labeled ""edit"".",,1.0,"Sep 26, 2005","Oct 2, 2005",bug,,accepted,,"Sep 27, 2005",,Rob Mee,,https://www.pivotaltracker.com/story/show/4,,,


In [74]:
# Does preprocessing on the raw data frame
def preprocess(df):
    df_edited = df
    
    for field in ["Id", "Current State", "Requested By", "URL", "Zendesk ID", "Integration", "Labels",
                  "Owned By", "Owned By 2", "Owned By 3", "Iteration Start", "Iteration End", "Created at", 
                  "Accepted at", "Deadline", "Title", "Description"
                 ]: 
        try:
            df_edited = df_edited.drop(field)
        except:
            pass

    # Convert fields to logical factors
    df_edited['Type'] = df_edited['Type'].asfactor() #drop non-features altogether?
    
    return df_edited


In [75]:
df_factors = preprocess(df)

In [76]:
# Set Stop Words
# The STOP WORDS we are importing are from the nltk package
import pandas as pd
import os

# Use local data file or download from GitHub
docker_data_path = "/home/h2o/data/nlp/stopwords.csv"
if os.path.isfile(docker_data_path):
  data_path = docker_data_path
else:
  data_path = "https://raw.githubusercontent.com/h2oai/h2o-tutorials/master/h2o-world-2017/nlp/stopwords.csv"

STOP_WORDS = pd.read_csv(data_path, header=0)
STOP_WORDS = list(STOP_WORDS['STOP_WORD'])

def tokenize(sentences, stop_word = STOP_WORDS):
    tokenized = sentences.tokenize("\\W+")
    tokenized_lower = tokenized.tolower()
    tokenized_filtered = tokenized_lower[(tokenized_lower.nchar() >= 2) | (tokenized_lower.isna()),:]
    tokenized_words = tokenized_filtered[tokenized_filtered.grep("[0-9]",invert=True,output_logical=True),:]
    tokenized_words = tokenized_words[(tokenized_words.isna()) | (~ tokenized_words.isin(STOP_WORDS)),:]
    return tokenized_words

# Break titles into sequence of words
descriptions = tokenize(df["Description"].ascharacter())
titles = tokenize(df["Title"].ascharacter())

x = df["Description"].ascharacter().cbind(df["Title"].ascharacter())
all_text = tokenize(x)
titles.head()
descriptions.head()
all_text.head()

C1
get
prioritization
working
color
code
iterations
grey
past
blue




In [77]:
# Train Word2Vec Model
from h2o.estimators.word2vec import H2OWord2vecEstimator

# This takes time to run - left commented out
description_w2v_model = H2OWord2vecEstimator(vec_size = 100, model_id = "w2v.hex")
description_w2v_model.train(training_frame=all_text)

# title_w2v_model = H2OWord2vecEstimator(vec_size = 100, model_id = "w2v.hex")
# title_w2v_model.train(training_frame=titles)

word2vec Model Build progress: |██████████████████████████████████████████| 100%


In [78]:
description_w2v_model.find_synonyms("command", count = 5)

OrderedDict([(u'commands', 0.74235999584198),
             (u'storycreate', 0.7106621861457825),
             (u'epicupdate', 0.7018342018127441),
             (u'addstory', 0.6913412809371948),
             (u'addcomment', 0.6774505376815796)])

In [79]:
# title_w2v_model.find_synonyms("count", count=5)

In [80]:
description_vecs = description_w2v_model.transform(descriptions, aggregate_method = "AVERAGE")
title_vecs = description_w2v_model.transform(titles, aggregate_method = "AVERAGE")
title_vecs.names = ["title_" + t for t in title_vecs.names]

title_vecs

title_C1,title_C2,title_C3,title_C4,title_C5,title_C6,title_C7,title_C8,title_C9,title_C10,title_C11,title_C12,title_C13,title_C14,title_C15,title_C16,title_C17,title_C18,title_C19,title_C20,title_C21,title_C22,title_C23,title_C24,title_C25,title_C26,title_C27,title_C28,title_C29,title_C30,title_C31,title_C32,title_C33,title_C34,title_C35,title_C36,title_C37,title_C38,title_C39,title_C40,title_C41,title_C42,title_C43,title_C44,title_C45,title_C46,title_C47,title_C48,title_C49,title_C50,title_C51,title_C52,title_C53,title_C54,title_C55,title_C56,title_C57,title_C58,title_C59,title_C60,title_C61,title_C62,title_C63,title_C64,title_C65,title_C66,title_C67,title_C68,title_C69,title_C70,title_C71,title_C72,title_C73,title_C74,title_C75,title_C76,title_C77,title_C78,title_C79,title_C80,title_C81,title_C82,title_C83,title_C84,title_C85,title_C86,title_C87,title_C88,title_C89,title_C90,title_C91,title_C92,title_C93,title_C94,title_C95,title_C96,title_C97,title_C98,title_C99,title_C100
0.188086,0.127916,-0.129372,0.152486,0.338477,0.00912391,0.157734,-0.356707,-0.0922398,0.170212,0.163349,0.317121,0.0202113,-0.183257,0.0926856,-0.318819,0.172105,-0.0626777,-0.114801,0.12463,0.0384651,-0.117826,0.176206,-0.0650803,0.161323,-0.117465,0.0539929,0.157668,0.0596385,0.0971185,-0.178937,-0.0970565,-0.145844,-0.0570755,0.187783,0.297012,-0.176955,0.229251,-0.331398,0.266274,-0.0340301,0.0715334,-0.0641393,-0.0894931,0.161287,-0.0973709,-0.0367113,-0.279735,-0.00192801,0.0970281,-0.133757,0.39448,0.0348548,0.0876515,0.109434,0.0979144,-0.0726092,0.0465762,-0.150089,0.0617235,0.0620913,-0.125935,-0.119216,-0.212368,-0.0206058,-0.170261,0.123682,-0.114226,-0.0510927,0.207406,-0.228304,-0.328696,0.0200945,0.0634632,0.22674,0.0200425,0.231817,0.106035,-0.0419326,-0.0778826,0.309077,-0.118036,0.0179647,-0.0429497,0.0551529,-0.157458,-0.0680207,0.0836619,-0.0746115,0.0375601,-0.392753,0.0670916,0.0850326,0.0646261,0.317202,0.0143353,-0.0623165,0.358006,0.208896,-0.00748622
0.367214,-0.17953,-0.092695,-0.127673,0.225561,-0.172358,0.0566831,-0.0613419,0.0378748,0.278177,-0.0864713,0.112894,0.354208,-0.136668,-0.105147,-0.0262015,0.116935,0.283287,0.0303628,0.241436,-0.153247,0.326696,-0.0251484,-0.0198338,0.0710071,0.0173082,0.0271255,-0.0859808,-0.366278,-0.00122737,-0.0480669,0.0980849,-0.275612,0.0929639,-0.0822857,-0.0534012,0.116795,0.0278006,-0.0719829,0.10535,0.0786901,0.0137653,-0.141171,-0.141631,-0.0217488,0.0628439,0.00287906,-0.0140959,-0.175107,0.0715026,-0.0727873,0.109603,0.147683,0.135902,0.141889,-0.0378708,-0.0584493,-0.0645161,0.0403031,0.341395,0.0665163,0.0790287,-0.0151153,-0.212877,0.118958,-0.00939733,-0.0423387,-0.0378805,0.0704566,-0.0186536,-0.172085,-0.225194,0.0416812,0.0481183,0.193663,0.089522,-0.239192,0.183827,0.117386,0.0198163,0.331121,-0.0289981,-0.262492,-0.215624,0.0900946,-0.0987128,-0.440045,0.0451287,-0.0302035,0.0927591,-0.152935,-0.102661,0.0226362,0.230832,0.303333,0.0329724,-0.0653111,-0.0271397,0.184695,-0.0659354
0.480815,0.130229,-0.104208,0.204063,0.0150451,0.0246889,-0.0150485,-0.209919,0.0954456,0.158683,0.192029,0.0423531,0.147843,-0.113443,0.0117924,0.016816,0.0718204,0.26121,0.155167,0.258771,0.129193,0.126819,0.197621,0.0441222,0.0723176,-0.112953,-0.208366,-0.0622957,0.0289479,0.0906553,-0.0912643,0.154813,-0.0667661,0.0040231,-0.211172,-0.0334883,-0.0436253,-0.0294151,0.24907,-0.0367554,-0.170907,0.0811243,-0.123655,0.0403053,0.099216,-0.131256,-0.181422,-0.11344,0.250007,0.0181646,0.0295118,-0.0114669,0.0305067,0.0286914,0.193522,0.220647,-0.112283,0.0853194,-0.0658557,0.0154037,-0.0610577,0.0676028,-0.257816,0.0595234,-0.121008,0.0184993,0.138114,-0.0179879,-0.0640675,0.247427,-0.14137,-0.167468,-0.262532,-0.0531595,0.120946,0.0805618,-0.283834,0.153377,0.213355,-0.023394,0.151939,-0.0546763,0.0715877,-0.267246,-0.100548,-0.179754,-0.0403066,0.0420316,0.166459,0.129643,0.0507231,0.0280778,-0.062254,-0.112193,0.478319,0.332348,-0.0428114,-0.100756,0.130444,-0.218748
0.468035,0.185052,0.0545805,0.127893,0.173926,-0.279434,-0.0668367,-0.0652828,0.245925,0.204639,-0.0922193,0.0779226,0.135966,-0.0982923,0.102098,-0.0664343,0.0812327,0.27624,0.271813,0.311494,-0.274495,0.167335,0.23851,-0.0773759,0.00660794,-0.0221537,-0.158168,-0.0597756,0.0608955,0.131573,-0.0680401,-0.0132229,-0.0850949,-0.00339016,0.0299912,0.0938509,0.103413,-0.0411474,0.129537,-0.022746,-0.0440876,0.0635332,-0.105531,0.0197526,0.280509,-0.165378,-0.0370136,-0.00960342,0.0522425,0.0832162,-0.0563692,0.0947914,0.119836,0.147857,0.0297221,0.0683818,-0.11222,0.0346143,-0.177989,0.144734,-0.125975,0.048247,0.0774518,0.053581,-0.181574,0.11395,0.00412421,-0.113851,-0.00734753,0.0463387,-0.188861,-0.146863,0.0307077,-0.0778117,0.195397,0.192009,-0.318489,0.161509,0.00894926,-0.057216,0.00130882,-0.191886,0.0184034,-0.201186,0.019412,-0.12969,0.0266292,0.0868081,0.378504,0.27591,0.0890128,0.117878,-0.0715017,0.100814,0.233745,0.160555,0.0670118,0.0164994,0.128597,-0.164615
0.383006,0.259186,-0.245439,0.248508,0.306512,-0.0207007,0.064156,-0.272913,0.0942179,0.0951756,0.0917322,0.283172,0.0130601,-0.00822826,-0.0182563,-0.0538714,0.0144765,0.208684,0.187999,0.150698,-0.0425998,0.117698,0.195443,-0.00712595,-0.112468,-0.373565,0.187763,-0.163033,0.0202026,0.142497,0.0466164,-0.012481,-0.0261205,0.0452454,-0.0777378,-0.033243,0.0707276,0.0182574,-0.0718917,0.0775229,0.00482712,-0.206615,-0.038901,-0.115946,0.223114,-0.00444625,-0.125233,-0.00183312,-0.445985,0.140751,-0.101516,-0.0656984,-0.0123016,0.158743,0.112207,0.075459,-0.306913,0.0198909,-0.0595531,0.134277,0.201024,-0.221863,-0.059144,-0.171056,-0.0124365,0.0979048,0.0570738,-0.0330961,-0.203538,0.0653255,-0.068854,0.052869,-0.0564745,0.203934,0.0285265,0.218269,-0.0418699,0.0857969,0.00623123,0.0890205,0.142549,0.16641,-0.0937462,-0.361322,0.0474857,0.0914403,-0.259198,0.0825028,-0.119018,0.0881021,0.0353018,-0.073049,0.193259,0.17778,0.0256961,0.190461,-0.200142,-0.0913125,-0.020908,-0.104506
0.421198,0.204887,-0.18059,0.262985,0.162702,-0.0758655,0.0251581,-0.0777206,0.0246385,0.178109,0.0310664,0.106648,0.16251,-0.0485186,0.161191,0.00610599,0.00198076,0.377629,-0.0148338,0.162544,0.106791,0.193809,0.175437,-0.119638,-0.0891612,0.0840606,-0.0437617,-0.0557694,0.0218122,0.0772465,0.0378834,0.0172715,-0.0982731,0.159089,0.0260749,0.054887,0.173757,-0.0193732,-0.0442236,0.0675518,-0.070614,0.028938,-0.0596291,-0.0553989,0.138084,-0.101603,0.063424,-0.0270035,-0.0779886,0.140297,0.00106515,0.171996,0.00633475,-0.0385332,0.0653583,0.144703,-0.0403903,-0.0466814,-0.0545053,0.191711,0.0309389,-0.0247801,-0.136157,-0.0491471,-0.0528606,0.0993216,0.0594317,-0.0444805,-0.0174233,0.234596,-0.125412,0.0434585,-0.0531911,0.0018301,-0.00859113,0.0807968,-0.296848,0.121097,-0.00103839,-0.0418994,0.332716,-0.0719882,0.0331564,-0.271683,0.121939,-0.0630637,-0.239036,0.149707,0.222547,0.0832915,0.132546,0.0630379,-0.0428265,0.0564498,0.139816,0.242903,0.0216053,0.15356,0.071958,-0.0769119
0.201147,0.137994,-0.12127,0.0474468,0.211772,-0.286759,-0.00333711,-0.206663,-0.00144101,0.151852,0.024861,-0.0142944,-0.00107396,0.118575,0.0303723,-0.0893127,0.0448517,0.149021,0.103709,0.120269,0.00749592,0.147688,0.295576,0.00374956,0.149102,-0.16572,0.134293,0.0232117,-0.183445,0.0635516,-0.0631104,0.0313631,0.0706572,-0.0872491,-0.129252,0.0212994,0.199218,0.00871235,0.131499,-0.0206955,-0.0853387,0.0354603,-0.0341343,0.0422979,0.150669,0.0203036,-0.0230078,-0.016823,-0.119096,0.20663,-0.0760541,0.164701,0.180465,0.0311633,0.112055,0.0426418,-0.0584196,0.252243,-0.142466,0.054865,0.240915,0.0722105,-0.0414682,-0.0485257,-0.185597,-0.189004,0.00986527,-0.141308,0.120489,0.27499,-0.107056,-0.0463852,-0.0361271,-0.0138374,0.259535,0.122511,-0.101453,0.260579,0.0351065,-0.0290578,0.284873,-0.0159079,-0.0421044,-0.209844,0.211849,0.0401594,-0.20875,0.0589215,-0.10727,0.118938,-0.0282616,-0.0734944,-0.0399338,0.0473782,0.247072,0.151298,-0.0446163,-0.000593965,0.0143507,-0.105611
0.412003,-0.0280199,-0.116937,0.0892335,0.164652,-0.102623,0.0748924,-0.109318,-0.0528999,0.151605,0.0294382,0.117442,0.252135,-0.127565,-0.206069,-0.105587,0.10092,0.20714,0.133325,0.141492,-0.199118,0.204931,0.159058,-0.0433719,0.145396,-0.200069,0.107206,-0.0656619,-0.144388,-0.0702058,0.0445306,0.0846873,-0.126555,-0.0165709,-0.22646,-0.242882,0.190112,-0.0477527,-0.0838633,0.0096753,0.116283,-0.0313192,-0.102194,-0.0430698,0.169274,0.0791015,0.0290944,-0.00102612,-0.0902044,0.078347,-0.181816,-0.094884,0.171355,0.178776,0.201178,0.0477955,-0.0530364,-0.141494,-0.0926044,0.222541,0.0638026,0.0753839,-0.027946,-0.245923,0.123085,0.10431,-0.0427279,-0.0835073,-0.0714602,0.0718197,-0.222512,-0.212751,0.0689939,-0.0357944,0.136036,0.0956853,-0.0828768,0.139352,0.130475,0.0315873,0.261581,0.119176,-0.153354,-0.228266,0.178498,0.0823835,-0.298815,0.0653857,0.0800785,0.0617099,-0.0990984,-0.0207555,0.0830962,0.143016,0.17825,0.150869,-0.17062,-0.0061021,-0.069018,-0.0708762
0.481766,0.150428,-0.336984,0.178436,0.298226,0.0575968,-0.223804,-0.213495,0.114379,0.23608,-0.0752967,0.244373,0.192953,-0.133811,-0.229992,0.175609,0.0717302,0.209209,0.173665,0.138121,-0.11805,0.122863,-0.0018456,0.138176,0.231964,-0.299233,-0.0380969,-0.0752969,-0.153229,0.0924772,0.0316309,-0.0391604,-0.104638,0.0906941,-0.115746,-0.0890325,-0.0429207,0.140229,-0.0231729,0.154372,-0.214589,-0.0886405,-0.0665157,0.0156858,0.140827,-0.0306526,-0.225275,-0.0667994,-0.455456,0.123731,-0.0903234,0.158013,-0.0283776,0.130914,0.00576895,0.0240123,-0.163377,-0.068569,-0.0378514,0.210281,-0.08946,-0.140251,0.0889843,-0.352678,0.122092,-0.0918625,0.102159,-0.040146,-0.152283,0.00326875,-0.0253768,-0.14288,-0.0921607,-0.0300462,0.185913,0.272042,-0.0995026,-0.0599888,-0.0943955,-0.191553,0.173668,0.189431,-0.143688,-0.276865,0.143837,0.243446,-0.236446,0.0460932,-0.0144051,0.146055,-0.0450765,-0.101685,0.122285,0.34268,0.0209926,0.17017,-0.207319,-0.0183814,0.0373778,0.0242986
0.228375,0.197827,-0.298811,0.122473,0.192512,-0.16336,0.128494,-0.204514,0.0246742,0.0997744,0.123696,0.129857,0.263519,-0.0458354,-0.0387162,0.00456123,0.171421,0.283294,-0.0157104,0.190059,0.032836,0.219973,0.327185,-0.0376912,-0.00633099,-0.0186611,0.0303158,-0.207714,-0.174308,-0.033515,0.0286136,0.0710745,-0.212915,0.0262176,-0.00471402,0.0344463,0.145778,-0.0177555,-0.0696007,0.03373,-0.106485,0.0182565,-0.10798,-0.0222204,0.124666,-0.0645313,0.0304721,-0.0559034,-0.195441,0.088606,-0.0519456,0.114447,0.0119239,0.0290968,0.130846,0.196181,-0.00385513,0.070618,0.00688532,0.206202,-0.0658922,0.022735,-0.060744,-0.0100964,0.00623938,0.00958995,-0.148126,-0.0659464,-0.00477598,0.157186,-0.0637982,0.0956761,0.00464897,0.0270905,0.0040641,0.100219,-0.241988,0.164373,0.0676812,0.0526013,0.301042,0.0721402,-0.0346548,-0.344571,0.178098,-0.00533112,-0.302028,0.0183109,0.132046,0.0824164,0.19958,0.0360034,-0.0411862,0.0781868,0.114925,0.259004,-0.0985917,0.069472,-0.0254431,-0.137991




In [81]:
df_edited = df_factors.cbind(description_vecs)
df_edited_title = df_factors.cbind(description_vecs).cbind(title_vecs)

df_edited.head()

Iteration,Type,Estimate,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86,C87,C88,C89,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99,C100
1,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,feature,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,




In [82]:

train,test,valid = df_edited.split_frame(ratios=[.85, .1], seed = seed)
train_title,test_title,valid_title = df_edited_title.split_frame(ratios=[.85, .1], seed = seed)

In [86]:
train_title.na_omit()
# See that the data is ready
train_title.describe()

Rows:11939
Cols:203




Unnamed: 0,Iteration,Type,Estimate,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86,C87,C88,C89,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99,C100,title_C1,title_C2,title_C3,title_C4,title_C5,title_C6,title_C7,title_C8,title_C9,title_C10,title_C11,title_C12,title_C13,title_C14,title_C15,title_C16,title_C17,title_C18,title_C19,title_C20,title_C21,title_C22,title_C23,title_C24,title_C25,title_C26,title_C27,title_C28,title_C29,title_C30,title_C31,title_C32,title_C33,title_C34,title_C35,title_C36,title_C37,title_C38,title_C39,title_C40,title_C41,title_C42,title_C43,title_C44,title_C45,title_C46,title_C47,title_C48,title_C49,title_C50,title_C51,title_C52,title_C53,title_C54,title_C55,title_C56,title_C57,title_C58,title_C59,title_C60,title_C61,title_C62,title_C63,title_C64,title_C65,title_C66,title_C67,title_C68,title_C69,title_C70,title_C71,title_C72,title_C73,title_C74,title_C75,title_C76,title_C77,title_C78,title_C79,title_C80,title_C81,title_C82,title_C83,title_C84,title_C85,title_C86,title_C87,title_C88,title_C89,title_C90,title_C91,title_C92,title_C93,title_C94,title_C95,title_C96,title_C97,title_C98,title_C99,title_C100
type,int,enum,int,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real,real
mins,1.0,,0.0,-0.613289833069,-0.575609385967,-1.23421835899,-0.526966571808,-0.431601792574,-0.749376773834,-0.507206380367,-0.723794162273,-0.969160795212,-0.544210135937,-0.475648075342,-0.454058796167,-0.465893834829,-0.493187129498,-0.58183401823,-0.675256371498,-0.430630326271,-0.596178770065,-0.485198050737,-0.626087248325,-0.628068983555,-0.61051183939,-0.268756598234,-0.388613969088,-0.535161733627,-0.776922821999,-0.536987841129,-0.493956357241,-0.586057662964,-0.579938769341,-0.534640848637,-0.414664655924,-0.589319646358,-0.724550545216,-0.507375657558,-0.41931733489,-0.72639644146,-0.482137292624,-0.708372712135,-0.430437266827,-0.484024524689,-0.730877757072,-0.580257296562,-0.561119914055,-0.513206124306,-0.424999028444,-0.585996627808,-0.67748850584,-0.820198535919,-0.790945291519,-0.508906006813,-0.412578195333,-0.880422353745,-0.432851165533,-0.398434251547,-0.918032407761,-0.549733877182,-0.507644712925,-0.730715453625,-0.595769405365,-0.595451712608,-0.703657448292,-0.420623838902,-0.650616884232,-0.631075739861,-0.416171878576,-0.414008229971,-0.488304138184,-0.49297362566,-0.393995434046,-0.594390273094,-0.751760303974,-0.513906776905,-0.594174385071,-0.342025011778,-0.410349309444,-0.66924148798,-0.61498606205,-0.677455961704,-0.468721330166,-0.284346610308,-0.568247795105,-0.635349214077,-0.69828492403,-0.506187975407,-0.466769278049,-0.883827865124,-0.436517834663,-0.574395179749,-0.425618290901,-0.731976985931,-0.480838000774,-0.64013004303,-0.36668741703,-0.472652614117,-0.36119312048,-0.519134998322,-0.702114760876,-0.502573370934,-0.619824588299,-1.14683127403,-0.680188536644,-0.859753489494,-0.792958438396,-0.592392325401,-0.984288990498,-0.667868673801,-0.671155691147,-0.659297168255,-0.572814404964,-0.561081171036,-0.635900914669,-0.683375298977,-0.561897873878,-0.593623518944,-0.730300009251,-0.431662738323,-0.706795930862,-0.586316108704,-0.789358079433,-0.844152748585,-0.793018341064,-0.495203971863,-0.541367053986,-0.573286890984,-0.732297837734,-0.785753309727,-0.540769398212,-0.774441540241,-0.447408258915,-0.894012272358,-0.535094916821,-0.907183408737,-0.631721436977,-0.62001144886,-0.607661008835,-0.618080496788,-0.660893499851,-0.622833848,-0.500859797001,-0.842534184456,-0.768471956253,-0.623277068138,-0.811450719833,-0.670129954815,-0.560183286667,-0.597926318645,-0.739262759686,-0.887724339962,-0.620892822742,-0.476803690195,-0.566019713879,-0.7138017416,-0.506320953369,-0.687343776226,-0.781381607056,-0.867327213287,-0.455812871456,-0.840173661709,-0.58969193697,-0.500335991383,-0.736406624317,-0.848391771317,-0.635197222233,-0.672744631767,-0.523662209511,-0.490184396505,-0.544209957123,-0.587761640549,-0.511001586914,-0.709301054478,-0.770115315914,-0.449796825647,-0.799628436565,-0.486736118793,-0.527805745602,-0.637891829014,-0.454475045204,-0.729005515575,-0.509587287903,-0.376767575741,-0.599502623081,-0.537637591362,-0.864374756813,-0.621125519276,-0.59912955761,-0.91918027401,-0.701623260975,-0.855407059193,-0.490078836679,-0.869895517826,-0.592141151428,-0.64013004303,-0.462861090899,-0.727546989918,-0.55211597681,-0.626498222351,-0.446935236454,-0.365266919136,-0.739035844803
mean,350.29121112,,1.01156336725,0.173532317164,0.0819249671494,-0.157620582641,0.157079887281,0.0855575719176,-0.0250477300546,0.12346072803,-0.0972967705419,-0.00886456743425,0.0915515516994,0.0784899400996,0.100368274696,0.0786537753826,-0.0689306181292,-0.0136860749758,-0.110990416073,0.0507229447769,0.0744265920793,0.0826231861819,0.137419087159,-0.0860544402725,0.0369486883614,0.2093386097,0.0163351412482,0.0274663511158,-0.126261884713,0.0400947555191,0.0295866049001,-0.00983734481533,0.0785960740709,-0.0305939873342,0.0527136643865,-0.119150102637,-0.0215653412996,0.0145293529616,0.0130787680681,-0.0106443172156,-0.00315374554501,0.0104605967232,0.0325008200892,-0.0564687043273,0.00730047060019,-0.0966758413824,0.0606764431464,0.0964835814456,0.000493129661396,0.00669454290696,-0.0255493303705,-0.08874418653,0.0301463183215,-0.0369556452531,0.142182544664,0.0341500521252,0.096534399887,0.114915348798,0.0162916289569,-0.0830764558611,0.037828787664,-0.0783715811907,0.0687638585064,-0.0247698868212,-0.0923344110526,0.0225508662522,-0.0351670723898,-0.033833609124,0.0700803936607,0.108649029466,-0.0336795370506,0.0351289769509,0.191882882211,-0.138465305815,-0.165984601949,0.0291500960227,-0.0407078657256,0.0954861701012,0.112179917287,-0.0611646624058,0.123416138831,-0.0529925759765,0.0104229476056,0.193761185856,-0.00618207226613,-0.0337858322175,-0.199383561856,0.115005892921,-0.0233161143124,-0.120725925156,0.075708618134,0.0950024521922,0.0905445621686,0.00316540081307,0.0340300699093,-0.000508648527598,0.132762325176,0.14443997916,0.15609782698,0.000866934770352,0.0379528408195,0.0883262900454,-0.111774485251,0.220315408215,0.0787209778694,-0.1570372918,0.149039356796,0.108287166885,-0.032612116989,0.125258650739,-0.0975575781628,-0.0163801283937,0.0898592224449,0.0628122059516,0.105852136401,0.0807389885234,-0.0873504920391,0.0198007778979,-0.077541470278,0.0395766054055,0.116694250397,0.0840788946216,0.128234335267,-0.105207565908,0.0876706827441,0.191683392751,0.00858374384118,0.0184067351938,-0.125004606324,0.0229275083805,0.0285446169314,-0.0462870189484,0.0874575364004,-0.0503680930534,0.0309646738761,-0.0865823367108,0.00482682356605,-0.0222415640999,0.046710211778,0.0141231498312,-0.0374412872578,0.0316817330989,0.0549144618182,-0.0272424617947,0.00129175506692,-0.12392741073,0.044115045102,0.111519083745,-0.0261169460108,-0.0321548986418,-0.0291510523729,-0.108628208581,0.0203491153243,-0.0141412813562,0.132689309551,0.117648587758,0.0981082952904,0.103479883999,-0.00769525404705,-0.0948803514463,0.0574888986143,-0.0901031960659,0.0649490391698,-0.00377298862203,-0.0892725021778,0.0221656308594,-0.0343253747181,-0.0158283845951,0.0779868587561,0.094201110435,-0.0389716271006,0.0162999837006,0.163869974508,-0.133884721949,-0.157407372102,0.0307717005487,-0.0312314560961,0.121992828249,0.10242328206,-0.0790581615254,0.114526230092,-0.0425373865248,0.014034336148,0.195198006555,-0.0208193394291,-0.0422954083835,-0.207857687424,0.104170827778,-0.0549703664759,-0.142274081893,0.065976695675,0.0706405587128,0.0591245293349,0.00644716485421,0.0399292452178,-0.0147586423004,0.0991467344903,0.2017713196,0.158886330677,-0.00712454693984,0.0164569099287,0.0873867704573,-0.0700228936857
maxs,646.0,,8.0,0.900293946266,0.699758946896,0.438411414623,0.759470462799,0.791897594929,0.734706759453,0.782183229923,0.429335415363,0.643570065498,0.484470307827,0.677673339844,0.67483150959,0.73126244545,0.515747189522,0.966834068298,0.608020484447,0.891033113003,0.677953600883,0.636774361134,0.697039425373,0.32183483243,0.612055599689,0.900010347366,0.473545879126,0.572404682636,0.578393816948,0.485638052225,0.72515553236,0.65821903944,0.66552054882,0.717762112617,0.640419304371,0.310148835182,0.537465631962,0.583794772625,0.600347340107,0.620140135288,0.557578802109,0.579188466072,0.569473803043,0.697772443295,0.720835268497,0.408982306719,0.829753935337,0.616205632687,0.705097556114,0.542962431908,0.456192970276,0.511429429054,0.59760415554,0.540726304054,0.826151072979,0.697708904743,0.688951015472,0.681085169315,0.615364074707,0.50216782093,0.666195631027,0.607249677181,0.578204929829,0.382592201233,0.399918287992,0.471913874149,0.631305336952,0.445358633995,0.588347315788,0.738882780075,0.534432053566,0.511569023132,0.671588778496,0.609541416168,0.372943162918,0.492240786552,0.483769416809,0.848585903645,0.868007004261,0.568880558014,0.575047910213,0.486242860556,0.621272444725,0.683885097504,0.582779884338,0.481953650713,0.407612621784,0.754112541676,0.5916441679,0.30000680685,0.521249353886,0.656842529774,0.721830487251,0.586015820503,0.550296783447,0.569724440575,0.820994973183,0.818094849586,0.561354100704,0.59308719635,0.589929461479,0.700483620167,0.310746490955,1.36088478565,0.730355739594,0.562745809555,0.87689191103,0.824387133121,0.735573470592,1.0604673624,0.489234447479,0.633352220058,0.626809060574,0.73666703701,0.710343122482,0.659479022026,0.639350295067,0.617254376411,0.574141681194,0.787069916725,0.729984760284,0.954113006592,0.730303168297,0.460866034031,0.713595151901,0.954588234425,1.03749775887,0.649660944939,0.519276976585,0.619515419006,0.691489100456,0.737353026867,0.665837824345,0.68694370985,0.621948599815,0.595259785652,0.77789491415,0.58878993988,0.645826756954,0.609005331993,0.578197538853,0.602287232876,0.786089122295,0.613317012787,0.690800666809,0.534018337727,0.923779726028,0.902506768703,0.681130588055,0.582222700119,0.514623939991,0.574762105942,0.773537576199,0.57294857502,0.93098795414,0.820227384567,0.752903878689,0.957481086254,0.535880684853,0.547079622746,1.04549467564,0.635583043098,0.747534692287,0.907298386097,0.504001677036,0.527986764908,0.548001885414,0.623244762421,0.756757259369,0.86363786459,0.438598394394,0.681777536869,0.884093046188,0.501375615597,0.541514396667,0.797441244125,0.587009012699,0.839638471603,0.964495658875,0.478114396334,0.817789912224,0.54187810421,0.602932214737,1.20602869987,0.558392703533,0.510077297688,0.380150109529,0.556618511677,0.645308256149,0.409235984087,0.592852592468,0.747261285782,0.74621617794,0.892905235291,0.765299022198,0.824356198311,0.644262313843,0.750900030136,1.0180644989,0.812433600426,0.531377732754,0.730787575245,0.620513916016
sigma,174.29930567,,0.972371256124,0.186822047195,0.117912683275,0.123730057731,0.112342367905,0.115363994702,0.108712403116,0.153491445093,0.130588253441,0.137894104498,0.0818052666014,0.113226186803,0.101083083877,0.109155604883,0.112580360726,0.107121344618,0.112429779713,0.104496506283,0.126005590181,0.0888525359727,0.11093540924,0.103944218183,0.127559252899,0.109957708913,0.0967319355844,0.103240205126,0.118399404065,0.101223260872,0.111289646359,0.129187105108,0.100063593373,0.103323608803,0.1171298599,0.0880566572683,0.112905056608,0.111854577722,0.0958523561889,0.118016940579,0.109861387978,0.0930763154882,0.0945020511483,0.0977102760223,0.116762933543,0.108960655395,0.123175030601,0.122459245258,0.0995489252359,0.112775392859,0.106891663992,0.141379067611,0.100276905123,0.0988690526054,0.135997349087,0.187975227559,0.0878600290905,0.0959061820016,0.152108703626,0.0959580126473,0.100815198748,0.113346564698,0.10337224769,0.0920420614031,0.105248781256,0.0915401338434,0.101055596741,0.0983110554051,0.0994049726593,0.12794796775,0.0900372249609,0.0854413770743,0.093653901636,0.103848948553,0.114427766036,0.095140274634,0.103978231831,0.100448699539,0.0963397661403,0.111533717062,0.102847283275,0.110786891399,0.105040555163,0.0929740057661,0.100947469944,0.0913347176534,0.111822385401,0.118234723135,0.112429782583,0.099600902239,0.0916566318818,0.128649564035,0.115376483531,0.117237125032,0.0950616855301,0.0934251448868,0.111554233989,0.135447138888,0.0854760746162,0.11674385489,0.102007770355,0.0988221130795,0.108192168847,0.210143854565,0.143097584709,0.156485761654,0.148965158604,0.149409220353,0.150341776806,0.172906352723,0.146216482515,0.172730168003,0.108041637336,0.118928171306,0.122068883918,0.137540097992,0.146851446069,0.133687774841,0.135681549383,0.127560143525,0.156288321316,0.119460365253,0.151792201259,0.134432012755,0.122732142126,0.139865592577,0.132618061341,0.135490906528,0.133919003013,0.122326936678,0.136453053688,0.1688533963,0.139073361951,0.118966827968,0.114672248746,0.110172737114,0.119727205798,0.131506922398,0.119214633208,0.13503911827,0.122367832055,0.113781137915,0.112079899638,0.118734321445,0.153216154101,0.129290434609,0.133282183989,0.130868363776,0.119052657678,0.12170580786,0.118789827911,0.164001709251,0.129270338725,0.104608288021,0.149802621382,0.154165875362,0.109059798348,0.125999462444,0.177944082873,0.129764486098,0.131567445269,0.125399741096,0.124056873551,0.107534790322,0.126844983005,0.117353818661,0.126855146478,0.135718944374,0.123213813433,0.159489661104,0.108286741676,0.110918926372,0.114740631868,0.13145199484,0.11585837159,0.112395233038,0.131329615421,0.115867890611,0.123103636833,0.147680085032,0.129442210559,0.144043934336,0.136922254841,0.121728632692,0.130469261024,0.117112042278,0.124121903293,0.13557018934,0.134318583392,0.129337038928,0.125754698544,0.143324161682,0.142314995477,0.147616689929,0.114511519953,0.115307290875,0.120362400883,0.123360744601,0.121134552977,0.146146392658,0.116760415529,0.119771244287,0.1126101177
zeros,0,,1350,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
missing,140,137,7615,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,4743,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109
0,1.0,feature,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.367214113474,-0.179529592395,-0.0926950201392,-0.127672612667,0.225561141968,-0.172358110547,0.0566831044853,-0.0613418892026,0.0378748103976,0.278176903725,-0.0864713340998,0.112893752754,0.354207634926,-0.136667624116,-0.105146981776,-0.0262015461922,0.116935491562,0.283287107944,0.0303627923131,0.241436034441,-0.153246834874,0.326696038246,-0.0251483991742,-0.0198337994516,0.0710070803761,0.0173082277179,0.0271255392581,-0.0859808027744,-0.366278111935,-0.00122737383936,-0.0480668693781,0.0980848520994,-0.275611907244,0.0929639190435,-0.0822856873274,-0.0534012056887,0.116795450449,0.0278006065637,-0.0719829499722,0.10534966737,0.0786901116371,0.013765335083,-0.141171380877,-0.141630768776,-0.0217487737536,0.0628439262509,0.00287906336598,-0.0140959359705,-0.175107210875,0.071502648294,-0.0727873146534,0.109603181481,0.147682741284,0.135902032256,0.141889482737,-0.0378707610071,-0.0584492757916,-0.0645160675049,0.0403030775487,0.341395288706,0.0665162727237,0.0790286511183,-0.0151152797043,-0.212877213955,0.118957608938,-0.00939732789993,-0.0423387177289,-0.0378805212677,0.0704566091299,-0.0186535790563,-0.172085091472,-0.225194111466,0.0416811853647,0.0481183156371,0.193662703037,0.089521959424,-0.239191845059,0.183827474713,0.117385946214,0.0198162999004,0.331121474504,-0.0289981272072,-0.262491822243,-0.215623706579,0.0900945737958,-0.0987128019333,-0.440044939518,0.0451286882162,-0.0302035175264,0.0927590876818,-0.152935147285,-0.102661348879,0.0226361788809,0.230832234025,0.303333461285,0.0329723954201,-0.0653111040592,-0.0271397363394,0.184694737196,-0.0659353658557
1,1.0,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.480814993382,0.130228951573,-0.104207850993,0.204063177109,0.0150450812653,0.0246889255941,-0.0150485252962,-0.20991897583,0.095445625484,0.15868319571,0.192028969526,0.0423531122506,0.147842630744,-0.11344293505,0.0117924297228,0.0168159902096,0.0718203708529,0.26120993495,0.155166938901,0.258770614862,0.12919293344,0.126818671823,0.197620764375,0.0441221855581,0.0723175927997,-0.112953282893,-0.208366051316,-0.0622957088053,0.0289479028434,0.0906552672386,-0.0912642553449,0.154813081026,-0.0667660832405,0.00402310211211,-0.211171999574,-0.0334882959723,-0.0436253026128,-0.0294151362032,0.249069526792,-0.0367554239929,-0.170906677842,0.081124342978,-0.12365540117,0.0403053350747,0.099216029048,-0.131256073713,-0.18142221868,-0.113439679146,0.250006973743,0.0181645546108,0.0295117814094,-0.0114668821916,0.0305066611618,0.0286914110184,0.193522199988,0.220646589994,-0.11228274554,0.0853193923831,-0.065855704248,0.0154037494212,-0.0610577203333,0.0676027983427,-0.257816255093,0.0595234446228,-0.12100815773,0.0184992682189,0.138113692403,-0.0179878678173,-0.0640674680471,0.247427150607,-0.141370311379,-0.167467519641,-0.262531548738,-0.0531595163047,0.120946206152,0.0805617719889,-0.283833891153,0.153377458453,0.213354721665,-0.0233940090984,0.151939049363,-0.0546762868762,0.0715876966715,-0.267245560884,-0.100548349321,-0.179754450917,-0.040306635201,0.0420316196978,0.166459172964,0.129643470049,0.0507230833173,0.0280778110027,-0.0622540228069,-0.112192884088,0.478319436312,0.332348495722,-0.0428113751113,-0.10075597465,0.130444183946,-0.21874794364
2,1.0,bug,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.468035191298,0.185051724315,0.0545805208385,0.127893015742,0.1739256531,-0.279433965683,-0.0668366700411,-0.0652828142047,0.245924890041,0.204639494419,-0.0922192558646,0.0779226496816,0.13596612215,-0.0982923433185,0.102098166943,-0.0664343014359,0.0812326595187,0.276239603758,0.271812796593,0.311493575573,-0.27449503541,0.167334720492,0.238510355353,-0.0773759484291,0.00660794461146,-0.022153660655,-0.158167853951,-0.0597756393254,0.0608955286443,0.131572619081,-0.0680400952697,-0.013222890906,-0.085094936192,-0.00339016318321,0.029991209507,0.093850903213,0.103412955999,-0.0411473736167,0.129536569118,-0.022746026516,-0.0440876074135,0.0635332390666,-0.105531431735,0.0197526272386,0.28050878644,-0.16537822783,-0.03701357916,-0.00960342492908,0.0522425323725,0.0832161977887,-0.0563692487776,0.0947913601995,0.119835525751,0.147856846452,0.0297220721841,0.0683818086982,-0.112220264971,0.0346142612398,-0.177988901734,0.144734218717,-0.12597464025,0.0482470095158,0.0774517655373,0.0535809695721,-0.181574210525,0.113949678838,0.00412421161309,-0.113851256669,-0.00734753208235,0.0463386587799,-0.188860759139,-0.146862611175,0.0307076573372,-0.0778116732836,0.195396527648,0.192009225488,-0.318488836288,0.161508992314,0.00894926209003,-0.0572159998119,0.00130882114172,-0.191885948181,0.0184033606201,-0.201186224818,0.0194119941443,-0.129690483212,0.0266291890293,0.0868080630898,0.378503888845,0.275909513235,0.0890128016472,0.117877833545,-0.071501724422,0.100814193487,0.233744561672,0.16055546701,0.0670118033886,0.0164994038641,0.12859736383,-0.164615243673


In [87]:
predictors = ["Iteration", "Type"] + description_vecs.names
predictors_title = ["Iteration", "Type"] + title_vecs.names

response = "Estimate"

# gbm_embeddings = H2OGradientBoostingEstimator(stopping_metric = "AUC", stopping_tolerance = 0.001,
#                                               stopping_rounds = 5, score_tree_interval = 10,
#                                               model_id = "gbm_embeddings.hex"
#                                              )

In [88]:
# Run GBM
gbm_model = H2OGradientBoostingEstimator(distribution = "gaussian", ntrees=100, learn_rate=.01)
gbm_model.train(x=predictors, y="Estimate", training_frame=train, validation_frame=valid)

# Run GBM for title
gbm_model_title = H2OGradientBoostingEstimator(distribution = "gaussian", ntrees=100, learn_rate=.01)
gbm_model_title.train(x=predictors_title, y="Estimate", training_frame=train_title, validation_frame=valid_title)


gbm Model Build progress: |███████████████████████████████████████████████| 100%
gbm Model Build progress: |███████████████████████████████████████████████| 100%


In [89]:
# print("With Embeddings AUC: " + str(round(gbm_model.auc(valid = True), 3)))
gbm_metrics = gbm_model.model_performance(test)
gbm_metrics


ModelMetricsRegression: gbm
** Reported on test data. **

MSE: 1.03016507946
RMSE: 1.01497048207
MAE: 0.672908011954
RMSLE: 0.458723991326
Mean Residual Deviance: 1.03016507946




In [90]:
gbm_metrics_title = gbm_model_title.model_performance(test_title)
gbm_metrics_title


ModelMetricsRegression: gbm
** Reported on test data. **

MSE: 0.991456531289
RMSE: 0.995719102603
MAE: 0.664238453888
RMSLE: 0.448884376383
Mean Residual Deviance: 0.991456531289




In [91]:
# Run AutoML
x = train.columns
y = "Estimate"
x.remove(y)
aml = H2OAutoML(max_runtime_secs = 30, seed = seed)
aml.train(x = x, y = y,
          training_frame = train,
          validation_frame = valid,
          leaderboard_frame = test)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [92]:
# Run AutoML
x = train_title.columns
y = "Estimate"
x.remove(y)
aml_title = H2OAutoML(max_runtime_secs = 30, seed = seed)
aml_title.train(x = x, y = y,
          training_frame = train_title,
          validation_frame = valid_title,
          leaderboard_frame = test_title)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [93]:

lb = aml.leaderboard
aml_leader_metrics = aml.leader.model_performance(test)

aml_leader_metrics


ModelMetricsRegression: gbm
** Reported on test data. **

MSE: 1.01825733804
RMSE: 1.0090873788
MAE: 0.686620541462
RMSLE: 0.450027100646
Mean Residual Deviance: 1.01825733804




In [94]:
lb = aml_title.leaderboard
aml_title_leader_metrics = aml_title.leader.model_performance(test_title)

aml_title_leader_metrics


ModelMetricsRegressionGLM: stackedensemble
** Reported on test data. **

MSE: 1.0217669235
RMSE: 1.01082487282
MAE: 0.692810550313
RMSLE: 0.447906899316
R^2: 0.0415301545124
Mean Residual Deviance: 1.0217669235
Null degrees of freedom: 472
Residual degrees of freedom: 470
Null deviance: 504.623837566
Residual deviance: 483.295754816
AIC: 1360.50115372




In [None]:
def predict(stories, w2v, gbm):
    
    words = tokenize(stories["Description"].ascharacter())
    description_vec = w2v.transform(words, aggregate_method="AVERAGE")
    
#     summary_words = tokenize(reviews["Summary"].ascharacter())
#     summary_vec = w2v.transform(summary_words, aggregate_method="AVERAGE")
    
    model_data = stories.cbind(description_vec)#.cbind(summary_vec)
    print(aml.leader.predict(model_data))

#Iteration, type, description
stories = h2o.H2OFrame([
    [15, "feature", "Allow account owners and admins to view and delete github integrations. Super admins can view but not delete", "This includes when a user is looking at the list of github integrations, clicking on a github integration should take the user to the edit or view page depending."],
    [289, "feature", "Allow account owners and admins to view and delete github integrations. Super admins can view but not delete", "This includes when a user is looking at the list of github integrations, clicking on a github integration should take the user to the edit or view page depending."],
    [15, "feature", "Store education box preference via App Settings", "Now that we've seen it work locally, let's add the appropriate logic to the AppSettings endpoint and persist it.\nDo not show preference:\n\nLast time\nRemind me again"],
    [289, "feature", "Store education box preference via App Settings","Now that we've seen it work locally, let's add the appropriate logic to the AppSettings endpoint and persist it.\nDo not show preference:\n\nLast time\nRemind me again"],

])

stories2 = h2o.H2OFrame([
    [15, "feature", "Create the omega primary and warehouse Cloud SQL databases via terraform","Now that we've seen it work locally, let's add the appropriate logic to the AppSettings endpoint and persist it.\nDo not show preference:\n\nLast time\nRemind me again"],
    [647, "feature", "Create the omega primary and warehouse Cloud SQL databases via terraform","Now that we've seen it work locally, let's add the appropriate logic to the AppSettings endpoint and persist it.\nDo not show preference:\n\nLast time\nRemind me again"],
    [647, "feature", "As a Tracker Team Member, I can view a Concourse instance managed by our Infra Team","Spin up in the cf-tracker-production-services project with n >= 1 workers.\n\nAuthentication beyond concourse admin user is out of scope.\n\n## Acceptance criteria\nA concourse instance spun up in the `cf-tracker-production-services` project is accessible via the browser.\n\n---Docs added as part of this story:* https://github.com/pivotaltracker/tracker-docs/blob/master/devops/HowTos/credhub-usage.md* https://github.com/pivotaltracker/tracker-docs/blob/master/devops/HowTos/cloud-workstation-login.md* https://github.com/pivotaltracker/tracker/blob/master/nonapp/concourse-deployment/README.md"]

])

stories.col_names=["Iteration", "Type", "Title", "Description"]

print(predict(stories,description_w2v_model, gbm_model))

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
