***GENERATED CODE FOR regrssmodel PIPELINE.***

***DON'T EDIT THIS CODE.***

***CONNECTOR FUNCTIONS TO READ DATA.***

In [None]:
import os
import datetime
import logging
import warnings
warnings.filterwarnings('ignore')
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


class HDFSConnector:
    """Read CSV data from HDFS into Spark and write DataFrames back as CSV.

    Both operations are invoked on the class itself, e.g.
    ``HDFSConnector.fetch(spark, config)``; no instance state is used.
    """

    @staticmethod
    def _parse_config(config):
        """Return the connector configuration as a dict.

        ``config`` is normally the text of a Python dict literal. It is
        parsed with ``ast.literal_eval``, which accepts literals only,
        rather than ``eval``, which would execute any expression embedded
        in the string. A config that is already a dict is returned as is.

        Raises:
            ValueError / SyntaxError: if the string is not a plain literal.
        """
        import ast  # local import so the class does not rely on a file-level one

        if isinstance(config, dict):
            return config
        return ast.literal_eval(config)

    @staticmethod
    def fetch(spark, config):
        """Load the CSV at ``config['url']`` from HDFS and return it.

        The HDFS host and port are read from the ``HDFS_SERVER`` and
        ``HDFS_PORT`` environment variables (``KeyError`` if unset). The
        file is read with a header row and schema inference, and the first
        two rows are shown with ``display`` before the DataFrame is
        returned.
        """
        ################### INPUT HADOOP HOST PORT TO CONNECT WITH ###############################
        hdfs_server = str(os.environ['HDFS_SERVER'])
        hdfs_port = int(os.environ['HDFS_PORT'])
        settings = HDFSConnector._parse_config(config)
        df = spark.read.options(header='true', inferschema='true').csv(
            f"hdfs://{hdfs_server}:{hdfs_port}{settings['url']}")
        # `display` is not defined in this file; presumably supplied by the
        # notebook environment.
        display(df.limit(2).toPandas())
        return df

    @staticmethod
    def put(df, spark, config):
        """Write ``df`` as CSV and return whatever the writer's ``save`` returns.

        ``config['is_header']`` selects whether a header line is written
        (only the exact value "Use Header Line" enables it) and
        ``config['delimiter']`` is passed through as the field delimiter.
        ``spark`` is accepted for interface compatibility and is not used.
        """
        settings = HDFSConnector._parse_config(config)
        write_header = 'true' if settings["is_header"] == "Use Header Line" else 'false'
        # NOTE(review): the target is "<timestamp>_ <url>" (with a space
        # between the two parts). It looks unintended but is kept unchanged
        # because existing consumers may depend on the resulting location.
        target = ("%s %s") % (
            datetime.datetime.now().strftime("%Y-%m-%d %H.%M.%S") + "_", settings['url'])
        return df.write.format('csv').options(
            header=write_header, delimiter=settings["delimiter"]).save(target)


***TRANSFORMATION FUNCTIONS THAT WILL BE APPLIED TO THE DATA***

In [None]:
from pyspark.sql.functions import dayofmonth, month, year, col
import json
from pyspark.ml.feature import Binarizer
from pyspark.sql.functions import round
from pyspark.sql.types import IntegerType
from pyspark.ml.feature import StringIndexer
from pyspark.sql.functions import col, when
from pyspark.sql.types import IntegerType
from pyspark.sql.functions import mean, stddev, min, max, col


class CleanseData:
    """Fill missing values in Spark DataFrame columns.

    Each ``replaceBy*`` method takes a schema field (an object with a
    ``name`` attribute) and a DataFrame, and returns a new DataFrame in
    which nulls in that column, and cells equal to a single space, are
    replaced by a value derived from the column itself.
    """

    def cleanValueForFE(self, value):
        """Normalise a computed fill value.

        Returns ``""`` for ``None``, the string ``"nan"`` when ``str(value)``
        is ``'nan'``, and ``value`` unchanged otherwise.
        """
        # Identity test: `== None` dispatches to the value's own __eq__,
        # which array-like objects answer element-wise.
        if value is None:
            return ""
        if str(value) == 'nan':
            return "nan"
        return value

    def _replaceByAggregate(self, feature, df, aggregate, alias):
        """Fill ``feature`` with ``aggregate`` computed over that column."""
        name = feature.name
        # Exclude only the rows where *this* column is missing. A bare
        # dropna() would also discard rows with nulls in unrelated columns,
        # which skews the statistic and can leave no rows at all.
        known = df.dropna(subset=[name])
        fillValue = self.cleanValueForFE(
            known.select(aggregate(col(name)).alias(alias)).collect()[0][alias])
        df = df.fillna(fillValue, subset=[name])
        # withColumn returns a new DataFrame; the result must be kept.
        return df.withColumn(
            name, when(col(name) == " ", fillValue).otherwise(col(name)))

    def replaceByMean(self, feature, df, mean_=-1):
        """Fill the column with its mean. ``mean_`` is unused."""
        return self._replaceByAggregate(feature, df, mean, 'mean')

    def replaceByMax(self, feature, df, max_=-1):
        """Fill the column with its maximum. ``max_`` is unused."""
        return self._replaceByAggregate(feature, df, max, 'max')

    def replaceByMin(self, feature, df, min_=-1):
        """Fill the column with its minimum. ``min_`` is unused."""
        return self._replaceByAggregate(feature, df, min, 'min')

    def replaceByStandardDeviation(self, feature, df, stddev_=-1):
        """Fill the column with its standard deviation. ``stddev_`` is unused."""
        return self._replaceByAggregate(feature, df, stddev, 'stddev')

    def replaceDateRandomly(self, feature, df):
        """Fill the column with the first non-null value found in it.

        Despite the name, no randomness is involved: the value comes from
        ``head(1)`` of the non-null rows. If the column has no non-null
        value there is nothing to fill with and ``df`` is returned as is.
        """
        name = feature.name
        sample = df.where(col(name).isNotNull()).head(1)
        if not sample:
            return df
        fillValue = self.cleanValueForFE(sample[0][name])
        df = df.fillna(str(fillValue), subset=[name])
        return df.withColumn(
            name, when(col(name) == " ", fillValue).otherwise(col(name)))

    def replaceNullValues(self, fList, df):
        """Apply the configured replacement strategy to each listed feature.

        Args:
            fList: iterable of dicts with a ``"feature"`` key (column name)
                and a ``"replaceby"`` key naming the strategy.
            df: the DataFrame to clean.

        Returns:
            The DataFrame after all matching strategies have been applied.
            Features that are not columns of ``df``, and strategies that
            match none of the keywords, are skipped.
        """
        # Checked in this order; the first keyword contained in "replaceby"
        # wins.
        strategies = (
            ("mean", self.replaceByMean),
            ("max", self.replaceByMax),
            ("min", self.replaceByMin),
            ("stddev", self.replaceByStandardDeviation),
            ("random", self.replaceDateRandomly),
        )
        # Columns are matched by exact name. A substring test would also
        # apply, for example, the rule for "name" to "reviewsusername".
        fieldsByName = {field.name: field for field in df.schema.fields}
        for featureObj in fList:
            field = fieldsByName.get(featureObj["feature"])
            if field is None:
                continue
            replaceBy = featureObj["replaceby"]
            for keyword, strategy in strategies:
                if keyword in replaceBy:
                    df = strategy(field, df)
                    break
        return df


def StringIndexerTransform(df, params, transformationData={}):
    """Add a ``<feature>_stringindexer`` column holding label indices.

    Args:
        df: input Spark DataFrame.
        params: dict whose ``"feature"`` entry names the column to index;
            no other key is read.
        transformationData: accepted for a uniform transform signature;
            not read (and never mutated) here.

    Returns:
        ``df`` with nulls in the feature column filled with ``''`` and the
        index column appended. When the index column has at most four
        distinct values it is cast to ``IntegerType``; otherwise it keeps
        the type produced by ``StringIndexer``.
    """
    feature = params["feature"]
    outcol = feature + "_stringindexer"

    filled = df.fillna({feature: ''})
    indexer = StringIndexer(
        inputCol=feature, outputCol=outcol, handleInvalid="skip")
    indexed = indexer.fit(filled).transform(filled)

    # Only the number of distinct indices matters, so count them in Spark
    # instead of collecting every value to the driver.
    if indexed.select(outcol).distinct().count() <= 4:
        return indexed.withColumn(outcol, indexed[outcol].cast(IntegerType()))
    return indexed


def ExtractDateTransform(df, params, transformationData={}):
    """Append day-of-month, month and year columns for a date feature.

    ``params['feature']`` names the source column. The new columns are
    named ``<feature>dayofmonth``, ``<feature>month`` and ``<feature>year``
    and are added in that order. ``transformationData`` is not read.
    """
    source = params['feature']
    result = df.fillna({source: ''})
    parts = (
        ('dayofmonth', dayofmonth),
        ('month', month),
        ('year', year),
    )
    for suffix, extract in parts:
        result = result.withColumn(source + suffix, extract(col(source)))
    return result


def BinarizerTransform(df, params, transformationData={}):
    """Add a ``<feature>_binarizer`` column thresholded at a given value.

    ``params['feature']`` names the source column and
    ``transformationData['threshold']`` supplies the threshold (converted
    with ``float``). The source column is recast to double by way of a
    temporary ``feature_cast`` column, nulls are filled with 0.0, and after
    binarizing the source column is rounded to two decimals.
    """
    source = params['feature']
    target = source + "_binarizer"

    # Cast through a temporary column, then rename it back to the source
    # name (drop + rename, as opposed to an in-place cast).
    casted = df.withColumn("feature_cast", df[source].cast("double"))
    casted = casted.drop(source)
    casted = casted.withColumnRenamed("feature_cast", source)
    casted = casted.fillna({source: 0.0})

    threshold = float(transformationData['threshold'])
    binarized = Binarizer(
        threshold=threshold, inputCol=source, outputCol=target).transform(casted)

    return binarized.withColumn(source, round(binarized[source], 2))


class TransformationMain:
    """Generated feature-engineering pipeline for the source DataFrame."""

    # TODO: change df argument in run with following
    def run(transformationDF, config):
        """Clean and transform ``transformationDF`` and return the result.

        ``run`` takes no ``self``; elsewhere in this file it is called on
        the class as ``TransformationMain.run(df, config)``.

        Args:
            transformationDF: the Spark DataFrame to transform.
            config: JSON string whose ``"FE"`` entry is the feature list
                passed to ``CleanseData.replaceNullValues``.

        Returns:
            The DataFrame after null replacement and the per-column
            transforms below. After each transform the source column is
            dropped, so only the derived columns remain for those features.

        The long dict literals are generated metadata for each feature. Of
        their keys, the transform functions defined in this file read only
        ``'feature'`` (and ``'threshold'`` for the binarizer).
        """
        configObj = json.loads(config)
        featureData = configObj["FE"]
        # Fill missing values first, driven by each feature's "replaceby".
        transformationDF = CleanseData().replaceNullValues(featureData, transformationDF)
        # One transform + drop pair per feature follows.
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'id', 'transformation_label': 'String Indexer'}], 'feature': 'id', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
                                                  'count': '5839', 'mean': '', 'stddev': '', 'min': 'AV13V_i2glJLPUi8PFgb', 'max': 'AWIm0C3TYSSHbkXwx3S6', 'missing': '0', 'distinct': '38'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'id'}, {'feature_label': 'id', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('id')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'asins', 'transformation_label': 'String Indexer'}], 'feature': 'asins', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': 'B000O3TFWW', 'max': 'B075WKS4D8', 'missing': '0', 'distinct': '45'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'asins'}, {'feature_label': 'asins', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('asins')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'brand', 'transformation_label': 'String Indexer'}], 'feature': 'brand', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': 'Alpine', 'max': 'Yamaha', 'missing': '0', 'distinct': '35'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'brand'}, {'feature_label': 'brand', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('brand')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'categories', 'transformation_label': 'String Indexer'}], 'feature': 'categories', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {'count': '5839', 'mean': '', 'stddev': '', 'min': 'Accessories,Portable Power Banks,Portable Chargers/Power Packs,Cell Phones & Accessories,Cell Phones,Portable Battery Packs,Cell Phone Accessories,Cell Phone Batteries & Power,Batteries & Battery Packs,cell,Power,Chargers & Cradles',
                                                                                                                                                                                                                                                                      'max': 'stone products,electronics,Parts & Accessories,brick manufacturing,landmark stone,Car Electronics,brick manufacturing process,eBay Motors,natural stone,brick designs,Digital Media Receivers,Car Stereo Receivers,Vehicle Electronics & GPS,brick,Electronics Features,Video In-Dash Units w/o GPS,Car Video,thin brick,Car Audio In-Dash Units,Car Audio,Consumer Electronics,brick sizes,glen gery,manufactured stone,Car Video Units W/out GPS/Nav,Apple CarPlay Receivers,In-Dash with GPS,brick colors,Car Electronics & GPS', 'missing': '0', 'distinct': '38'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'categories'}, {'feature_label': 'categories', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('categories')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'colors', 'transformation_label': 'String Indexer'}], 'feature': 'colors', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': 'Black', 'max': 'White', 'missing': '0', 'distinct': '16'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'colors'}, {'feature_label': 'colors', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('colors')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'dateAdded', 'transformation_label': 'Extract Date'}], 'feature': 'dateAdded', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'False', 'updatedLabel': 'dateAdded'}, {'feature_label': 'dateAdded', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('dateAdded')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'dateUpdated', 'transformation_label': 'Extract Date'}], 'feature': 'dateUpdated', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '0'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'False', 'updatedLabel': 'dateUpdated'}, {'feature_label': 'dateUpdated', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('dateUpdated')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'dimension', 'transformation_label': 'String Indexer'}], 'feature': 'dimension', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': '0.75 in x 3.25 in x 2 in', 'max': '9.2 x 7.6 x 5.2 inches', 'missing': '0', 'distinct': '29'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'dimension'}, {'feature_label': 'dimension', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('dimension')
        # The binarizer reads its threshold from the third argument.
        transformationDF = BinarizerTransform(transformationDF, {'transformationsData': [{'feature_label': 'ean', 'threshold': 2147483647, 'transformation_label': 'Binarizer'}], 'feature': 'ean', 'type': 'real', 'selected': 'True', 'replaceby': 'mean', 'stats': {
            'count': '5839', 'mean': '2147483647.0', 'stddev': '0.0', 'min': '2.147483647E9', 'max': '2.147483647E9', 'missing': '0'}, 'transformation': [{'transformation': 'Binarizer', 'selectedAsDefault': 1}], 'updatedLabel': 'ean'}, {'feature_label': 'ean', 'threshold': 2147483647, 'transformation_label': 'Binarizer'})
        transformationDF = transformationDF.drop('ean')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'imageURLs', 'transformation_label': 'String Indexer'}], 'feature': 'imageURLs', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {'count': '5839', 'mean': '', 'stddev': '', 'min': 'http://i.ebayimg.com/00/$T2eC16dHJGQE9noMbUGIBRFRjWkmIQ~~_10.JPG?set_11.JPG?set_id=807,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387337.jpg,https://i.ebayimg.com/images/g/M3MAAOSw4GVYPE-5/s-l300.jpg,http://i.ebayimg.com/images/g/M3MAAOSw4GVYPE-5/s-l64.jpg,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387335.jpg,http://i.ebayimg.com/images/g/HWwAAOSwrklU~zA3/s-l64.jpg,http://i.ebayimg.com/images/g/POQAAOSweW5U~zA2/s-l64.jpg,http://i.ebayimg.com/images/g/CBwAAOSwEeFU~zAw/s-l64.jpg,http://i.ebayimg.com/images/g/11wAAOSweW5VJsJ2/s-l64.jpg,http://static.bhphoto.com/images/images500x500/samsung_eb_p310siwesta_universal_battery_pack_3100mah_1398432017000_1046376.jpg,http://i.ebayimg.com/images/g/X2gAAOSwFe5X0F9F/s-l64.jpg,http://i.ebayimg.com/images/g/-FEAAOSwqu9U~zAy/s-l64.jpg,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/5573/5573022_sa.jpg,https://images-na.ssl-images-amazon.com/images/I/61sGC6ThWGL._SL1500_.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/5573/5573022_sa.jpg,http://static.bhphoto.com/images/smallimages/1398432017000_1046376.jpg,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387336.jpg,http://i.ebayimg.com/images/g/JzcAAOSwBahU~zAz/s-l64.jpg,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387334.jpg,http://i.ebayimg.com/images/g/2QoAAOSw-kdX0F8q/s-l64.jpg',
                                                                                                                                                                                                                                                                    'max': 'https://static.bhphoto.com/images/itemImgPlaceholder.jpg,https://static.bhphoto.com/images/images500x500/jbl_v700nxtwht_everest_elite_700_around_ear_1462290298000_1240681.jpg,https://images-na.ssl-images-amazon.com/images/I/61KvF2V1psL._SL1182_.jpg,https://images-na.ssl-images-amazon.com/images/I/6182Y0cLFzL._SL1216_.jpg,https://images-na.ssl-images-amazon.com/images/I/61BLgM5ZrNL._SL1000_.jpg,https://images-na.ssl-images-amazon.com/images/I/61iRIvyc4UL._SL1043_.jpg,https://images-na.ssl-images-amazon.com/images/I/6145Bk%2BCA7L._SL1500_.jpg,https://images-na.ssl-images-amazon.com/images/I/41I8t3Q1zJL._SL1000_.jpg,https://images-na.ssl-images-amazon.com/images/I/61c9cmVRlLL._SL1345_.jpg,https://images-na.ssl-images-amazon.com/images/I/51l%2BeB2GuSL._SL1000_.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_ra.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_rd.jpg,https://i5.walmartimages.com/asr/28c0277a-eea5-453f-8224-761b97bb9e24_1.edd2d096d9e81bc238c44aaf0edb88c4.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,https://i5.walmartimages.com/asr/0d8f3a61-da23-407a-8e53-c49192431cec_1.af197ae7fdc037da8174e56520d8371a.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,https://i5.walmartimages.com/asr/78b820b7-3241-4516-b505-054e20b05304_1.f32e62172cf6257b3be39510dd959103.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_ra.jpg,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_rd.jpg,http://images.frys.com/art/product/box_shots/8808212.box.GIF', 'missing': '0', 'distinct': '45'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 
'updatedLabel': 'imageURLs'}, {'feature_label': 'imageURLs', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('imageURLs')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'keys', 'transformation_label': 'String Indexer'}], 'feature': 'keys', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {'count': '5839', 'mean': '', 'stddev': '', 'min': '0027242896000,sony/mexm100bt,sonymexm100bt160wrmsmarinecdreceiverwithbluetoothblacksiriusxmready/b01ceat9zu,027242896000,sonyindashcddmreceiverbuiltinbluetoothsatelliteradioreadywithdetachablefaceplateblack/5495318,sonyindashcddmreceiverbuiltinbluetoothsatelliteradioreadywithdetachablefaceplateblack/b01ceat9zu',
                                                                                                                                                                                                                                                          'max': 'yamaha40wmicrocomponentsystemblack/yacrx332bl,yamaha40wmicrocomponentsystemblack/2877396,yamaha40wmicrocomponentsystemblack/162508611472,yamaha40wmicrocomponentsystemblack/b005yxxs4i,yamaha/crx332bl,027108939599,crx322cdreceiver/yacrx332bl', 'missing': '0', 'distinct': '38'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'keys'}, {'feature_label': 'keys', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('keys')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'manufacturer', 'transformation_label': 'String Indexer'}], 'feature': 'manufacturer', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '3732', 'mean': '', 'stddev': '', 'min': '5 Years', 'max': 'iHome', 'missing': '2424', 'distinct': '18'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'manufacturer'}, {'feature_label': 'manufacturer', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('manufacturer')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'manufacturerNumber', 'transformation_label': 'String Indexer'}], 'feature': 'manufacturerNumber', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': '45W SLIM AC ADAPTER - 88801419', 'max': 'XPS8920-7529SLV-PUS', 'missing': '0', 'distinct': '45'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'manufacturerNumber'}, {'feature_label': 'manufacturerNumber', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('manufacturerNumber')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'name', 'transformation_label': 'String Indexer'}], 'feature': 'name', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': '"Alpine - 6-1/2"" 2-Way Coaxial Car Speakers with Polypropylene Cones (Pair) - Black"', 'max': 'iHome Rechargeable Splash Proof Stereo Bluetooth Speaker - Black (IBT33BC)', 'missing': '0', 'distinct': '47'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'name'}, {'feature_label': 'name', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('name')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'primaryCategories', 'transformation_label': 'String Indexer'}], 'feature': 'primaryCategories', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': 'Electronics', 'max': 'Electronics', 'missing': '0', 'distinct': '1'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'primaryCategories'}, {'feature_label': 'primaryCategories', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('primaryCategories')
        transformationDF = ExtractDateTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewsdate', 'transformation_label': 'Extract Date'}], 'feature': 'reviewsdate', 'type': 'date', 'selected': 'True', 'replaceby': 'random', 'stats': {
            'count': '', 'mean': '', 'stddev': '', 'min': '', 'max': '', 'missing': '60'}, 'transformation': [{'transformation': 'Extract Date', 'selectedAsDefault': 1}], 'generated': 'False', 'updatedLabel': 'reviewsdate'}, {'feature_label': 'reviewsdate', 'transformation_label': 'Extract Date'})
        transformationDF = transformationDF.drop('reviewsdate')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewsdateSeen', 'transformation_label': 'String Indexer'}], 'feature': 'reviewsdateSeen', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': '2014-10-22T00:00:00Z', 'max': '2018-05-27T00:00:00Z,2018-05-26T00:00:00Z', 'missing': '0', 'distinct': '837'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'reviewsdateSeen'}, {'feature_label': 'reviewsdateSeen', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('reviewsdateSeen')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewssourceURLs', 'transformation_label': 'String Indexer'}], 'feature': 'reviewssourceURLs', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': 'http://reviews.bestbuy.com/3545/1416147/reviews.htm?format=embedded', 'max': 'https://www.walmart.com/reviews/product/51933274', 'missing': '0', 'distinct': '1955'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'reviewssourceURLs'}, {'feature_label': 'reviewssourceURLs', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('reviewssourceURLs')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewstext', 'transformation_label': 'String Indexer'}], 'feature': 'reviewstext', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {'count': '5839', 'mean': '', 'stddev': '', 'min': "!!!!!DO NOT BUY IF YOU HAVE A MAC!!!!!silverlight and harmony plugins are CLEARLY NOT production ready. WILL NOT SYNC!!! tried different macs, different OS', etc.i have an apple tattoo and have been using logitech for years. not anymore. and my friends at CNET will be hearing about this shortly.oh, and i almost forgot to mention. the ONLY tech support option besides the user forums is a 29 an hour waste of time trying to troubleshoot the silverlight plugin.",
                                                                                                                                                                                                                                                                        'max': 'yamaha 5.1 speaker sistema sound is very good, all in te box, I buy a little more wire.', 'missing': '0', 'distinct': '5772'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'reviewstext'}, {'feature_label': 'reviewstext', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('reviewstext')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewstitle', 'transformation_label': 'String Indexer'}], 'feature': 'reviewstitle', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': ' ""Whatch DVD"" etc. You can also have custom settings like; "" ""Play Pandora"" click your choice and it does all the work.If something doesn\'t come on right. Like if you don\'t point it right', 'max': 'you can tweek the sound in app', 'missing': '0', 'distinct': '4472'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'reviewstitle'}, {'feature_label': 'reviewstitle', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('reviewstitle')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'reviewsusername', 'transformation_label': 'String Indexer'}], 'feature': 'reviewsusername', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '2004474.55', 'stddev': '6415011.39', 'min': ' ""Hey', 'max': 'zznj', 'missing': '0', 'distinct': '5246'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'reviewsusername'}, {'feature_label': 'reviewsusername', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('reviewsusername')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'sourceURLs', 'transformation_label': 'String Indexer'}], 'feature': 'sourceURLs', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '', 'stddev': '', 'min': ' ""Battery high/low""', 'max': 'weatherworrier', 'missing': '0', 'distinct': '137'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'sourceURLs'}, {'feature_label': 'sourceURLs', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('sourceURLs')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'upc', 'transformation_label': 'String Indexer'}], 'feature': 'upc', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '395802461569.11', 'stddev': '374306735051.74', 'min': ' Blu-ray', 'max': 'wNg11', 'missing': '0', 'distinct': '117'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'upc'}, {'feature_label': 'upc', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('upc')
        transformationDF = StringIndexerTransform(transformationDF, {'transformationsData': [{'feature_label': 'weight', 'transformation_label': 'String Indexer'}], 'feature': 'weight', 'type': 'string', 'selected': 'True', 'replaceby': 'max', 'stats': {
            'count': '5839', 'mean': '440732502627.56', 'stddev': '380454569026.11', 'min': ' I don\'t need to switch to ""modes"" when using different devices', 'max': 'scapula', 'missing': '0', 'distinct': '126'}, 'transformation': [{'transformation': 'String Indexer', 'selectedAsDefault': 1}], 'updatedLabel': 'weight'}, {'feature_label': 'weight', 'transformation_label': 'String Indexer'})
        transformationDF = transformationDF.drop('weight')
        # Preview the first two rows of the result. `display` is not defined
        # in this file; presumably supplied by the notebook environment.
        display(transformationDF.limit(2).toPandas())
        return transformationDF


***AUTOML FUNCTIONS***

In [None]:
from sklearn.model_selection import train_test_split
from tpot import TPOTRegressor
import pyspark


def functionRegression(sparkDF, listOfFeatures, label):
    """Fit a TPOT regressor on a Spark DataFrame and return it with test data.

    The DataFrame is persisted, converted to pandas, and split 90/10 into
    train and test sets with a fixed seed. The fitted model's ``score`` on
    the test split is shown via ``display``.

    Args:
        sparkDF: Spark DataFrame holding the features and the label.
        listOfFeatures: column names used as model inputs.
        label: name of the target column.

    Returns:
        dict with keys ``'model'``, ``'X_test'``, ``'y_test'``, ``'label'``
        and ``'columnNames'``.
    """
    sparkDF.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    pandas_frame = sparkDF.toPandas()

    without_label = pandas_frame.drop(label, axis=1)
    features = without_label[listOfFeatures].values
    target = pandas_frame[label].values

    split = train_test_split(features, target, random_state=1, test_size=0.1)
    train_features, test_features, train_target, test_target = split

    regressor = TPOTRegressor(
        verbosity=3,
        generations=10,
        max_time_mins=5,
        n_jobs=-1,
        random_state=25,
        population_size=15,
        use_dask=True,
    )
    regressor.fit(train_features, train_target)

    # NOTE(review): the text says "Error rate" but the value is whatever
    # TPOTRegressor.score returns; confirm the intended wording.
    holdout_score = regressor.score(test_features, test_target)
    display(" Error rate of Model : %s" % holdout_score)

    return {
        'model': regressor,
        'X_test': test_features,
        'y_test': test_target,
        'label': label,
        'columnNames': listOfFeatures,
    }


***READING DATAFRAME***

In [None]:
############## CREATE SPARK SESSION ############################ ENTER YOUR SPARK MASTER IP AND PORT TO CONNECT TO SERVER ################
from pyspark.sql import SparkSession
# Single-threaded local Spark session used by the cells that follow.
spark = SparkSession.builder.master('local[1]').getOrCreate()
#%run regrssmodelHooks.ipynb
try:
    #sourcePreExecutionHook()

    # Load the source CSV from HDFS; the config is passed as a dict-literal
    # string, of which HDFSConnector.fetch reads the 'url' entry.
    electronicsssproductdatacleaned = HDFSConnector.fetch(spark, "{'url': '/FileStore/platform/uploadedSourceFiles/ElectronicsssProductDataCleaned.csv', 'filename': 'ElectronicsssProductDataCleaned.csv', 'delimiter': ',', 'file_type': 'Delimeted', 'is_header': 'Use Header Line', 'domain': 'http://172.31.59.158', 'port': '40070', 'dirPath': '/FileStore/platform', 'server_url': '/numtraPlatform/NumtraPlatformV3/uploads/platform/'}")
    #sourcePostExecutionHook(electronicsssproductdatacleaned)

except Exception as ex:
    # Best-effort cell: the failure is logged, not raised. logging.exception
    # records the traceback as well as the message, so the cause is visible
    # when a later cell fails because the DataFrame was never assigned.
    logging.exception(ex)
#spark.stop()


***TRANSFORMING DATAFRAME***

In [None]:
#%run regrssmodelHooks.ipynb
# Cell purpose: apply the generated feature-engineering plan to the loaded
# DataFrame and keep the result in `regrssmodelautofe`.
# NOTE(review): TransformationMain is defined outside this view; presumably it
# applies each entry under "FE" (String Indexer / Extract Date / Binarizer /
# novalue) to the named feature -- confirm against its definition.
# NOTE(review): in this export the JSON literal below is hard-wrapped across
# several physical lines, some of them inside string values; confirm the
# original notebook cell keeps the call on a single line.
try:
	#transformationPreExecutionHook()

	# The plan is built as a Python dict and serialised with json.dumps, so
	# TransformationMain.run receives it as a JSON string.
	regrssmodelautofe = TransformationMain.run(electronicsssproductdatacleaned,json.dumps( {"FE": [{"transformationsData": [{"feature_label": "id", "transformation_label": "String Indexer"}], "feature": "id", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "AV13V_i2glJLPUi8PFgb", "max": "AWIm0C3TYSSHbkXwx3S6", "missing": "0", "distinct": "38"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "id"}, {"transformationsData": [{"feature_label": "asins", "transformation_label": "String Indexer"}], "feature": "asins", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "B000O3TFWW", "max": "B075WKS4D8", "missing": "0", "distinct": "45"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "asins"}, {"transformationsData": [{"feature_label": "brand", "transformation_label": "String Indexer"}], "feature": "brand", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "Alpine", "max": "Yamaha", "missing": "0", "distinct": "35"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "brand"}, {"transformationsData": [{"feature_label": "categories", "transformation_label": "String Indexer"}], "feature": "categories", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "Accessories,Portable Power Banks,Portable Chargers/Power Packs,Cell Phones & Accessories,Cell Phones,Portable Battery Packs,Cell Phone Accessories,Cell Phone Batteries & Power,Batteries & Battery Packs,cell,Power,Chargers & Cradles", "max": "stone products,electronics,Parts & Accessories,brick manufacturing,landmark stone,Car Electronics,brick manufacturing process,eBay Motors,natural stone,brick designs,Digital 
Media Receivers,Car Stereo Receivers,Vehicle Electronics & GPS,brick,Electronics Features,Video In-Dash Units w/o GPS,Car Video,thin brick,Car Audio In-Dash Units,Car Audio,Consumer Electronics,brick sizes,glen gery,manufactured stone,Car Video Units W/out GPS/Nav,Apple CarPlay Receivers,In-Dash with GPS,brick colors,Car Electronics & GPS", "missing": "0", "distinct": "38"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "categories"}, {"transformationsData": [{"feature_label": "colors", "transformation_label": "String Indexer"}], "feature": "colors", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "Black", "max": "White", "missing": "0", "distinct": "16"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "colors"}, {"transformationsData": [{"feature_label": "dateAdded", "transformation_label": "Extract Date"}], "feature": "dateAdded", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "False", "updatedLabel": "dateAdded"}, {"transformationsData": [{"feature_label": "dateUpdated", "transformation_label": "Extract Date"}], "feature": "dateUpdated", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "0"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "False", "updatedLabel": "dateUpdated"}, {"transformationsData": [{"feature_label": "dimension", "transformation_label": "String Indexer"}], "feature": "dimension", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "0.75 in x 3.25 in x 2 in", "max": "9.2 x 7.6 x 5.2 
inches", "missing": "0", "distinct": "29"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "dimension"}, {"transformationsData": [{"feature_label": "ean", "threshold": 2147483647, "transformation_label": "Binarizer"}], "feature": "ean", "type": "real", "selected": "True", "replaceby": "mean", "stats": {"count": "5839", "mean": "2147483647.0", "stddev": "0.0", "min": "2.147483647E9", "max": "2.147483647E9", "missing": "0"}, "transformation": [{"transformation": "Binarizer", "selectedAsDefault": 1}], "updatedLabel": "ean"}, {"transformationsData": [{"feature_label": "imageURLs", "transformation_label": "String Indexer"}], "feature": "imageURLs", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "http://i.ebayimg.com/00/$T2eC16dHJGQE9noMbUGIBRFRjWkmIQ~~_10.JPG?set_11.JPG?set_id=807,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387337.jpg,https://i.ebayimg.com/images/g/M3MAAOSw4GVYPE-5/s-l300.jpg,http://i.ebayimg.com/images/g/M3MAAOSw4GVYPE-5/s-l64.jpg,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387335.jpg,http://i.ebayimg.com/images/g/HWwAAOSwrklU~zA3/s-l64.jpg,http://i.ebayimg.com/images/g/POQAAOSweW5U~zA2/s-l64.jpg,http://i.ebayimg.com/images/g/CBwAAOSwEeFU~zAw/s-l64.jpg,http://i.ebayimg.com/images/g/11wAAOSweW5VJsJ2/s-l64.jpg,http://static.bhphoto.com/images/images500x500/samsung_eb_p310siwesta_universal_battery_pack_3100mah_1398432017000_1046376.jpg,http://i.ebayimg.com/images/g/X2gAAOSwFe5X0F9F/s-l64.jpg,http://i.ebayimg.com/images/g/-FEAAOSwqu9U~zAy/s-l64.jpg,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/5573/5573022_sa.jpg,https://images-na.ssl-images-amazon.com/images/I/61sGC6ThWGL._SL1500_.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/5573/5573022_sa.jpg,http://static.bhphoto.com/images/smallimages/1398432017000_1046376.jpg,http://static.bhph
oto.com/images/multiple_images/thumbnails/1398431703000_IMG_387336.jpg,http://i.ebayimg.com/images/g/JzcAAOSwBahU~zAz/s-l64.jpg,http://static.bhphoto.com/images/multiple_images/thumbnails/1398431703000_IMG_387334.jpg,http://i.ebayimg.com/images/g/2QoAAOSw-kdX0F8q/s-l64.jpg", "max": "https://static.bhphoto.com/images/itemImgPlaceholder.jpg,https://static.bhphoto.com/images/images500x500/jbl_v700nxtwht_everest_elite_700_around_ear_1462290298000_1240681.jpg,https://images-na.ssl-images-amazon.com/images/I/61KvF2V1psL._SL1182_.jpg,https://images-na.ssl-images-amazon.com/images/I/6182Y0cLFzL._SL1216_.jpg,https://images-na.ssl-images-amazon.com/images/I/61BLgM5ZrNL._SL1000_.jpg,https://images-na.ssl-images-amazon.com/images/I/61iRIvyc4UL._SL1043_.jpg,https://images-na.ssl-images-amazon.com/images/I/6145Bk%2BCA7L._SL1500_.jpg,https://images-na.ssl-images-amazon.com/images/I/41I8t3Q1zJL._SL1000_.jpg,https://images-na.ssl-images-amazon.com/images/I/61c9cmVRlLL._SL1345_.jpg,https://images-na.ssl-images-amazon.com/images/I/51l%2BeB2GuSL._SL1000_.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_ra.jpg,https://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_rd.jpg,https://i5.walmartimages.com/asr/28c0277a-eea5-453f-8224-761b97bb9e24_1.edd2d096d9e81bc238c44aaf0edb88c4.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,https://i5.walmartimages.com/asr/0d8f3a61-da23-407a-8e53-c49192431cec_1.af197ae7fdc037da8174e56520d8371a.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,https://i5.walmartimages.com/asr/78b820b7-3241-4516-b505-054e20b05304_1.f32e62172cf6257b3be39510dd959103.jpeg?odnHeight=450&odnWidth=450&odnBg=FFFFFF,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_ra.jpg,http://pisces.bbystatic.com/image2/BestBuy_US/images/products/4403/4403709_rd.jpg,http://images.frys.com/art/product/box_shots/8808212.box.GIF", "missing": "0", "distinct": "45"}, "transformation": [{"transformation": "String Indexer", 
"selectedAsDefault": 1}], "updatedLabel": "imageURLs"}, {"transformationsData": [{"feature_label": "keys", "transformation_label": "String Indexer"}], "feature": "keys", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "0027242896000,sony/mexm100bt,sonymexm100bt160wrmsmarinecdreceiverwithbluetoothblacksiriusxmready/b01ceat9zu,027242896000,sonyindashcddmreceiverbuiltinbluetoothsatelliteradioreadywithdetachablefaceplateblack/5495318,sonyindashcddmreceiverbuiltinbluetoothsatelliteradioreadywithdetachablefaceplateblack/b01ceat9zu", "max": "yamaha40wmicrocomponentsystemblack/yacrx332bl,yamaha40wmicrocomponentsystemblack/2877396,yamaha40wmicrocomponentsystemblack/162508611472,yamaha40wmicrocomponentsystemblack/b005yxxs4i,yamaha/crx332bl,027108939599,crx322cdreceiver/yacrx332bl", "missing": "0", "distinct": "38"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "keys"}, {"transformationsData": [{"feature_label": "manufacturer", "transformation_label": "String Indexer"}], "feature": "manufacturer", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "3732", "mean": "", "stddev": "", "min": "5 Years", "max": "iHome", "missing": "2424", "distinct": "18"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "manufacturer"}, {"transformationsData": [{"feature_label": "manufacturerNumber", "transformation_label": "String Indexer"}], "feature": "manufacturerNumber", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "45W SLIM AC ADAPTER - 88801419", "max": "XPS8920-7529SLV-PUS", "missing": "0", "distinct": "45"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "manufacturerNumber"}, {"transformationsData": [{"feature_label": "name", "transformation_label": "String Indexer"}], 
"feature": "name", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "\"Alpine - 6-1/2\"\" 2-Way Coaxial Car Speakers with Polypropylene Cones (Pair) - Black\"", "max": "iHome Rechargeable Splash Proof Stereo Bluetooth Speaker - Black (IBT33BC)", "missing": "0", "distinct": "47"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "name"}, {"transformationsData": [{"feature_label": "primaryCategories", "transformation_label": "String Indexer"}], "feature": "primaryCategories", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "Electronics", "max": "Electronics", "missing": "0", "distinct": "1"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "primaryCategories"}, {"transformationsData": [{"feature_label": "reviewsdate", "transformation_label": "Extract Date"}], "feature": "reviewsdate", "type": "date", "selected": "True", "replaceby": "random", "stats": {"count": "", "mean": "", "stddev": "", "min": "", "max": "", "missing": "60"}, "transformation": [{"transformation": "Extract Date", "selectedAsDefault": 1}], "generated": "False", "updatedLabel": "reviewsdate"}, {"transformationsData": [{"feature_label": "reviewsdateSeen", "transformation_label": "String Indexer"}], "feature": "reviewsdateSeen", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "2014-10-22T00:00:00Z", "max": "2018-05-27T00:00:00Z,2018-05-26T00:00:00Z", "missing": "0", "distinct": "837"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "reviewsdateSeen"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "reviewsdoRecommend", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", 
"replaceby": "mean", "selected": "True", "stats": {"count": "5839", "mean": "0.74", "stddev": "0.44", "min": "0", "max": "1", "missing": "0"}, "updatedLabel": "reviewsdoRecommend"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "reviewsnumHelpful", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "5839", "mean": "0.63", "stddev": "3.23", "min": "0", "max": "128", "missing": "0"}, "updatedLabel": "reviewsnumHelpful"}, {"transformationsData": [{"transformation_label": "novalue"}], "feature": "reviewsrating", "transformation": [{"transformation": "novalue", "selectedAsDefault": 1}], "type": "numeric", "replaceby": "mean", "selected": "True", "stats": {"count": "5839", "mean": "4.39", "stddev": "1.0", "min": "1", "max": "5", "missing": "0"}, "updatedLabel": "reviewsrating"}, {"transformationsData": [{"feature_label": "reviewssourceURLs", "transformation_label": "String Indexer"}], "feature": "reviewssourceURLs", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "http://reviews.bestbuy.com/3545/1416147/reviews.htm?format=embedded", "max": "https://www.walmart.com/reviews/product/51933274", "missing": "0", "distinct": "1955"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "reviewssourceURLs"}, {"transformationsData": [{"feature_label": "reviewstext", "transformation_label": "String Indexer"}], "feature": "reviewstext", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": "!!!!!DO NOT BUY IF YOU HAVE A MAC!!!!!silverlight and harmony plugins are CLEARLY NOT production ready. WILL NOT SYNC!!! tried different macs, different OS', etc.i have an apple tattoo and have been using logitech for years. not anymore. 
and my friends at CNET will be hearing about this shortly.oh, and i almost forgot to mention. the ONLY tech support option besides the user forums is a 29 an hour waste of time trying to troubleshoot the silverlight plugin.", "max": "yamaha 5.1 speaker sistema sound is very good, all in te box, I buy a little more wire.", "missing": "0", "distinct": "5772"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "reviewstext"}, {"transformationsData": [{"feature_label": "reviewstitle", "transformation_label": "String Indexer"}], "feature": "reviewstitle", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": " \"\"Whatch DVD\"\" etc. You can also have custom settings like; \"\" \"\"Play Pandora\"\" click your choice and it does all the work.If something doesn't come on right. Like if you don't point it right", "max": "you can tweek the sound in app", "missing": "0", "distinct": "4472"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "reviewstitle"}, {"transformationsData": [{"feature_label": "reviewsusername", "transformation_label": "String Indexer"}], "feature": "reviewsusername", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "2004474.55", "stddev": "6415011.39", "min": " \"\"Hey", "max": "zznj", "missing": "0", "distinct": "5246"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "reviewsusername"}, {"transformationsData": [{"feature_label": "sourceURLs", "transformation_label": "String Indexer"}], "feature": "sourceURLs", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "", "stddev": "", "min": " \"\"Battery high/low\"\"", "max": "weatherworrier", "missing": "0", "distinct": "137"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], 
"updatedLabel": "sourceURLs"}, {"transformationsData": [{"feature_label": "upc", "transformation_label": "String Indexer"}], "feature": "upc", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "395802461569.11", "stddev": "374306735051.74", "min": " Blu-ray", "max": "wNg11", "missing": "0", "distinct": "117"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "upc"}, {"transformationsData": [{"feature_label": "weight", "transformation_label": "String Indexer"}], "feature": "weight", "type": "string", "selected": "True", "replaceby": "max", "stats": {"count": "5839", "mean": "440732502627.56", "stddev": "380454569026.11", "min": " I don't need to switch to \"\"modes\"\" when using different devices", "max": "scapula", "missing": "0", "distinct": "126"}, "transformation": [{"transformation": "String Indexer", "selectedAsDefault": 1}], "updatedLabel": "weight"}]}))

	#transformationPostExecutionHook(regrssmodelautofe)

# Failures are only logged; if the transformation fails, `regrssmodelautofe`
# stays undefined for the training cell.
except Exception as ex: 
	logging.error(ex)


***TRAIN MODEL***

In [None]:
#%run regrssmodelHooks.ipynb
# Cell purpose: train the AutoML regression model on the transformed
# DataFrame and keep the resulting dict in `dataAutoML`.
try:
	#mlPreExecutionHook()

	# Arguments follow functionRegression(sparkDF, listOfFeatures, label):
	# the list holds the input feature columns, and the trailing
	# "name_stringindexer" is the column the model is trained to predict.
	dataAutoML=functionRegression(regrssmodelautofe, ["reviewsdoRecommend", "reviewsnumHelpful", "reviewsrating", "id_stringindexer", "asins_stringindexer", "brand_stringindexer", "categories_stringindexer", "colors_stringindexer", "dateAdded_dayofmonth", "dateAdded_month", "dateAdded_year", "dateUpdated_dayofmonth", "dateUpdated_month", "dateUpdated_year", "dimension_stringindexer", "ean_binarizer", "imageURLs_stringindexer", "keys_stringindexer", "manufacturer_stringindexer", "manufacturerNumber_stringindexer", "primaryCategories_stringindexer", "reviewsdate_dayofmonth", "reviewsdate_month", "reviewsdate_year", "reviewsdateSeen_stringindexer", "reviewssourceURLs_stringindexer", "reviewstext_stringindexer", "reviewstitle_stringindexer", "reviewsusername_stringindexer", "sourceURLs_stringindexer", "upc_stringindexer", "weight_stringindexer"], "name_stringindexer")

	#mlPostExecutionHook(dataAutoML)

# Failures are only logged; if training fails, `dataAutoML` stays undefined
# for the prediction cell.
except Exception as ex: 
	logging.error(ex)
#spark.stop()


***PREDICT ON TRAINED MODEL***

In [None]:
import pandas as pd
import numpy as np
import sklearn.metrics

# Cell purpose: score the trained model on the held-out split, report R2 / MSE /
# MAE, and show a preview table of actual vs. predicted values.
try:
    # Unpack the artefacts produced by the training cell.
    model = dataAutoML['model']
    X_test = dataAutoML['X_test']
    y_test = dataAutoML['y_test']
    label = dataAutoML['label']
    # Work on a copy: the list is edited below, and editing the list stored
    # inside dataAutoML would leave the "<label>_predicted" entry behind, so
    # re-running this cell would fail with a column-count mismatch when the
    # DataFrame is built.
    columnNames = list(dataAutoML['columnNames'])
    if label in columnNames:
        columnNames.remove(label)
    predicted = label + "_predicted"
    y_predicted = model.predict(X_test)

    # Rebuild a labelled table from the raw feature array, then append the
    # actual and predicted target values.
    df = pd.DataFrame(X_test, columns=columnNames)
    df[label] = y_test
    df[predicted] = y_predicted

    # Show the actual value first, the prediction second, then the features.
    columnNames.insert(0, predicted)
    columnNames.insert(0, label)
    df = df[columnNames]

    # Metrics are rounded to one decimal place for display.
    R2 = np.round(sklearn.metrics.r2_score(y_test, y_predicted), 1)
    Mean_Squared_Error = np.round(sklearn.metrics.mean_squared_error(y_test, y_predicted), 1)
    Mean_Absolute_Error = np.round(sklearn.metrics.mean_absolute_error(y_test, y_predicted), 1)
    display(" R2 score of Prediction on test data    : %s"%R2)
    display(" Mean Squared Error of Prediction on test data    : %s"%Mean_Squared_Error)
    display(" Mean Absolute Error of Prediction on test data   : %s"%Mean_Absolute_Error)
    display(df.head())
except Exception as ex:
    # Also reached when `dataAutoML` is undefined because training failed.
    logging.error(ex)

# Release the Spark session once the pipeline has finished.
spark.stop()

