In [None]:
import os
import posixpath

import boto3
import pandas as pd
import sagemaker as sagemaker
from sagemaker import ModelPackage
from sagemaker import get_execution_role

from src.configuration import Configuration
from src.services import S3, Woocommerce

In [2]:
# Read the run parameters (S3, AWS keys, WooCommerce and batch-transform
# settings are looked up from this mapping by the cells below).
config_path = 'configs/config.json'
config = Configuration(config_path).parameters

In [5]:
# Build the two project service clients from the loaded configuration.
s3_settings = config['s3']
aws_keys = config['aws_keys']
woo_settings = config['woocommerce']

# S3 wrapper arguments: region_name, access_key, secret_key, bucket
s3 = S3(
    region_name=s3_settings['region_name'],
    access_key=aws_keys['access_key'],
    secret_key=aws_keys['secret_key'],
    bucket=s3_settings['bucket'],
)

# Woocommerce wrapper arguments: url, consumer_key, consumer_secret
woocommerce = Woocommerce(
    url=woo_settings['url'],
    consumer_key=woo_settings['consumer_key'],
    consumer_secret=woo_settings['consumer_secret'],
)

In [6]:
# Have the Woocommerce client download its data to a local CSV file.
raw_export_path = 'data/input_data.csv'
woocommerce.download_data(raw_export_path)

In [7]:
# Load the CSV written by the download step into a DataFrame.
downloaded_csv = 'data/input_data.csv'
mydata = pd.read_csv(downloaded_csv)

In [8]:
# Show which columns the downloaded CSV provides.
mydata.columns

Index(['InvoiceID', 'SKUID', 'Item'], dtype='object')

In [9]:
# Peek at the InvoiceID of the row labelled 0 (single row/column lookup).
mydata.loc[0, 'InvoiceID']

55

In [10]:
# Show the (rows, columns) size of the downloaded data.
mydata.shape

(41604, 3)

In [11]:
# Save the last 100 rows as a small test file (index column not written).
last_rows = mydata.tail(100)
last_rows.to_csv('data/test.csv', index=False)

In [11]:
# Load the sample input file.
# NOTE(review): `sample` is not referenced by any later cell visible in this
# notebook - confirm whether this cell is still needed.
sample = pd.read_csv('data/sample_input.csv')

In [30]:
# Load the sample test input that the filtering cells below work on.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, so this CSV
# appears to have been saved with its index - confirm that is intended.
data = pd.read_csv('data/sample_input_test.csv')

In [31]:
# Display the loaded frame (pandas shows a truncated view for long frames).
data

Unnamed: 0,InvoiceID,SKUID,Item
0,536527,22809,SET OF 6 T-LIGHTS SANTA
1,536527,84347,ROTATING SILVER ANGELS T-LIGHT HLDR
2,536527,84945,MULTI COLOUR SILVER T-LIGHT HOLDER
3,536527,22242,5 HOOK HANGER MAGIC TOADSTOOL
4,536527,22244,3 HOOK HANGER MAGIC GARDEN
...,...,...,...
9107,581578,22993,SET OF 4 PANTRY JELLY MOULDS
9108,581578,22907,PACK OF 20 NAPKINS PANTRY DESIGN
9109,581578,22908,PACK OF 20 NAPKINS RED APPLES
9110,581578,23215,JINGLE BELL HEART ANTIQUE SILVER


In [32]:
# Keep only the rows whose SKUID is made up entirely of numeric characters.
# The values are converted to text first so the check also works when pandas
# parsed the column (or part of it) as integers, and so missing values are
# simply filtered out instead of raising AttributeError.
sku_as_text = data['SKUID'].astype(str)
data_r = data[sku_as_text.str.isnumeric()]

In [33]:
# Narrow the numeric-SKU rows further to those whose SKUID, read as an
# integer, is below 100.
sku_numbers = data_r['SKUID'].map(int)
data_r = data_r[sku_numbers < 100]

In [35]:
# Persist the filtered rows (index column not written).
filtered_csv_path = 'data/sample_input_sample.csv'
data_r.to_csv(filtered_csv_path, index=False)

In [37]:
# Upload the local input CSV to the bucket through the project's S3 wrapper.
# NOTE(review): the file is uploaded under the name 'input_data.csv', while
# the transform input below is taken from config["s3"]["input_file"] - confirm
# that both refer to the same S3 object.
s3.upload('data/sample_input_test.csv', 'input_data.csv')

job_settings = config['batch_transform_job']
model_package_arn = job_settings['model_package_arn']

# Look up the ARN of the IAM role named in the config; it is passed to the
# model package as its execution role.
iam = boto3.client('iam')
role = iam.get_role(RoleName=job_settings['role_name'])['Role']['Arn']

sagemaker_session = sagemaker.Session()

model = ModelPackage(model_package_arn=model_package_arn,
                     role=role,
                     sagemaker_session=sagemaker_session)

# S3 URIs always use '/' as separator, so join with posixpath rather than
# os.path, which would insert the local OS separator (e.g. '\' on Windows).
output_path = 's3://' + posixpath.join(config['s3']['bucket'], 'output')
input_path = 's3://' + posixpath.join(config['s3']['bucket'], config['s3']['input_file'])

transformer = model.transformer(instance_count=job_settings['instance_count'],
                                instance_type=job_settings['instance_type'],
                                output_path=output_path)

# Start the batch transform on the uploaded CSV, then block until it finishes
# (the container log is streamed into the cell output while waiting).
transformer.transform(input_path, content_type='text/csv')

transformer.wait()

...............[34m * Serving Flask app "serve" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
 * Running on http://0.0.0.0:8080/ (Press CTRL+C to quit)
 * Restarting with stat
 * Debugger is active!
 * Debugger PIN: 833-460-156[0m
[34m169.254.255.130 - - [14/Apr/2020 18:55:25] "GET /ping HTTP/1.1" 200 -[0m
[34m169.254.255.130 - - [14/Apr/2020 18:55:25] "GET /execution-parameters HTTP/1.1" 404 -[0m
[35m169.254.255.130 - - [14/Apr/2020 18:55:25] "GET /ping HTTP/1.1" 200 -[0m
[35m169.254.255.130 - - [14/Apr/2020 18:55:25] "GET /execution-parameters HTTP/1.1" 404 -[0m
[32m2020-04-14T18:55:25.583:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34m169.254.255.130 - - [14/Apr/2020 18:55:27] "POST /invocations HTTP/1.1" 200 -[0m
[35m169.254.255.130 - - [14/Apr/2020 18:55:27] "POST /invocations HTTP/1.1" 200 -[0m



In [None]:
# Name of the result object to fetch: the last path component of the
# configured input file with '.out' appended.
input_basename = config["s3"]["input_file"].rsplit('/', 1)[-1]
ouput_filenme = input_basename + '.out'

In [None]:
# Key prefix of the transform output: everything after 's3://<bucket>/'.
# Splitting at most three times keeps a nested prefix such as 'a/b' intact,
# whereas indexing a full split would keep only its first folder. A trailing
# '/' is removed so the prefix can be joined with a file name.
bucket_folder = transformer.output_path.split('/', 3)[3].rstrip('/')

In [31]:
# Download the transform result from S3 into data/recommendations.csv.
s3_conn = boto3.client("s3")
bucket_name = config["s3"]["bucket"]

# S3 keys are '/'-separated on every platform, so build the key with
# posixpath instead of os.path (which would use '\' on Windows).
output_key = posixpath.join(bucket_folder, ouput_filenme)

with open('data/recommendations.csv', 'wb') as f:
    s3_conn.download_fileobj(bucket_name, output_key, f)

# Reached only when the download succeeded and the local file has been closed.
print("Output file loaded from bucket")

Output file loaded from bucket


In [28]:
# Load the recommendations from the same path the download cell writes to
# ('data/recommendations.csv'); the bare file name pointed at a different file.
output_df = pd.read_csv("data/recommendations.csv")

# Remove the 'Unnamed: 0' column (presumably an index saved into the CSV).
# The keyword form is used because the positional axis argument of drop() is
# no longer accepted by current pandas releases.
output_df = output_df.drop(columns='Unnamed: 0')
out_final = output_df
print("Output: ")
out_final.head()

Output: 


Unnamed: 0,Item in cart,Recommendation,Item Support,Support,Confidence,Lift,Leverage,Conviction


# Usage Instructions - Mphasis HyperGraf Market Basket Analysis

Mphasis HyperGraf Market Basket Analysis uncovers associations between articles and identifies the products that are frequently purchased together by analyzing large volumes of transactional data. Mphasis HyperGraf is an omni-channel digital 360° solution that transforms enterprise decision making by providing the most comprehensive, accurate, real-time and actionable customer engagement insights across millions of data points spread over multiple engagement channels.

### Prerequisite

To run this algorithm you need to have access to the following AWS Services:
- Access to AWS SageMaker and the model package.
- An S3 bucket to specify input/output.
- Role for AWS SageMaker to access input/output from S3.


## Input Data



    1) The input dataset should be in csv format.

    2) The column names in input file should be:

        *  InvoiceID: This is the Invoice Number which is the systematically assigned sequential code unique to each invoice.
        *  SKUID: Stock Keeping Unit ID.
        *  Item: description of item, a string, name of item along with brand name and color name.

    3) More than one item may share the same stock keeping unit ID (SKUID), but no item can have more than one SKUID.

    4) Return orders must contain ‘C’ in the invoice number (InvoiceID).

    5) No item mentioned in the description can have more than one stock keeping unit.
    
    
- The output file (in csv format) contains the following columns:

    1. Item in cart: A single item added to the cart. If nothing is added, the output file contains all the rules that pass the set threshold values.

    2. Recommendation: The consequent of the item added in the cart.

    3. Item support: The proportion of transactions containing the item in cart (antecedent).

    4. Confidence: The probability that a transaction contains the consequent, given that it contains the antecedent.

    5. Lift: Measures the dependency of consequent on the antecedent. If Lift = 1, then the antecedent and consequent are independent. If Lift > 1, then given the antecedent, the probability of consequent is greater than the support of the consequent.

    6. Conviction: The dependency of consequent on the antecedent increases with conviction value.

    If conviction = 1, then the antecedent and consequent are independent.

    7. Leverage: The difference between the observed probability of the antecedent and consequent appearing together and the probability that would be expected if they were independent of each other. If leverage = 0, the antecedent and consequent are independent.

- Generated results are sorted in decreasing order of item support and can be filtered by rule support, confidence and lift.