# Create Lambda for API Gateway

## Setup

In [1]:
import boto3
import pandas as pd
import numpy as np
import time
import sagemaker
from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup

In [2]:
%env AWS_PROFILE=aeroxye-sagemaker

env: AWS_PROFILE=aeroxye-sagemaker


In [3]:
!aws sts get-caller-identity

{
    "UserId": "AROAWC4YSIQL5OBFCNGEX:botocore-session-1687435302",
    "Account": "418542404631",
    "Arn": "arn:aws:sts::418542404631:assumed-role/SageMaker-UserRole/botocore-session-1687435302"
}


In [4]:
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='SageMaker-UserRole')['Role']['Arn']

region = boto3.Session().region_name
print(f'Current region: {region}')

boto_session = boto3.Session(region_name=region)
sagemaker_session = sagemaker.Session(boto_session=boto_session)
sagemaker_client = boto_session.client(service_name='sagemaker', region_name=region)

Current region: ap-southeast-1


## Create Lambda

In [5]:
from sagemaker.lambda_helper import Lambda

# Lambda helper class can be used to create the Lambda function
func = Lambda(
    function_name="retrieve-ranking",
    execution_role_arn=role,
    handler="process_new_user.handler",
    zipped_code_dir="./process_users.zip",
    environment={
        "Variables": {
            "REGION":region,
        }
    },
    timeout=600,
    memory_size=256,
)

In [6]:
func.update()

{'ResponseMetadata': {'RequestId': '6c368329-772a-4eb8-929a-2b2480dee2ed',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Thu, 22 Jun 2023 12:22:42 GMT',
   'content-type': 'application/json',
   'content-length': '1458',
   'connection': 'keep-alive',
   'x-amzn-requestid': '6c368329-772a-4eb8-929a-2b2480dee2ed'},
  'RetryAttempts': 0},
 'FunctionName': 'retrieve-ranking',
 'FunctionArn': 'arn:aws:lambda:ap-southeast-1:418542404631:function:retrieve-ranking',
 'Runtime': 'python3.8',
 'Role': 'arn:aws:iam::418542404631:role/SageMaker-UserRole',
 'Handler': 'process_new_user.handler',
 'CodeSize': 3884,
 'Description': '',
 'Timeout': 600,
 'MemorySize': 128,
 'LastModified': '2023-06-22T12:22:42.000+0000',
 'CodeSha256': 'r0GxctSqrNc0kztvv7TW3C+S7hayEdAtUFD4YQCgZ54=',
 'Version': '$LATEST',
 'Environment': {'Variables': {'REGION': 'ap-southeast-1'}},
 'TracingConfig': {'Mode': 'PassThrough'},
 'RevisionId': 'e1b3db45-ec55-44a0-adf5-82987d4939ab',
 'Layers': [{'Arn': 'arn:aws:lamb

## Test area

In [15]:
import json
test_user = json.dumps({"id": "082a5d87-5cb2-41d1-a29d-601b38d9f380", "username": "aeroxye12", "A_gender": "No "
                                                                                                        "preference",
              "A_primarycolor": ["No preference"
    ], "A_agegroup": ["Kitten", "Juvenile", "Adult"
    ], "A_energy": "Chill", "A_attention": "No preference", "A_sweetspicy": "All sweet", "A_firstcat": 1, "A_othercats": -1, "A_otherdogs": -1, "A_kids": 1, "A_employment": "Working Full Time", "A_homeownership": "Self-Owned", "A_allergies": -1, "A_adoptionfee": 1, "createdAt": "2023-05-17T03: 54: 09.823Z", "updatedAt": "2023-05-17T03: 54: 09.823Z"
})
test_json = json.loads(test_user)

In [16]:
test_json

{'id': '082a5d87-5cb2-41d1-a29d-601b38d9f380',
 'username': 'aeroxye12',
 'A_gender': 'No preference',
 'A_primarycolor': ['No preference'],
 'A_agegroup': ['Kitten', 'Juvenile', 'Adult'],
 'A_energy': 'Chill',
 'A_attention': 'No preference',
 'A_sweetspicy': 'All sweet',
 'A_firstcat': 1,
 'A_othercats': -1,
 'A_otherdogs': -1,
 'A_kids': 1,
 'A_employment': 'Working Full Time',
 'A_homeownership': 'Self-Owned',
 'A_allergies': -1,
 'A_adoptionfee': 1,
 'createdAt': '2023-05-17T03: 54: 09.823Z',
 'updatedAt': '2023-05-17T03: 54: 09.823Z'}

In [24]:
user_df = pd.read_json(test_user, orient='index').T
user_df

Unnamed: 0,id,username,A_gender,A_primarycolor,A_agegroup,A_energy,A_attention,A_sweetspicy,A_firstcat,A_othercats,A_otherdogs,A_kids,A_employment,A_homeownership,A_allergies,A_adoptionfee,createdAt,updatedAt
0,082a5d87-5cb2-41d1-a29d-601b38d9f380,aeroxye12,No preference,[No preference],"[Kitten, Juvenile, Adult]",Chill,No preference,All sweet,1,-1,-1,1,Working Full Time,Self-Owned,-1,1,2023-05-17T03: 54: 09.823Z,2023-05-17T03: 54: 09.823Z


In [90]:
cl_users = user_df.rename(columns={'A_gender': 'gender',
                                 'A_primarycolor': 'primary_color',
                                 'A_agegroup': 'age_group',
                                 'A_energy': 'energy_level',
                                 'A_attention': 'attention_need',
                                 'A_sweetspicy': 'personality',
                                 'A_firstcat': 'is_first_cat',
                                 'A_othercats': 'has_other_cats',
                                 'A_otherdogs': 'good_with_other_dogs',
                                 'A_kids': 'good_with_kids',
                                 'A_employment': 'employment',
                                 'A_homeownership': 'home_ownership',
                                 'A_allergies': 'has_allergies',
                                 'A_adoptionfee': 'agree_to_fee',
                                 'createdAt': 'created_at',
                                 'updatedAt': 'updated_at',
                                 })
cl_users

Unnamed: 0,id,agree_to_fee,age_group,has_allergies,attention_need,employment,energy_level,is_first_cat,gender,home_ownership,good_with_kids,has_other_cats,good_with_other_dogs,primary_color,personality,created_at,updated_at,username
0,8b1aa3c3-162f-4717-a9f5-d895ec52f2a4,1,No preference,-1,No preference,Working Full Time,No preference,1,No preference,Self-Owned,1,-1,1,No preference,Anything is nice,2023-05-17T09:04:07.762Z,2023-05-17T09:04:07.762Z,Chai Chai


In [91]:
# clean multi-select columns with No Preference options (age, color)
def clean_multi_select(row):
    if isinstance(row, str):
        arr = row.split(',')
        if (len(arr) > 1) and ('No preference' in arr):
            arr.remove('No preference')
        return [s.lower() for s in arr]
    else:
        arr = [o.get("S") for o in row]
        if (len(arr) > 1) and ('No preference' in arr):
            arr.remove('No preference')
        return [s.lower() for s in arr]

In [92]:
import re
cl_users['age_group'] = cl_users['age_group'].map(lambda choice: clean_multi_select(choice))
cl_users['primary_color'] = cl_users['primary_color'].map(lambda choice: clean_multi_select(choice))

# split columns with list (age, color)
age_groups = ['age_kitten', 'age_juvenile', 'age_no_preference', 'age_adult', 'age_senior']
split_age_groups = cl_users['age_group'].map(lambda row: ','.join([str(age in row) for age in age_groups]))
new_age_columns = split_age_groups.str.split(',', expand=True)
new_age_columns = new_age_columns.applymap(lambda val: (val == 'True'))
# new_age_columns = new_age_columns.astype('bool')
new_age_columns = new_age_columns.astype('int')

pattern = re.compile(r'\s|/')
# new_age_columns.columns = [f'age_{pattern.sub("_", age).lower()}' for age in age_groups]
cl_users[[f'age_{pattern.sub("_", age).lower()}' for age in age_groups]] = new_age_columns
cl_users = cl_users.drop('age_group', axis=1)

color_groups = ['primary_color_no_preference', 'primary_color_black', 'primary_color_calico_tortie',
                'primary_color_tabby', 'primary_color_others', 'primary_color_ginger', 'primary_color_white']
split_color_groups = cl_users['primary_color'].map(
    lambda row: ','.join([str(color in row) for color in color_groups]))
new_color_columns = split_color_groups.str.split(',', expand=True)
new_color_columns = new_color_columns.applymap(lambda val: (val == 'True'))
# new_color_columns = new_color_columns.astype('bool')
new_color_columns = new_color_columns.astype('int')

In [98]:
processed_user = cl_users.to_dict('records')

TypeError: unhashable type: 'list'

In [101]:
cl_users.loc[0, 'created_at']

'2023-05-17T09:04:07.762Z'