In [1]:
import pandas as pd
import json
import ast
import plotly.express as px
import math

In [2]:
jamstones = pd.read_csv('jamstones.csv')
lavval = pd.read_csv('lavval.csv')
newagefsg = pd.read_csv('newage.csv')

In [3]:
# Preprocessing

#helper functions
def trim_all_columns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe
    """
    trim_strings = lambda x: x.strip() if isinstance(x, str) else x
    return df.applymap(trim_strings)
    
def check_format(row):
    if type(row)==str:
        if ((row[0] == '['and row[len(row)-1] == ']' and row[1] == '{'and row[len(row)-2] == '}') or (row[0] == '{'and row[len(row)-1] == '}') )and len(row)>10:
            return True
    else:
        return False

def preprocessing(df):
    df_copy = df.copy()[['id','line_items']]
    df_copy.dropna(inplace = True)
    
    df_copy = trim_all_columns(df_copy)
    df_copy.line_items = df_copy.line_items.apply(eval).apply(json.dumps)

    def clean_json(x):
        "Create apply function for decoding JSON"
        return json.loads(x)

    df_copy['line_items'] = df_copy['line_items'].apply(clean_json)  
    
    return df_copy
   


In [4]:
jamstones_preprocessed = preprocessing(jamstones)
lavval_preprocessed = preprocessing(lavval)
newagefsg_preprocessed = preprocessing(newagefsg)
items_df = pd.concat([jamstones_preprocessed, lavval_preprocessed, newagefsg_preprocessed], ignore_index=True)
# items_df = preprocessing(jamstones)

In [5]:
type(items_df['line_items'][200][0]['quantity'])

int

In [6]:
items_df

Unnamed: 0,id,line_items
0,4093536043104,"[{'id': 10557966286944, 'admin_graphql_api_id'..."
1,4093506060384,"[{'id': 10557885513824, 'admin_graphql_api_id'..."
2,4093373120608,"[{'id': 10557541810272, 'admin_graphql_api_id'..."
3,4093166813280,"[{'id': 10557052059744, 'admin_graphql_api_id'..."
4,4092837363808,"[{'id': 10556246884448, 'admin_graphql_api_id'..."
...,...,...
13934,2674319327393,"[{'id': 5785053888673, 'admin_graphql_api_id':..."
13935,2673968054433,"[{'id': 5784513413281, 'admin_graphql_api_id':..."
13936,2673190043809,"[{'id': 5783188865185, 'admin_graphql_api_id':..."
13937,2655117574305,"[{'id': 5752646762657, 'admin_graphql_api_id':..."


In [48]:
# items_df['line_items'][223]

In [8]:
test = "Agate Amazonite Amber Amethyst Ametrine Ammonite Angelite Apache Tear (Obsidian) Apatite Aquamarine Aragonite Auralite 23 Aventurine Azeztulite Azurite Beryl Black Rutilated Quartz Black Spinel Black Tourmaline Bloodstone Blue Lace Agate Boji Stone Bronzite Calcite Carborundum Carnelian Celestite Chalcedony Charoite Chiastolite Chrysoberyl Chrysocolla Chrysoprase Citrine Clear Quartz Dalmatian Jasper Danburite Dendritic Agate Diamond Diopside Emerald Enhydro Quartz Fire Agate Fire Opal Fluorite Fossil Coral Jasper Freshwater Pearl Fuchsite Garnet Gold Rutilated Quartz Gypsum Hematite Herkimer Diamond Hessonite Howlite Hypersthene Imperial Topaz Indicolite Iolite Iron Nickel Meteorite Iron Pyrite Jade Jasper Jet K2 Kunzite Kyanite Labradorite Landscape Jasper Lapis Lazuli Larimar Lemurian Seed Lepidolite Libyan Gold Tektite Malachite Mangano Calcite Moldavite Mookaite Jasper Moonstone Morganite Moss Agate Nebula Stone Obsidian Onyx Opal Peridot Petrified Wood Phantom Quartz Pietersite Prehnite Red Jasper Red Rutilated Quartz Rhodochrosite Rhodonite Rose Quartz Ruby Sapphire Selenite Seraphinite Serpentine Shungite Smoky Quartz Snowflake Obsidian Sodalite Spinel Spirit Quartz Staurolite Sugilite Sunstone Tanzanite Tektite Tibetan Quartz Tiger's Eye Topaz Tourmaline Turquoise Unakite Watermelon Tourmaline Zebra Jasper Zoisite"
test_list = test.split()
test_list

['Agate',
 'Amazonite',
 'Amber',
 'Amethyst',
 'Ametrine',
 'Ammonite',
 'Angelite',
 'Apache',
 'Tear',
 '(Obsidian)',
 'Apatite',
 'Aquamarine',
 'Aragonite',
 'Auralite',
 '23',
 'Aventurine',
 'Azeztulite',
 'Azurite',
 'Beryl',
 'Black',
 'Rutilated',
 'Quartz',
 'Black',
 'Spinel',
 'Black',
 'Tourmaline',
 'Bloodstone',
 'Blue',
 'Lace',
 'Agate',
 'Boji',
 'Stone',
 'Bronzite',
 'Calcite',
 'Carborundum',
 'Carnelian',
 'Celestite',
 'Chalcedony',
 'Charoite',
 'Chiastolite',
 'Chrysoberyl',
 'Chrysocolla',
 'Chrysoprase',
 'Citrine',
 'Clear',
 'Quartz',
 'Dalmatian',
 'Jasper',
 'Danburite',
 'Dendritic',
 'Agate',
 'Diamond',
 'Diopside',
 'Emerald',
 'Enhydro',
 'Quartz',
 'Fire',
 'Agate',
 'Fire',
 'Opal',
 'Fluorite',
 'Fossil',
 'Coral',
 'Jasper',
 'Freshwater',
 'Pearl',
 'Fuchsite',
 'Garnet',
 'Gold',
 'Rutilated',
 'Quartz',
 'Gypsum',
 'Hematite',
 'Herkimer',
 'Diamond',
 'Hessonite',
 'Howlite',
 'Hypersthene',
 'Imperial',
 'Topaz',
 'Indicolite',
 'Iolite',
 

In [9]:
crystals = ['Agate',
 'Amazonite',
 'Amber',
 'Amethyst',
 'Ametrine',
 'Ammonite',
 'Angelite',
 'Apache Tear',
 'Apatite',
 'Aquamarine',
 'Aragonite',
 'Auralite 23',
 'Aventurine',
 'Azeztulite',
 'Azurite',
 'Beryl',
 'Black Rutilated Quartz',
 'Black Spinel',
 'Black Tourmaline',
 'Bloodstone',
 'Blue Lace Agate',
 'Boji Stone',
 'Bronzite',
 'Calcite',
 'Carborundum',
 'Carnelian',
 'Celestite',
 'Chalcedony',
 'Charoite',
 'Chiastolite',
 'Chrysoberyl',
 'Chrysocolla',
 'Chrysoprase',
 'Citrine',
 'Clear Quartz',
 'Dalmatian Jasper',
 'Danburite',
 'Dendritic',
 'Agate',
 'Diamond',
 'Diopside',
 'Emerald',
 'Enhydro Quartz',
 'Fire Agate',
 'Fire Opal',
 'Fluorite',
 'Fossil Coral Jasper',
 'Freshwater Pearl',
 'Fuchsite',
 'Garnet',
 'Gold Rutilated Quartz',
 'Gypsum',
 'Hematite',
 'Herkimer Diamond',
 'Hessonite',
 'Howlite',
 'Hypersthene',
 'Imperial Topaz',
 'Indicolite',
 'Iolite',
 'Iron Nickel Meteorite',
 'Iron Pyrite',
 'Jade',
 'Jasper',
 'Jet',
 'K2',
 'Kunzite',
 'Kyanite',
 'Labradorite',
 'Landscape Jasper',
 'Lapis Lazuli',
 'Larimar',
 'Lemurian Seed',
 'Lepidolite',
 'Libyan Gold Tektite',
 'Malachite',
 'Mangano Calcite',
 'Moldavite',
 'Mookaite Jasper',
 'Moonstone',
 'Morganite',
 'Moss Agate',
 'Nebula Stone',
 'Obsidian',
 'Onyx',
 'Opal',
 'Peridot',
 'Petrified Wood',
 'Phantom Quartz',
 'Pietersite',
 'Prehnite',
 'Red Jasper',
 'Red Rutilated Quartz',
 'Rhodochrosite',
 'Rhodonite',
 'Rose Quartz',
 'Ruby',
 'Sapphire',
 'Selenite',
 'Seraphinite',
 'Serpentine',
 'Shungite',
 'Smoky Quartz',
 'Snowflake Obsidian',
 'Sodalite',
 'Spinel',
 'Spirit Quartz',
 'Staurolite',
 'Sugilite',
 'Sunstone',
 'Tanzanite',
 'Tektite',
 'Tibetan Quartz',
 "Tiger's Eye",
 'Topaz',
 'Tourmaline',
 'Turquoise',
 'Unakite',
 'Watermelon Tourmaline',
 'Zebra Jasper',
 'Zoisite']

In [10]:
len(crystals)

121

In [11]:
crystals = [crystal.lower() for crystal in crystals]
crystals

['agate',
 'amazonite',
 'amber',
 'amethyst',
 'ametrine',
 'ammonite',
 'angelite',
 'apache tear',
 'apatite',
 'aquamarine',
 'aragonite',
 'auralite 23',
 'aventurine',
 'azeztulite',
 'azurite',
 'beryl',
 'black rutilated quartz',
 'black spinel',
 'black tourmaline',
 'bloodstone',
 'blue lace agate',
 'boji stone',
 'bronzite',
 'calcite',
 'carborundum',
 'carnelian',
 'celestite',
 'chalcedony',
 'charoite',
 'chiastolite',
 'chrysoberyl',
 'chrysocolla',
 'chrysoprase',
 'citrine',
 'clear quartz',
 'dalmatian jasper',
 'danburite',
 'dendritic',
 'agate',
 'diamond',
 'diopside',
 'emerald',
 'enhydro quartz',
 'fire agate',
 'fire opal',
 'fluorite',
 'fossil coral jasper',
 'freshwater pearl',
 'fuchsite',
 'garnet',
 'gold rutilated quartz',
 'gypsum',
 'hematite',
 'herkimer diamond',
 'hessonite',
 'howlite',
 'hypersthene',
 'imperial topaz',
 'indicolite',
 'iolite',
 'iron nickel meteorite',
 'iron pyrite',
 'jade',
 'jasper',
 'jet',
 'k2',
 'kunzite',
 'kyanite',

In [49]:
#name, quantity
#transaction set per row trans_list = [name, name2, name2 ...]
def remove_suffix(input_string, suffix):
    if suffix and input_string.endswith(suffix):
        return input_string[:-len(suffix)]
    return input_string

categories = ['bracelet', 'necklace','pendant', 'earrings', 'ring', 'lamp', 'bottle', 'tower', 'pendulum', 'bead', 'tumble', 'display', 'pouches', 'kits', 'bag charm', 'charm', 'bag', 'kit', 'tree', 'pouch', 'solitaire', 'pointer', 'hedgehog', 'chip']

crystals = ['agate',
 'amazonite',
 'amber',
 'amethyst',
 'ametrine',
 'ammonite',
 'angelite',
 'apache tear',
 'apatite',
 'aquamarine',
 'aragonite',
 'auralite 23',
 'aventurine',
 'azeztulite',
 'azurite',
 'beryl',
 'black rutilated quartz',
 'black spinel',
 'black tourmaline',
 'bloodstone',
 'blue lace agate',
 'boji stone',
 'bronzite',
 'calcite',
 'carborundum',
 'carnelian',
 'celestite',
 'chalcedony',
 'charoite',
 'chiastolite',
 'chrysoberyl',
 'chrysocolla',
 'chrysoprase',
 'citrine',
 'clear quartz',
 'dalmatian jasper',
 'danburite',
 'dendritic',
 'agate',
 'diamond',
 'diopside',
 'emerald',
 'enhydro quartz',
 'fire agate',
 'fire opal',
 'fluorite',
 'fossil coral jasper',
 'freshwater pearl',
 'fuchsite',
 'garnet',
 'gold rutilated quartz',
 'gypsum',
 'hematite',
 'herkimer diamond',
 'hessonite',
 'howlite',
 'hypersthene',
 'imperial topaz',
 'indicolite',
 'iolite',
 'iron nickel meteorite',
 'iron pyrite',
 'jade',
 'jasper',
 'jet',
 'k2',
 'kunzite',
 'kyanite',
 'labradorite',
 'landscape jasper',
 'lapis lazuli',
 'larimar',
 'lemurian seed',
 'lepidolite',
 'libyan gold tektite',
 'malachite',
 'mangano calcite',
 'moldavite',
 'mookaite jasper',
 'moonstone',
 'morganite',
 'moss agate',
 'nebula stone',
 'obsidian',
 'onyx',
 'opal',
 'peridot',
 'petrified wood',
 'phantom quartz',
 'pietersite',
 'prehnite',
 'red jasper',
 'red rutilated quartz',
 'rhodochrosite',
 'rhodonite',
 'rose quartz',
 'ruby',
 'sapphire',
 'selenite',
 'seraphinite',
 'serpentine',
 'shungite',
 'smoky quartz',
 'snowflake obsidian',
 'sodalite',
 'spinel',
 'spirit quartz',
 'staurolite',
 'sugilite',
 'sunstone',
 'tanzanite',
 'tektite',
 'tibetan quartz',
 "tiger's eye",
 'topaz',
 'tourmaline',
 'turquoise',
 'unakite',
 'watermelon tourmaline',
 'zebra jasper',
 'zoisite']

def get_category(name):
    for category in categories:
        if category in name.lower():
            return category
    else:
        return 'others'
    
def get_crystal(name):
    for crystal in crystals:
        if crystal in name.lower():
            return crystal
    else:
        return 'others'
    
def remove_live_sale(sublist):
    remove_substring = 'Live Sale $1 Listing'
    new_sublist = []
    for item in sublist:
        if remove_substring not in item:
            new_sublist.append(item)
            
    return new_sublist
    
def items_per_transaction(row):
    num_items = len(row)
    trans_list = []
    categories_list = []
    crystals_list = []
    
    #items
    for i in range(num_items):
        temp_quantity = row[i]['quantity']
        for j in range(temp_quantity):
            substring = '(Not found on Shopify)'
            name = row[i]['name']
            if substring in name:
                name = remove_suffix(name, substring)
            trans_list.append(name)
    
    trans_list = remove_live_sale(trans_list)
            
    #categories and crystals
    for item in trans_list:
        category = get_category(item)
        crystal = get_crystal(item)
        categories_list.append(category)
        crystals_list.append(crystal)
             
    return trans_list, categories_list, crystals_list


def get_unique_items(some_list):
    flat_list = [item for sublist in some_list for item in sublist]
    unique_list = []
    for item in flat_list:
        if item not in unique_list:
            unique_list.append(item)
    return unique_list
    
def transaction_items(df):
    df_copy = preprocessing(df)
    line_items_list = df_copy.line_items.tolist()
    items_per_transaction_list = [items_per_transaction(row) for row in line_items_list]
    df_copy['items_per_transaction'] = items_per_transaction_list   
    
    unique_items = get_unique_items(items_per_transaction_list)
            
    return unique_items

def remove_empty_transactions(row):
    for item in row:
        if item == []:
            row.remove(item)
            
    return row

# unique_items = transaction_items(jamstones)

In [50]:
line_items_list = items_df.line_items.tolist()
items_per_transaction_list = [items_per_transaction(row)[0] for row in line_items_list]
categories_per_transaction_list = [items_per_transaction(row)[1] for row in line_items_list]
crystals_per_transaction_list = [items_per_transaction(row)[2] for row in line_items_list]
items_per_transaction_list = remove_empty_transactions(items_per_transaction_list)
categories_per_transaction_list = remove_empty_transactions(categories_per_transaction_list)
crystals_per_transaction_list = remove_empty_transactions(crystals_per_transaction_list)
d = {'items_per_transaction': items_per_transaction_list, 'categories_per_transaction': categories_per_transaction_list, 'crystals_per_transaction': crystals_per_transaction_list}
temp_df = pd.DataFrame(data=d)
temp_df


Unnamed: 0,items_per_transaction,categories_per_transaction,crystals_per_transaction
0,"[[READY PRODUCT] Crystal Bag Charm Jamstones ,...","[bag charm, bag charm]","[others, others]"
1,"[[READY PRODUCT] Crystal Bag Charm Jamstones ,...","[bag charm, bag charm]","[others, others]"
2,"[[READY PRODUCT] Crystal Bag Charm Jamstones ,...","[bag charm, bag charm]","[others, others]"
3,[[Jamstones x Lavval] Sugar & Spice Necklace (...,"[necklace, necklace, bracelet]","[others, others, others]"
4,"[Career (Calmness, Healing, Wisdom, Wealth), B...","[others, necklace, necklace]","[others, others, others]"
...,...,...,...
13268,"[Howlite 6mm Bracelet, Black Tourmaline 6mm Br...","[bracelet, bracelet, bracelet, bracelet, brace...","[howlite, black tourmaline, bronzite, rose qua..."
13269,"[Aventurine 8mm Bracelet, Ruby Zosite Pointer]","[bracelet, pointer]","[aventurine, ruby]"
13270,"[Hematite Tumble, Clear Quartz Tumble, Mini Se...","[tumble, tumble, tower, pouch, pouch]","[hematite, clear quartz, selenite, others, oth..."
13271,[TEST 1],[others],[others]


In [51]:
crystals_per_transaction_list

[['others', 'others'],
 ['others', 'others'],
 ['others', 'others'],
 ['others', 'others', 'others'],
 ['others', 'others', 'others'],
 ['others', 'others'],
 ['others'],
 ['others'],
 ['others', 'others'],
 ['others', 'selenite'],
 ['others', 'angelite'],
 ['others', 'others'],
 ['others', 'others'],
 ['others'],
 ['others'],
 ['others', 'others', 'selenite', 'selenite'],
 ['others'],
 ['others'],
 ['others'],
 ['others'],
 ['selenite', 'others'],
 ['rose quartz'],
 ['others'],
 ['others'],
 ['others'],
 ['others', 'others'],
 ['others'],
 ['rose quartz'],
 ['others'],
 ['others'],
 ['angelite'],
 ['others'],
 ['selenite'],
 ['selenite', 'selenite'],
 ['agate'],
 ['others'],
 ['others'],
 ['others'],
 ['others'],
 ['others'],
 ['agate'],
 ['selenite', 'others'],
 ['others', 'agate'],
 ['lepidolite',
  'howlite',
  'agate',
  'selenite',
  'rhodonite',
  'others',
  'citrine',
  'amazonite',
  'aquamarine'],
 ['others'],
 ['others'],
 ['others', 'others', 'others', 'others', 'others'],

In [52]:
# copy_df.to_csv('copy_df.csv')

In [53]:
items_per_transaction_list

[['[READY PRODUCT] Crystal Bag Charm Jamstones ',
  '[READY PRODUCT] Crystal Bag Charm Jamstones '],
 ['[READY PRODUCT] Crystal Bag Charm Jamstones ',
  '[READY PRODUCT] Crystal Bag Charm Jamstones '],
 ['[READY PRODUCT] Crystal Bag Charm Jamstones ',
  '[READY PRODUCT] Crystal Bag Charm Jamstones '],
 ['[Jamstones x Lavval] Sugar & Spice Necklace (Love, Wealth and Weight-Loss) ',
  '[Jamstones x Lavval] Sugar & Spice Necklace (Love, Wealth and Weight-Loss) ',
  '[Jamstones x Lavval] Dubai Crystal Bracelet [Wisdom] '],
 ['Career (Calmness, Healing, Wisdom, Wealth)',
  'Blue Tiger Eye Tri Pearl Necklace',
  'Phantom Garden Globe Quartz Necklace'],
 ['Mel (Calmness, Wisdom, Wealth, Protection)',
  'Peachy (Happiness, Self confidence, Female Wellness and Healing)'],
 ['[Jamstones x Lavval] Healing Crystal Tree '],
 ['[Jamstones x Lavval] Positivity  - Shopee Exclusive Crystal Bracelet [Love and Fertility] '],
 ['[Jamstones x Lavval] Solitaire Crystal Necklace [Choose from 20+ centerpieces

In [54]:
categories_per_transaction_list

[['bag charm', 'bag charm'],
 ['bag charm', 'bag charm'],
 ['bag charm', 'bag charm'],
 ['necklace', 'necklace', 'bracelet'],
 ['others', 'necklace', 'necklace'],
 ['others', 'others'],
 ['tree'],
 ['bracelet'],
 ['necklace', 'tree'],
 ['tree', 'others'],
 ['bracelet', 'bracelet'],
 ['bottle', 'bag charm'],
 ['bracelet', 'bracelet'],
 ['bracelet'],
 ['others'],
 ['ring', 'charm', 'kit', 'kit'],
 ['others'],
 ['bracelet'],
 ['others'],
 ['bracelet'],
 ['lamp', 'others'],
 ['pointer'],
 ['bracelet'],
 ['bag charm'],
 ['others'],
 ['bracelet', 'others'],
 ['bag'],
 ['earrings'],
 ['bracelet'],
 ['others'],
 ['bracelet'],
 ['bracelet'],
 ['kit'],
 ['kit', 'kit'],
 ['bracelet'],
 ['bracelet'],
 ['bracelet'],
 ['necklace'],
 ['bracelet'],
 ['others'],
 ['bracelet'],
 ['tower', 'bracelet'],
 ['bracelet', 'bracelet'],
 ['tumble',
  'tumble',
  'tumble',
  'others',
  'tumble',
  'tumble',
  'pointer',
  'tumble',
  'tumble'],
 ['bracelet'],
 ['bag charm'],
 ['bag', 'bag', 'bag', 'bag', 'bag'],

In [55]:
# !pip install mlxtend

### MBA Using Items

In [56]:
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(items_per_transaction_list).transform(items_per_transaction_list)
te_ary   

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [57]:
mba_df = pd.DataFrame(te_ary, columns=te.columns_)
mba_df.describe()

Unnamed: 0,$4 courier for JS5331,$50 gift voucher,*14mmLavender rose quartz solitaire,*DIRECT IMPORT from South America Palo Santo Wood Bundle,*PREMIUM* Asterism Lavender Rose Quartz 40mm Sphere,*PREMIUM* Asterism Lavender Rose Quartz 72mm Sphere,*PREMIUM* Black Tourmalinated Quartz Tumble,*PREMIUM* Citrine Chips,*PREMIUM* Fluorite 6mm Bracelet,*PREMIUM* Fluorite Beads (20cm bracelet top-up),...,✨PROMO✨ Anti - Radiation Kit - Yes! I wish to add in the Labrdorite Crystal Grip,万事如意 (Success in everything),和气生财 (Harmony brings wealth),心想事成,心想事成 (Wish come true),心想事成 (Wish come true) - 14mm,心想事成 (Wish come true) - 16mm,恭喜发财 [Happiness and Prosperity],財源滾滾 [Endless Wealth],财源广进 (Endless fortune)
count,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273,...,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273
unique,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
top,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
freq,13272,13272,13272,13271,13272,13272,13270,13251,13271,13272,...,13272,13249,13268,13272,13272,13266,13270,13267,13231,13268


In [58]:
from mlxtend.frequent_patterns import apriori

#Now, let us return the items and itemsets with at least 1% support:
ap_items = mlxtend.frequent_patterns.apriori(mba_df, min_support = 0.01, use_colnames = True)
ap_items

Unnamed: 0,support,itemsets
0,0.012808,(Wealth Magnet Bottle (U.P.$3.90) )
1,0.012733,([Jamstones x Lavval] Amethyst Eye Pillow )
2,0.011753,([Jamstones x Lavval] Healing Crystal Tree )


In [59]:
rules_items=mlxtend.frequent_patterns.association_rules(
ap_items, metric="confidence",min_threshold=0,support_only=False)
rules_items

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


In [60]:
frequent_itemsets = apriori(mba_df, min_support = 0.01, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.012808,(Wealth Magnet Bottle (U.P.$3.90) ),1
1,0.012733,([Jamstones x Lavval] Amethyst Eye Pillow ),1
2,0.011753,([Jamstones x Lavval] Healing Crystal Tree ),1


In [61]:
# getting th item sets with length = 2 and support more han 1%

frequent_itemsets[ (frequent_itemsets['length'] == 2) &
                   (frequent_itemsets['support'] >= 0.01) ]

Unnamed: 0,support,itemsets,length


### MBA using categories

In [62]:
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_ary2 = te.fit(categories_per_transaction_list).transform(categories_per_transaction_list)
te_ary2   

array([[False,  True, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       ...,
       [False, False, False, ...,  True, False,  True],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [63]:
mba_df2 = pd.DataFrame(te_ary2, columns=te.columns_)
mba_df2.describe()

Unnamed: 0,bag,bag charm,bead,bottle,bracelet,charm,chip,display,earrings,hedgehog,...,others,pendant,pendulum,pointer,pouch,ring,solitaire,tower,tree,tumble
count,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273,...,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273
unique,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
top,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
freq,13162,12919,12964,12759,10421,12994,13218,13265,12740,13223,...,7342,12894,13242,12868,13104,12792,12763,12966,13003,12786


In [64]:
from mlxtend.frequent_patterns import apriori

#Now, let us return the items and itemsets with at least 5% support:
ap_categories = mlxtend.frequent_patterns.apriori(mba_df2, min_support = 0.01, use_colnames = True)
ap_categories

Unnamed: 0,support,itemsets
0,0.026671,(bag charm)
1,0.02328,(bead)
2,0.038725,(bottle)
3,0.214872,(bracelet)
4,0.02102,(charm)
5,0.040157,(earrings)
6,0.019061,(kit)
7,0.027198,(lamp)
8,0.096813,(necklace)
9,0.553153,(others)


In [65]:
rules_categories = mlxtend.frequent_patterns.association_rules(
ap_categories, metric="confidence",min_threshold=0,support_only=False)
rules_categories

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(others),(bottle),0.553153,0.038725,0.012883,0.023291,0.601434,-0.008538,0.984197
1,(bottle),(others),0.038725,0.553153,0.012883,0.332685,0.601434,-0.008538,0.669619
2,(necklace),(bracelet),0.096813,0.214872,0.011452,0.118288,0.550503,-0.009351,0.890458
3,(bracelet),(necklace),0.214872,0.096813,0.011452,0.053296,0.550503,-0.009351,0.954033
4,(others),(bracelet),0.553153,0.214872,0.053191,0.096159,0.447517,-0.065667,0.868657
5,(bracelet),(others),0.214872,0.553153,0.053191,0.247546,0.447517,-0.065667,0.593853
6,(others),(earrings),0.553153,0.040157,0.01439,0.026015,0.64783,-0.007823,0.98548
7,(earrings),(others),0.040157,0.553153,0.01439,0.358349,0.64783,-0.007823,0.696402
8,(others),(lamp),0.553153,0.027198,0.012205,0.022065,0.811265,-0.002839,0.994751
9,(lamp),(others),0.027198,0.553153,0.012205,0.448753,0.811265,-0.002839,0.810612


In [66]:
frequent_itemsets = apriori(mba_df2, min_support = 0.01, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.026671,(bag charm),1
1,0.02328,(bead),1
2,0.038725,(bottle),1
3,0.214872,(bracelet),1
4,0.02102,(charm),1
5,0.040157,(earrings),1
6,0.019061,(kit),1
7,0.027198,(lamp),1
8,0.096813,(necklace),1
9,0.553153,(others),1


In [67]:
# getting th item sets with length = 2 and support more han 10%

frequent_itemsets[ (frequent_itemsets['length'] == 2) &
                   (frequent_itemsets['support'] >= 0.01) ]

Unnamed: 0,support,itemsets,length
18,0.012883,"(others, bottle)",2
19,0.011452,"(necklace, bracelet)",2
20,0.053191,"(others, bracelet)",2
21,0.01439,"(others, earrings)",2
22,0.012205,"(others, lamp)",2
23,0.033225,"(others, necklace)",2
24,0.015294,"(others, pointer)",2
25,0.011452,"(ring, others)",2
26,0.016876,"(others, solitaire)",2
27,0.012431,"(others, tower)",2


In [68]:
# type of stone
# names (?) - confidence, wealth (???)

## MBA Using Crystals

In [69]:
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_ary3 = te.fit(crystals_per_transaction_list).transform(crystals_per_transaction_list)
te_ary3

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [70]:
mba_df3 = pd.DataFrame(te_ary3, columns=te.columns_)
mba_df3.describe()

Unnamed: 0,agate,amazonite,amber,amethyst,ametrine,ammonite,angelite,apatite,aquamarine,aragonite,...,sodalite,sugilite,sunstone,tanzanite,tektite,tiger's eye,topaz,tourmaline,turquoise,zoisite
count,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273,...,13273,13273,13273,13273,13273,13273,13273,13273,13273,13273
unique,2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
top,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
freq,12471,13159,13263,12564,13255,13270,13048,13238,12951,13247,...,13176,13269,13184,13250,13265,13161,13242,13149,13269,13268


In [71]:
from mlxtend.frequent_patterns import apriori

#Now, let us return the items and itemsets with at least 5% support:
ap_crystals = mlxtend.frequent_patterns.apriori(mba_df3, min_support = 0.01, use_colnames = True)
ap_crystals

Unnamed: 0,support,itemsets
0,0.060423,(agate)
1,0.053417,(amethyst)
2,0.016952,(angelite)
3,0.02426,(aquamarine)
4,0.010548,(aventurine)
5,0.010548,(black tourmaline)
6,0.034581,(citrine)
7,0.015144,(clear quartz)
8,0.01665,(fluorite)
9,0.053191,(jade)


In [72]:
rules_crystals = mlxtend.frequent_patterns.association_rules(
ap_crystals, metric="confidence",min_threshold=0,support_only=False)
rules_crystals

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(others),(agate),0.693136,0.060423,0.025541,0.036848,0.609827,-0.016341,0.975522
1,(agate),(others),0.060423,0.693136,0.025541,0.422693,0.609827,-0.016341,0.531543
2,(others),(amethyst),0.693136,0.053417,0.023958,0.034565,0.647086,-0.013067,0.980474
3,(amethyst),(others),0.053417,0.693136,0.023958,0.448519,0.647086,-0.013067,0.556435
4,(citrine),(others),0.034581,0.693136,0.016198,0.46841,0.675783,-0.007771,0.577256
5,(others),(citrine),0.693136,0.034581,0.016198,0.02337,0.675783,-0.007771,0.98852
6,(others),(jade),0.693136,0.053191,0.025767,0.037174,0.69888,-0.011102,0.983365
7,(jade),(others),0.053191,0.693136,0.025767,0.484419,0.69888,-0.011102,0.59518
8,(moonstone),(others),0.044376,0.693136,0.020869,0.470289,0.678494,-0.009889,0.579303
9,(others),(moonstone),0.693136,0.044376,0.020869,0.030109,0.678494,-0.009889,0.98529


In [73]:
frequent_itemsets = apriori(mba_df3, min_support = 0.01, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.060423,(agate),1
1,0.053417,(amethyst),1
2,0.016952,(angelite),1
3,0.02426,(aquamarine),1
4,0.010548,(aventurine),1
5,0.010548,(black tourmaline),1
6,0.034581,(citrine),1
7,0.015144,(clear quartz),1
8,0.01665,(fluorite),1
9,0.053191,(jade),1


In [74]:
# getting th item sets with length = 2 and support more han 10%

frequent_itemsets[ (frequent_itemsets['length'] == 2) &
                   (frequent_itemsets['support'] >= 0.01) ]

Unnamed: 0,support,itemsets,length
18,0.025541,"(others, agate)",2
19,0.023958,"(others, amethyst)",2
20,0.016198,"(citrine, others)",2
21,0.025767,"(others, jade)",2
22,0.020869,"(moonstone, others)",2
23,0.026143,"(others, rose quartz)",2
24,0.036917,"(others, selenite)",2
