In [1]:
import kaggle
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import coverage_error

In [2]:
# Authenticate with the Kaggle client, then download and unzip the strains dataset
# into a local folder next to the notebook.
kaggle_api = kaggle.api
kaggle_api.authenticate()
kaggle_api.dataset_download_files(
    'nvisagan/cannabis-strains-features',
    path='./cannabis-strains-features',
    unzip=True,
)

In [3]:
# Load the downloaded strains CSV into `data`, the frame that later cells clean and encode.
data = pd.read_csv('./cannabis-strains-features/Cannabis_Strains_Features.csv')


In [4]:
# Display the raw frame to inspect its columns before any cleaning.
data


Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...


In [5]:
def cols_for_ranks(col_name, frame=None):
    """Append one positional column per comma-separated token of ``col_name``.

    The string column is split on ``','`` and the pieces are added to the right
    of the frame as ``<col_name>_0``, ``<col_name>_1``, ... Rows with fewer
    tokens than the longest row get missing values in the trailing columns.

    Parameters
    ----------
    col_name : str
        Name of the comma-separated string column to expand.
    frame : pandas.DataFrame, optional
        Frame to expand. When omitted, the notebook-level ``data`` frame is
        used, which keeps existing one-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.
    """
    if frame is None:
        # Fall back to the notebook's working frame for backward compatibility.
        frame = data
    split_cols = frame[col_name].str.split(',', expand=True).add_prefix(col_name + '_')
    return pd.concat([frame, split_cols], axis=1)

In [6]:
# Preview the frame with the Flavor string split into positional Flavor_<i> columns.
cols_for_ranks('Flavor')

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2,Flavor_3
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus,
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel,
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody,
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit,
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange,
...,...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,Earthy,Woody,Pine,
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,Sweet,Berry,Grape,
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,Earthy,Sweet,Spicy/Herbal,
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,Sweet,Earthy,Pungent,


In [7]:
# Tally the fourth split column: a non-empty Flavor_3 means the Flavor string
# split into more than three comma-separated tokens.
Counter(cols_for_ranks('Flavor')['Flavor_3']).most_common()

[(None, 2308),
 ('Fruit', 16),
 ('Cheese', 5),
 ('Earthy', 4),
 ('Sweet', 3),
 ('Pine', 3),
 ('Flowery', 2),
 ('Chemical', 1),
 ('Blueberry', 1),
 ('Pineapple', 1),
 ('Pungent', 1),
 ('Berry', 1),
 ('Lemon', 1),
 ('Spicy/Herbal', 1),
 ('Orange', 1),
 ('Grape', 1),
 ('Apricot', 1)]

In [8]:
# Look at the first few strains whose Flavor string mentions 'Fruit'.
flavor_ranks = cols_for_ranks('Flavor')
mentions_fruit = flavor_ranks['Flavor'].str.contains('Fruit')
flavor_ranks[mentions_fruit].head(5)

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2,Flavor_3
32,Acid-Dough,sativa,5.0,"Talkative,Giggly,Happy,Hungry,Relaxed","Earthy,Woody,Tree Fruit",Acid Dough by Ripper Seeds is a sativa-dominan...,Earthy,Woody,Tree Fruit,
156,Bc-Sweet-Tooth,indica,4.3,"Uplifted,Happy,Relaxed,Sleepy,Euphoric","Sweet,Honey,Tree,Fruit","Developed in British Columbia by BC Bud Depot,...",Sweet,Honey,Tree,Fruit
163,Banana-Candy,indica,4.2,"Relaxed,Euphoric,Uplifted,Creative,Happy","Tree,Fruit,Earthy,Sweet",Banana Candy is classified as a Indica cannabi...,Tree,Fruit,Earthy,Sweet
166,Banana-Kush,hybrid,4.3,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Sweet,Tropical,Tree,Fruit",This legendary West Coast strain crosses Ghost...,Sweet,Tropical,Tree,Fruit
171,Bangi-Haze,sativa,0.0,"Aroused,Uplifted,Euphoric,Hungry","Berry,Tree,Fruit,Cheese",Bangi Haze by Ace Seeds is an energetic sativa...,Berry,Tree,Fruit,Cheese


In [9]:
# List every strain whose Flavor string contains a space character.
flavor_ranks = cols_for_ranks('Flavor')
has_space = flavor_ranks['Flavor'].str.contains(' ')
flavor_ranks[has_space]

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2,Flavor_3
5,3-Bears-Og,indica,4.4,"Relaxed,Happy,Sleepy,Creative,Euphoric\n","Sweet, Pungent, Earthy",3 Bears OG by Mephisto Genetics is an autoflow...,Sweet,Pungent,Earthy,
32,Acid-Dough,sativa,5.0,"Talkative,Giggly,Happy,Hungry,Relaxed","Earthy,Woody,Tree Fruit",Acid Dough by Ripper Seeds is a sativa-dominan...,Earthy,Woody,Tree Fruit,
106,Amnesia-Ganja-Haze,sativa,5.0,"Euphoric, Relaxed","Spicy/Herbal, Sweet",Amnesia Ganja Haze is another award-winning st...,Spicy/Herbal,Sweet,,
1237,Las-Vegas-Purple-Kush-Bx,indica,5.0,"Sleepy,Happy,Relaxed,Aroused,Creative","Sweet, Berry, Spicy/Herbal",Las Vegas Purple Kush BX is a clone-only strai...,Sweet,Berry,Spicy/Herbal,


In [10]:
# Remove spaces first, then merge the comma-split 'Tree,Fruit' pair, so both
# spellings end up as the single token 'TreeFruit'.
data['Flavor'] = (
    data['Flavor']
    .str.replace(' ', '')
    .str.replace('Tree,Fruit', 'TreeFruit')
)
cols_for_ranks('Flavor')

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2,Flavor_3
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus,
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel,
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody,
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit,
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange,
...,...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,Earthy,Woody,Pine,
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,Sweet,Berry,Grape,
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,Earthy,Sweet,Spicy/Herbal,
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,Sweet,Earthy,Pungent,


In [11]:
# Tally the fourth split column again to see which rows still split into four tokens.
Counter(cols_for_ranks('Flavor')['Flavor_3']).most_common()

[(None, 2342),
 ('Cheese', 4),
 ('Earthy', 2),
 ('Chemical', 1),
 ('Blueberry', 1),
 ('Pungent', 1)]

In [12]:
# Show the strains whose fourth flavor token is 'Cheese'.
flavor_ranks = cols_for_ranks('Flavor')
flavor_ranks[flavor_ranks['Flavor_3'] == 'Cheese']

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2,Flavor_3
195,Bettie-Page,hybrid,4.2,"Creative,Uplifted,Energetic,Euphoric,Relaxed","Earthy,Sweet,Blue,Cheese","Bettie Page, grown by Liberty Reach Farms in W...",Earthy,Sweet,Blue,Cheese
466,Cat-Piss,sativa,3.9,"Happy,Uplifted,Euphoric,Relaxed,Talkative","Earthy,Woody,Blue,Cheese",Originally a clone-only phenotype of Super Sil...,Earthy,Woody,Blue,Cheese
1141,Josh-D-Og,indica,3.7,"Aroused,Tingly,Uplifted,Creative,Euphoric","Berry,Blueberry,Blue,Cheese",Josh D OG by Karma Genetics is a handcrafted O...,Berry,Blueberry,Blue,Cheese
2330,X-Tra-Chz,hybrid,4.0,"Sleepy,Uplifted,Euphoric,Happy,Hungry","Pungent,Skunk,Blue,Cheese","X-tra Chz, bred by MTG Seeds, is a hybrid cros...",Pungent,Skunk,Blue,Cheese


In [13]:
# Merge the comma-split 'Blue,Cheese' pair into the single token 'BlueCheese'.
blue_cheese_merged = data['Flavor'].str.replace('Blue,Cheese', 'BlueCheese')
data['Flavor'] = blue_cheese_merged
cols_for_ranks('Flavor')

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange
...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,Earthy,Woody,Pine
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,Sweet,Berry,Grape
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,Earthy,Sweet,Spicy/Herbal
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,Sweet,Earthy,Pungent


In [14]:
# Collect the distinct values of the second flavor token to spot typos and stray characters.
set(cols_for_ranks('Flavor')['Flavor_1'])

{'Ammonia',
 'Apple',
 'Apricot',
 'Berry',
 'Berry\n',
 'Bluberry',
 'BlueCheese',
 'Blueberry',
 'Butter',
 'Cheese',
 'Chemical',
 'Chestnut',
 'Citrus',
 'Citrus\n',
 'Coffee',
 'Diesel',
 'Earthy',
 'Earthy\n',
 'Flowery',
 'Grape',
 'Grapefruit',
 'Honey',
 'Lavender',
 'Lemon',
 'Lime',
 'Mango',
 'Menthol',
 'Mint',
 'Minty',
 None,
 'Nutty',
 'Orange',
 'Pear',
 'Pepper',
 'Pine',
 'Pineapple',
 'Plum',
 'Pungent',
 'Rose',
 'Sage',
 'Skunk',
 'Spicy/Herbal',
 'Strawberry',
 'Sweet',
 'Sweet\n',
 'Tea',
 'Tobacco',
 'TreeFruit',
 'Tropical',
 'Vanilla',
 'Violet',
 'Woody'}

In [15]:
# Strip newlines, then normalise the misspelt / variant flavor tokens one by one.
flavor_clean = data['Flavor'].str.replace('\n', '')
for variant, canonical in (('Bluberry', 'Blueberry'), ('Grapes', 'Grape'), ('Minty', 'Mint')):
    flavor_clean = flavor_clean.str.replace(variant, canonical)
data['Flavor'] = flavor_clean

# Drop, in place, every strain whose Flavor string contains 'None'.
rows_without_flavor = data.loc[data['Flavor'].str.contains('None')].index
data.drop(index=rows_without_flavor, inplace=True)
cols_for_ranks('Flavor')

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Flavor_0,Flavor_1,Flavor_2
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Earthy,Sweet,Citrus
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Flowery,Violet,Diesel
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Spicy/Herbal,Sage,Woody
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Apricot,Citrus,Grapefruit
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Citrus,Earthy,Orange
...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,Earthy,Woody,Pine
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,Sweet,Berry,Grape
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,Earthy,Sweet,Spicy/Herbal
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,Sweet,Earthy,Pungent


In [16]:
# Preview the frame with the Effects string split into positional Effects_<i> columns.
cols_for_ranks('Effects')

Unnamed: 0,Strain,Type,Rating,Effects,Flavor,Description,Effects_0,Effects_1,Effects_2,Effects_3,Effects_4
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...,Creative,Energetic,Tingly,Euphoric,Relaxed
1,98-White-Widow,hybrid,4.7,"Relaxed,Aroused,Creative,Happy,Energetic","Flowery,Violet,Diesel",The ‘98 Aloha White Widow is an especially pot...,Relaxed,Aroused,Creative,Happy,Energetic
2,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...,Uplifted,Happy,Relaxed,Energetic,Creative
3,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...,Tingly,Creative,Hungry,Relaxed,Uplifted
4,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%...",Happy,Relaxed,Euphoric,Uplifted,Talkative
...,...,...,...,...,...,...,...,...,...,...,...
2346,Zeus-Og,hybrid,4.7,"Happy,Uplifted,Relaxed,Euphoric,Energetic","Earthy,Woody,Pine",Zeus OG is a hybrid cross between Pineapple OG...,Happy,Uplifted,Relaxed,Euphoric,Energetic
2347,Zkittlez,indica,4.6,"Relaxed,Happy,Euphoric,Uplifted,Sleepy","Sweet,Berry,Grape",Zkittlez is an indica-dominant mix of Grape Ap...,Relaxed,Happy,Euphoric,Uplifted,Sleepy
2348,Zombie-Kush,indica,5.0,"Relaxed,Sleepy,Talkative,Euphoric,Happy","Earthy,Sweet,Spicy/Herbal",Zombie Kush by Ripper Seeds comes from two dif...,Relaxed,Sleepy,Talkative,Euphoric,Happy
2349,Zombie-Og,indica,4.4,"Relaxed,Sleepy,Euphoric,Happy,Hungry","Sweet,Earthy,Pungent",If you’re looking to transform into a flesh-ea...,Relaxed,Sleepy,Euphoric,Happy,Hungry


In [17]:
# Apply the Effects clean-ups in order: newlines, spaces, the comma-split
# 'Dry,Mouth' pair, then the 'Energentic' misspelling.
effects_clean = data['Effects']
for raw, fixed in (('\n', ''), (' ', ''), ('Dry,Mouth', 'DryMouth'), ('Energentic', 'Energetic')):
    effects_clean = effects_clean.str.replace(raw, fixed)
data['Effects'] = effects_clean

# Drop, in place, every strain whose Effects string contains 'None'.
rows_without_effects = data.loc[data['Effects'].str.contains('None')].index
data.drop(index=rows_without_effects, inplace=True)

In [18]:
def binarize_data(col_name, var_type):
    """Encode a comma-separated label column of ``data`` as 0/1 indicator columns.

    Each distinct label found in ``data[col_name]`` becomes one column named
    ``<var_type>_<label>``. The returned frame keeps the index of ``data``.
    """
    label_lists = data[col_name].str.split(',')
    encoder = MultiLabelBinarizer()
    indicator_matrix = encoder.fit_transform(label_lists)
    indicators = pd.DataFrame(indicator_matrix, columns=encoder.classes_, index=data.index)
    return indicators.add_prefix(var_type + '_')


In [20]:
# Flavors become the model inputs and effects the multi-label targets,
# both as indicator columns.
features = binarize_data('Flavor', 'feature')
targets = binarize_data('Effects', 'target')

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
split = train_test_split(features, targets, test_size=0.2, random_state=10)
feats_train, feats_test, targets_train, targets_test = split

In [158]:
# Fit a random forest on the flavor indicators and score it on the held-out split.
rf = RandomForestClassifier(n_estimators=100, random_state=10)
rf.fit(feats_train, targets_train)
print('subset accuracy: ' + str(round(100 * rf.score(feats_test, targets_test), 2)) + '%')

targets_test_pred = rf.predict(feats_test)
# NOTE(review): coverage_error ranks labels by score, and hard 0/1 predictions are
# heavily tied. Treat this number as comparable only with the other cells that
# also pass binary predictions; consider passing probabilities instead.
print('coverage error: ' + str(round(coverage_error(targets_test, targets_test_pred), 2)))

true_labels = targets_test.values

# A label counts as correct when it is 1 in both the truth and the prediction,
# i.e. the element-wise sum equals 2.
hits_per_sample = ((true_labels + targets_test_pred) == 2).sum(axis=1)
print('avg number of correct labels: ' + str(round(np.mean(hits_per_sample), 2)))

print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

# Hand-rolled subset accuracy: a sample is correct only if every label matches.
# The label count comes from the data instead of a hard-coded 14, so the check
# stays valid if the set of effect labels changes.
exact_match = (true_labels == targets_test_pred).all(axis=1)
print('subset accuracy [custom implementation]: ' + str(round(int(exact_match.sum()) / len(exact_match) * 100, 2)) + '%')

subset accuracy: 2.61%
coverage error: 13.68
avg number of correct labels: 2.92
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
subset accuracy [custom implementation]: 2.61%


In [159]:
def null_rand_draw(test_samp_size, random_state=None):
    """Null model: predict 5 labels per sample, drawn by label frequency.

    For each sample, five distinct columns of the notebook-level ``targets``
    frame are drawn without replacement, with probability proportional to how
    often each label is set in ``targets``.

    Parameters
    ----------
    test_samp_size : int
        Number of prediction rows to generate.
    random_state : int, optional
        Seed for a private ``numpy.random.RandomState``. When omitted, the
        global ``numpy.random`` state is used, as before.

    Returns
    -------
    list of list of int
        One 0/1 row per sample, ordered like ``targets.columns``.
    """
    # Column sums with an explicit axis: np.sum(DataFrame) relies on the
    # axis=None behaviour that pandas has deprecated.
    label_counts = targets.sum(axis=0)
    labels = list(label_counts.index)
    probs = (label_counts / label_counts.sum()).to_numpy()
    columns = list(targets.columns)

    rng = np.random if random_state is None else np.random.RandomState(random_state)

    preds = []
    for _ in range(test_samp_size):
        drawn = list(rng.choice(a=labels, size=5, replace=False, p=probs))
        preds.append([1 if target in drawn else 0 for target in columns])
    return preds

In [162]:
# Score the frequency-weighted random baseline with the same three metrics.
targets_test_pred = null_rand_draw(len(targets_test))

truth = targets_test.values
guess = np.asarray(targets_test_pred)

# Subset accuracy: share of samples where every label position agrees.
zeros_per_row = ((truth - guess) == 0).sum(axis=1)
n_exact = int((zeros_per_row == len(targets.columns)).sum())
print('subset accuracy: ' + str(round(n_exact / len(zeros_per_row) * 100, 2)) + '%')

print('coverage error: ' + str(round(coverage_error(targets_test, targets_test_pred), 2)))

# A label is correct when truth and prediction are both 1 (element-wise sum of 2).
hits_per_row = ((truth + guess) == 2).sum(axis=1)
print('avg number of correct labels: ' + str(round(np.mean(hits_per_row), 2)))

subset accuracy [custom implementation]: 0.87%
coverage error: 13.88
avg number of correct labels: 2.54


In [124]:
def null_top_5(test_samp_size):
    """Null model: predict the five most frequent labels for every sample.

    The five columns of the notebook-level ``targets`` frame with the largest
    column sums are marked 1 and all others 0.

    Parameters
    ----------
    test_samp_size : int
        Number of prediction rows to generate.

    Returns
    -------
    list of list of int
        ``test_samp_size`` independent 0/1 rows, ordered like ``targets.columns``.
    """
    # Column sums with an explicit axis: np.sum(DataFrame) relies on the
    # axis=None behaviour that pandas has deprecated.
    top5 = list(targets.sum(axis=0).sort_values(ascending=False).head(5).index)
    row = [1 if target in top5 else 0 for target in list(targets.columns)]
    # Copy the row per sample; `[row] * n` would return n references to one list.
    return [list(row) for _ in range(test_samp_size)]

In [163]:
# Score the "always predict the five most common labels" baseline with the same metrics.
targets_test_pred = null_top_5(len(targets_test))

truth = targets_test.values
guess = np.asarray(targets_test_pred)

# Subset accuracy: share of samples where every label position agrees.
zeros_per_row = ((truth - guess) == 0).sum(axis=1)
n_exact = int((zeros_per_row == len(targets.columns)).sum())
print('subset accuracy: ' + str(round(n_exact / len(zeros_per_row) * 100, 2)) + '%')

print('coverage error: ' + str(round(coverage_error(targets_test, targets_test_pred), 2)))

# A label is correct when truth and prediction are both 1 (element-wise sum of 2).
hits_per_row = ((truth + guess) == 2).sum(axis=1)
print('avg number of correct labels: ' + str(round(np.mean(hits_per_row), 2)))

subset accuracy [custom implementation]: 6.97%
coverage error: 13.29
avg number of correct labels: 3.32


In [71]:
# Show the fitted forest's full parameter set, including the defaults not passed explicitly.
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 10,
 'verbose': 0,
 'warm_start': False}