In [1]:
import os

import numpy as np
import pandas as pd
from huggingface_hub.inference_api import InferenceApi

# Lift pandas' display limits so wide/long frames and long text cells are shown in full.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [2]:
def get_classes(row, labels, min_score):
    """Return the labels in ``row`` that score strictly above ``min_score``.

    ``row`` is indexed by ``labels`` to pick out the score entries, which are
    then ordered from highest to lowest. The result is a NumPy array of the
    surviving label names in that order (empty when nothing passes).
    """
    ranked = row[labels].sort_values(ascending=False)
    return ranked.loc[ranked > min_score].index.values

In [3]:
def results_to_df(results, labels, sequences, min_score=0.9):
    """Tabulate classification results, one row per entry of ``results``.

    Parameters
    ----------
    results : list of dict
        Each entry is read through its ``'labels'`` and ``'scores'`` keys,
        which are paired up position by position.
    labels : list of str
        Score columns of the output, in this order. A label missing from a
        result is scored 0.0; a result label not listed here is ignored.
    sequences : list
        Stored as the ``seq`` column; must line up with ``results``.
    min_score : float, default 0.9
        A label counts as a class only when its score is strictly greater.

    Returns
    -------
    pandas.DataFrame
        Float score columns for ``labels``, followed by ``classes`` (labels
        above the threshold, best first), ``has_class``, ``max_class``
        (best passing label, or None) and ``seq``.
    """
    # Pair every returned label with its own score instead of indexing both
    # lists up to len(labels): a result carrying fewer entries than `labels`
    # no longer raises IndexError, and the frame is built in one shot rather
    # than cell by cell.
    per_sequence = [dict(zip(r['labels'], r['scores'])) for r in results]
    scores = (
        pd.DataFrame(per_sequence)
        .reindex(columns=labels)   # fixed column order; unknown labels dropped
        .fillna(0.0)               # labels a result did not mention score 0
        .astype(float)
    )

    scores['classes'] = scores.apply(lambda row: get_classes(row, labels, min_score), axis=1)
    scores['has_class'] = scores['classes'].apply(lambda found: len(found) > 0)
    scores['max_class'] = scores['classes'].apply(lambda found: found[0] if len(found) > 0 else None)
    scores['seq'] = sequences
    return scores

In [4]:
# Read the Hugging Face token from the environment instead of embedding it in
# the notebook, where it would be shared with every copy of the file.
# Set HF_TOKEN before starting the kernel; a missing variable raises KeyError.
# NOTE: the token that used to be hardcoded here should be treated as
# compromised and revoked.
API_TOKEN = os.environ["HF_TOKEN"]
inference = InferenceApi(repo_id="facebook/bart-large-mnli", token=API_TOKEN)

In [5]:
# Two example sentences and a short candidate label list.
sequences_to_classify = [
    "one day I will see the world",
    "best steak in america",
]
candidate_labels = [
    'travel',
    'cooking',
    'dancing',
    'exploration',
]

In [6]:
# Column names are kept one per line in a side file; the TSV is read with
# header=None, so these names are applied to its columns.
# The context manager closes the file handle, which the bare open() leaked.
with open("videostoingest_cols.txt") as cols_file:
    cols = [line.strip() for line in cols_file]
print(cols)

vd = pd.read_csv('VideosToIngest_2022-09-01.tsv', sep='\t', header=None, names=cols)

['VideoKey', 'VideoId', 'VideoUrl', 'ChannelName', 'ChannelId', 'Description', 'Title', 'PlayerType', 'PlayerParams', 'ProductsJson', 'ChannelImage', 'Status', 'StartTimeInMillis', 'DurationInSeconds', 'ThumbnailUrl', 'TrailerUrl', 'Tags', 'MinPrice', 'ViewCount', 'Market', 'ChannelKey', 'PartnerAlias', 'StreamingType', 'StreamingUrl']


In [7]:
# Work on the first five videos only.
# NOTE(review): the saved DistilBERT table further down has 10 rows, so it was
# apparently produced with a larger sample than this one; re-run to reconcile.
samples = vd.iloc[:5]

In [8]:
# Pull the description and title text of the sampled videos out as plain lists.
desc, titles = (
    list(samples[column].values) for column in ('Description', 'Title')
)

In [9]:
# Full list of category labels to score titles against (replaces the short example list).
candidate_labels = [
    'travel',
    'food',
    'kitchen',
    'home improvement',
    'home decor',
    'home furnishings',
    'apparel',
    'shoes',
    'lawn and garden',
    'computing',
    'electronics',
    'art and craft',
    'photography',
    'storage',
    'organization',
    'pets',
    'baby',
    'kids',
    'music',
    'car and garage',
    'toys',
    'tools and hardware',
    'jewelry and watches',
    'sports',
    'outdoor',
    'books and magazines',
    'health',
    'beauty',
    'fragrance',
    'productivity',
    'office supplies',
    'holiday',
    'deals',
    'review',
]
len(candidate_labels)

34

In [10]:
# Only the first ten candidate labels are used in the requests below.
sel_labels = candidate_labels[0:10]

In [11]:
# Request parameters: the selected labels, scored with multi_label enabled.
params = dict(candidate_labels=sel_labels, multi_label=True)

In [12]:
# Send the sampled titles to the BART MNLI endpoint.
results_bart = inference(inputs=titles, params=params)

In [13]:
# Show the raw response: one dict per title with 'sequence', 'labels' and 'scores'.
results_bart

[{'sequence': 'TOP TEN LARGE WALL ART DIY IDEAS | DIY IKEA HACK | AFFORDABLE & AESTHETIC',
  'labels': ['home decor',
   'home improvement',
   'home furnishings',
   'computing',
   'kitchen',
   'travel',
   'apparel',
   'shoes',
   'food',
   'lawn and garden'],
  'scores': [0.07297678291797638,
   0.05170673877000809,
   0.026421133428812027,
   0.0014238181756809354,
   0.001409256481565535,
   0.0005605021142400801,
   0.00033417652593925595,
   0.00015858169354032725,
   0.00012850352504756302,
   0.00010840879986062646]},
 {'sequence': 'TOP 20 DIY DOLLAR TREE FALL HOME DECOR COMPILATION 2022 | HIGH END & NOT CHEESY FALL DECOR',
  'labels': ['home decor',
   'home improvement',
   'home furnishings',
   'computing',
   'kitchen',
   'travel',
   'food',
   'apparel',
   'lawn and garden',
   'shoes'],
  'scores': [0.9959635734558105,
   0.8865137696266174,
   0.8708468079566956,
   0.5940498113632202,
   0.15482766926288605,
   0.0009228277485817671,
   0.00046548040700145066,


In [14]:
# Tabulate the BART scores; min_score is left at the results_to_df default.
df_bart = results_to_df(
    results_bart,
    sequences=titles,
    labels=sel_labels,
)

In [15]:
# Display the per-title score table for the BART model.
df_bart

Unnamed: 0,travel,food,kitchen,home improvement,home decor,home furnishings,apparel,shoes,lawn and garden,computing,classes,has_class,max_class,seq
0,0.000561,0.000129,0.001409,0.051707,0.072977,0.026421,0.000334,0.000159,0.000108,0.001424,[],False,,TOP TEN LARGE WALL ART DIY IDEAS | DIY IKEA HACK | AFFORDABLE & AESTHETIC
1,0.000923,0.000465,0.154828,0.886514,0.995964,0.870847,0.00031,7.8e-05,0.000152,0.59405,[home decor],True,home decor,TOP 20 DIY DOLLAR TREE FALL HOME DECOR COMPILATION 2022 | HIGH END & NOT CHEESY FALL DECOR
2,0.000788,0.00041,0.505863,0.619939,0.447389,0.956606,0.000778,0.000243,0.026142,0.018168,[home furnishings],True,home furnishings,"DIY IKEA HACKS | AFFORDABLE, AESTHETIC, AND SUPER EASY 2022"
3,0.000171,0.000134,0.16379,0.989792,0.997865,0.995192,0.000317,0.000112,7.4e-05,0.001345,"[home decor, home furnishings, home improvement]",True,home decor,THRIFT WITH ME FOR HIGH END HOME DECOR | DIY THRIFT FLIP ROOM DECOR + HUGE DECOR THRIFT HAUL
4,0.01948,0.000743,0.402995,0.671087,0.993471,0.95077,0.005256,0.002019,0.1667,0.081726,"[home decor, home furnishings]",True,home decor,TOP 22 THRIFTED HOME DECOR FINDS | DIY THRIFTED HOME DECOR COMPILATION


In [10]:
# Second client, pointing at the DistilBERT MNLI model, for comparison with BART.
inference_distilbert = InferenceApi(
    token=API_TOKEN,
    repo_id="typeform/distilbert-base-uncased-mnli",
)

In [16]:
# Same titles and parameters as the BART request, sent to the DistilBERT endpoint.
results = inference_distilbert(inputs=titles, params=params)

In [21]:
# Tabulate the DistilBERT scores. Reuse sel_labels, the list that was sent in
# `params`, rather than re-slicing candidate_labels, so the table columns
# cannot drift from the labels the model was actually asked about.
df_distilbert = results_to_df(results, labels=sel_labels, sequences=titles)

In [22]:
# Display the per-title score table for the DistilBERT model.
df_distilbert

Unnamed: 0,travel,food,kitchen,home improvement,home decor,home furnishings,apparel,shoes,lawn and garden,computing,classes,has_class,max_class,seq
0,4e-05,4.8e-05,4.6e-05,0.001367,8.7e-05,8.7e-05,0.002411,4.2e-05,0.000112,4.4e-05,[],False,,TOP TEN LARGE WALL ART DIY IDEAS | DIY IKEA HACK | AFFORDABLE & AESTHETIC
1,6.8e-05,0.000119,7.4e-05,0.619678,0.720001,0.984961,0.124405,0.000132,0.064587,8e-05,[home furnishings],True,home furnishings,TOP 20 DIY DOLLAR TREE FALL HOME DECOR COMPILATION 2022 | HIGH END & NOT CHEESY FALL DECOR
2,6.4e-05,0.001197,0.000113,0.934864,0.814112,0.989971,0.991312,6.8e-05,0.28458,0.100804,"[apparel, home furnishings, home improvement]",True,apparel,"DIY IKEA HACKS | AFFORDABLE, AESTHETIC, AND SUPER EASY 2022"
3,0.01194,0.070653,8.2e-05,0.714349,0.995737,0.997555,0.215293,0.005084,0.016319,0.001104,"[home furnishings, home decor]",True,home furnishings,THRIFT WITH ME FOR HIGH END HOME DECOR | DIY THRIFT FLIP ROOM DECOR + HUGE DECOR THRIFT HAUL
4,0.000165,0.00069,0.000188,0.72073,0.999655,0.99984,0.011078,0.00024,0.275992,0.019098,"[home furnishings, home decor]",True,home furnishings,TOP 22 THRIFTED HOME DECOR FINDS | DIY THRIFTED HOME DECOR COMPILATION
5,0.017835,0.002699,0.00013,0.513331,0.591688,0.666838,0.831846,0.00546,0.012879,0.348965,[],False,,DECORATE WITH ME FOR FALL | DIY THRIFT FLIPS + HUGE FALL INSPIRED THRIFT HAUL 2022
6,6.3e-05,0.000102,0.000126,0.672213,0.977235,0.99847,0.007537,6.9e-05,0.400024,0.000117,"[home furnishings, home decor]",True,home furnishings,DIY THRIFT FLIP HOME DECOR ON A BUDGET
7,6.5e-05,0.000219,0.000222,0.997691,0.977955,0.999222,0.278022,0.004304,0.927216,0.160939,"[home furnishings, home improvement, home decor, lawn and garden]",True,home furnishings,New Most Affordable Indoor and Outdoor Home Lighting?! Inlight Collection Unboxing and Install
8,0.000132,0.000105,9.2e-05,0.000422,0.011346,0.974358,0.029242,0.000123,0.120526,0.090578,[home furnishings],True,home furnishings,HOW TO: 90'S BLOWOUT AT HOME
9,0.991687,6.3e-05,0.000117,0.594497,0.001018,0.156248,0.396392,0.001004,0.099121,0.172997,[travel],True,travel,Desk Tour 2022: Minimal Work & Gaming Setup
