In [1]:
%matplotlib inline
from gensim.utils import simple_preprocess
from gensim.models import Word2Vec
from tqdm.notebook import tqdm
from xgboost import XGBRegressor
import lightgbm as lgbm

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import pickle
import os

  import pandas.util.testing as tm


In [2]:
# Load the pickled XGBoost model and the Word2Vec model from the local `mdl/` directory.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load model files that were produced by a trusted source.
with open(os.path.join('mdl', 'xgb_all.pkl'), 'rb') as model_file:
    # The context manager closes the handle even if unpickling fails.
    xgb_all = pickle.load(model_file)
w2v_model = Word2Vec.load(os.path.join('mdl', 'word2vec.model'))

In [3]:
def get_wv_vector(string):
    """Return the mean Word2Vec embedding of the in-vocabulary tokens of `string`.

    The text is tokenized with gensim's `simple_preprocess`; tokens that the
    loaded `w2v_model` does not know are dropped, and the vectors of the
    remaining tokens are averaged along axis 0.

    Args:
        string: Free text (title, description, comma-separated tags, ...).

    Returns:
        The element-wise mean of the word vectors of the kept tokens.

    Raises:
        ValueError: If none of the tokens of `string` is in the model
            vocabulary, since there is nothing to average in that case.
    """
    keyed_vectors = w2v_model.wv
    # gensim >= 4 exposes the vocabulary as `key_to_index`; gensim 3.x uses `vocab`.
    vocabulary = getattr(keyed_vectors, 'key_to_index', None)
    if vocabulary is None:
        vocabulary = keyed_vectors.vocab

    tokens = [word for word in simple_preprocess(string) if word in vocabulary]
    if not tokens:
        # Fail with an explicit message instead of an opaque error from the
        # empty vector lookup / mean below.
        raise ValueError(f'No in-vocabulary words found in {string!r}')
    return np.mean(keyed_vectors[tokens], axis=0)

### Original

In [13]:
# Embed the three text fields of the original upload (title, description, tags)
# in one pass; each field becomes its mean word vector.
original_title, original_description, original_tags = (
    get_wv_vector(text)
    for text in (
        'Puppy Meets Their Reflection',
        'My puppy, Maggie, a bernedoodle meets her reflection for the first time!',
        'puppy, dog, bernedoodle, reflection, funny, cute',
    )
)

In [5]:
# Assemble the baseline feature row: image block, category one-hot,
# day one-hot, then the averaged text embedding.
img_feature = [0] * 5  # img
cat_feature = [0] * 16
cat_feature[3] = 1  # category one-hot at index 3
day_feature = [0] * 7
day_feature[4] = 1  # day one-hot at index 4
# Average the title, description and tag embeddings into a single vector.
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = [*img_feature, *cat_feature, *day_feature, *wrd_feature]

In [6]:
# Score the baseline feature row as a one-row batch and keep its first result.
baseline_batch = np.array([feature])
baseline_scores = xgb_all.predict(baseline_batch, validate_features=False)
original_prediction = baseline_scores[0]
print(f'Expected View Count: {original_prediction}')

Expected View Count: 1758881.5


### Modified

**Day**

In [7]:
# Day sweep: score the same video once per possible day one-hot position.
# The sweep uses its own temporaries so the baseline `day_feature`, `feature`
# and `original_prediction` computed in the earlier cells are not overwritten.
img_feature = [0,0,0,0,0] # img
cat_feature = [0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0]
for i in range(len(day_feature)):
    # One-hot vector with only day position `i` switched on.
    day_candidate = [0] * len(day_feature)
    day_candidate[i] = 1
    candidate = img_feature + cat_feature + day_candidate + list(wrd_feature)
    day_prediction = xgb_all.predict(np.array([candidate]), validate_features=False)[0]
    print(f'Expected View Count {i}: {day_prediction}')

Expected View Count 0: 1966316.0
Expected View Count 1: 1945980.875
Expected View Count 2: 1964039.75
Expected View Count 3: 1889141.625
Expected View Count 4: 1758881.5
Expected View Count 5: 1947795.875
Expected View Count 6: 1966316.0


The best day is Friday (day index 0, which ties with index 6 at the highest predicted view count).

**Category**

In [8]:
# Category sweep: score the same video once per possible category one-hot
# position, with the day pinned to index 0.
# The sweep uses its own temporaries so `cat_feature`, `feature` and
# `original_prediction` from the earlier cells are not overwritten.
img_feature = [0,0,0,0,0] # img
day_feature = [1,0,0,0,0,0,0]
for i in range(len(cat_feature)):
    # One-hot vector with only category position `i` switched on.
    cat_candidate = [0] * len(cat_feature)
    cat_candidate[i] = 1
    candidate = img_feature + cat_candidate + day_feature + list(wrd_feature)
    cat_prediction = xgb_all.predict(np.array([candidate]), validate_features=False)[0]
    print(f'Expected View Count {i}:\t{cat_prediction}')

Expected View Count 0:	2137770.25
Expected View Count 1:	2041368.875
Expected View Count 2:	2660398.25
Expected View Count 3:	1966316.0
Expected View Count 4:	2041368.875
Expected View Count 5:	1922074.125
Expected View Count 6:	2052423.875
Expected View Count 7:	2063359.375
Expected View Count 8:	2098657.0
Expected View Count 9:	4485541.0
Expected View Count 10:	2041368.875
Expected View Count 11:	2041368.875
Expected View Count 12:	2041368.875
Expected View Count 13:	1937861.75
Expected View Count 14:	2041368.875
Expected View Count 15:	2041368.875


The best category is Entertainment.

**Title**

In [15]:
# Title variant 'Puppy Barks at Their Reflection'; description and tags are
# the same strings as the original upload.
original_title = get_wv_vector('Puppy Barks at Their Reflection')
original_description = get_wv_vector('My puppy, Maggie, a bernedoodle meets her reflection for the first time!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)

# Pin the category to index 9 and the day to index 0 for the text experiments.
img_feature = [0,0,0,0,0] # img
cat_feature = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
cat_feature[9] = 1
day_feature = [1,0,0,0,0,0,0]
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	1035820.9375


In [10]:
# Title variant 'Puppy Barks Loves Their Reflection'; description and tags unchanged.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks Loves Their Reflection')
original_description = get_wv_vector('My puppy, Maggie, a bernedoodle meets her reflection for the first time!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	1493918.5


In [11]:
# Title variant 'Puppy Barks Scared of Their Reflection'; description and tags unchanged.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks Scared of Their Reflection')
original_description = get_wv_vector('My puppy, Maggie, a bernedoodle meets her reflection for the first time!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	1759097.25


In [12]:
# Title variant 'Puppy Barks at Themself'; description and tags unchanged.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy, Maggie, a bernedoodle meets her reflection for the first time!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2311209.25


**Description**

In [13]:
# Description variant ('... meets her reflection and gets scared!') with the
# title 'Puppy Barks at Themself' and the original tags.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy, Maggie, a bernedoodle meets her reflection and gets scared!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	817835.125


In [14]:
# Description variant ('Maggie, a puppy bernedoodle, sees her reflection ...')
# with the title 'Puppy Barks at Themself' and the original tags.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('Maggie, a puppy bernedoodle, sees her reflection for the first time. She barks and has so much fun')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	516481.4375


In [15]:
# Description variant ('Crazy puppy barks at her reflection. ...') with the
# title 'Puppy Barks at Themself' and the original tags.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('Crazy puppy barks at her reflection. Tail wagging, she\'s having so much!')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2263795.25


In [16]:
# Description variant ('My puppy has so much fun as she barks at her reflection.')
# with the title 'Puppy Barks at Themself' and the original tags.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy has so much fun as she barks at her reflection.')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2871745.25


**Tags**

In [17]:
# Tags variant: the original tags plus 'bernese' and 'tail'; title and
# description are the same strings as in the previous experiment.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy has so much fun as she barks at her reflection.')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute, bernese, tail')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2897612.5


In [18]:
# Tags variant: the original tags plus 'bernese', 'tail wag' and 'joy'.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy has so much fun as she barks at her reflection.')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, funny, cute, bernese, tail wag, joy')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2104635.75


In [23]:
# Tags variant: same as the 'tail wag, joy' tag list but without 'funny'.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Themself')
original_description = get_wv_vector('My puppy has so much fun as she barks at her reflection.')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, cute, bernese, tail wag, joy')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	2980837.25


### Using best title

In [16]:
# Final combination: title 'Puppy Barks at Their Reflection' together with the
# description and tag list used in the last tags experiment.
# Reuses img_feature / cat_feature / day_feature as set by an earlier cell.
original_title = get_wv_vector('Puppy Barks at Their Reflection')
original_description = get_wv_vector('My puppy has so much fun as she barks at her reflection.')
original_tags = get_wv_vector('puppy, dog, bernedoodle, reflection, cute, bernese, tail wag, joy')
wrd_feature = np.mean([original_title, original_description, original_tags], axis=0)
feature = img_feature + cat_feature + day_feature + list(wrd_feature)

original_prediction = xgb_all.predict(np.array([feature]), validate_features=False)[0]
# The label deliberately omits the loop index `i`: no loop runs in this cell,
# so `i` would only be a stale leftover from an earlier cell.
print(f'Expected View Count:\t{original_prediction}')

Expected View Count 15:	6718124.5
