<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Acknowledgements" data-toc-modified-id="Acknowledgements-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Acknowledgements</a></span></li><li><span><a href="#Prepare-data-and-model" data-toc-modified-id="Prepare-data-and-model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Prepare data and model</a></span></li><li><span><a href="#Make-feature-matrix-(word2vec,-votes,-stars)" data-toc-modified-id="Make-feature-matrix-(word2vec,-votes,-stars)-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Make feature matrix (word2vec, votes, stars)</a></span></li><li><span><a href="#Create-Label-y-(Business-categories)" data-toc-modified-id="Create-Label-y-(Business-categories)-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Create Label y (Business categories)</a></span></li><li><span><a href="#Join-x,y-(feature-matrix,-category)-using-business_id" data-toc-modified-id="Join-x,y-(feature-matrix,-category)-using-business_id-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Join x,y (feature matrix, category) using business_id</a></span></li><li><span><a href="#Category-Prediction" data-toc-modified-id="Category-Prediction-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Category Prediction</a></span><ul class="toc-item"><li><span><a href="#Plot-ROC-curve-to-assess" data-toc-modified-id="Plot-ROC-curve-to-assess-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Plot ROC curve to assess</a></span></li></ul></li><li><span><a href="#Cluster-with-metadata-(useful,-cool,-funny,-stars)" data-toc-modified-id="Cluster-with-metadata-(useful,-cool,-funny,-stars)-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Cluster with metadata (useful, cool, funny, stars)</a></span></li></ul></div>

# Acknowledgements
Thanks to the tutorial: https://www.kaggle.com/c/word2vec-nlp-tutorial/overview/part-3-more-fun-with-word-vectors

# Prepare data and model

In [92]:
%matplotlib inline
import pandas as pd
pd.options.display.max_columns = 999
import numpy as np
import matplotlib.pyplot as plt

import re

import nltk
import nltk.data
nltk.download('stopwords')
from nltk.corpus import stopwords # Import the stop word list



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/daviderickson/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def load_reviews(size='small'): 
    if size == 'small':
        filename = r'../../data/small-review.json'
    elif size == 'intermediate':
        filename = r'../../data/intermediate-review.json'
    elif size == 'full':
        filename = r'../../data/review.json'
    new_list = []
    for line in open(filename):
       new_list.append(json.loads(line))
    return pd.DataFrame.from_records(new_list)

dfreviews = load_reviews(size='intermediate')

In [3]:
dfreviews.head()

Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id
0,ujmEBvifdJM6h6RLv4wQIg,0,2013-05-07 04:34:36,1,Q1sbwvVQXV2734tPgoKj4Q,1.0,Total bill for this horrible service? Over $8G...,6,hG7b0MtEbXx5QzbzE6C_VA
1,NZnhc2sEQy3RmzKTZnqtwQ,0,2017-01-14 21:30:33,0,GJXCdrto3ASJOqKeVWPi6Q,5.0,I *adore* Travis at the Hard Rock's new Kelly ...,0,yXQM5uF2jS6es16SJzNHfg
2,WTqjgwHlXbSFevF32_DJVw,0,2016-11-09 20:09:03,0,2TzJjDVDEuAW6MR5Vuc1ug,5.0,I have to say that this office really has it t...,3,n6-Gk65cPZL6Uz8qRm3NYw
3,ikCg8xy5JIg_NGPx-MSIDA,0,2018-01-09 20:56:38,0,yi0R0Ugj_xUx_Nek0-_Qig,5.0,Went in for a lunch. Steak sandwich was delici...,0,dacAIZ6fTM6mqwW5uxkskg
4,b1b1eb3uo-w561D0ZfCEiQ,0,2018-01-30 23:07:38,0,11a8sVPMUFtaC7_ABRkmtw,1.0,Today was my second out of three sessions I ha...,7,ssoyf2_x0EQMed6fgHeMyQ


In [4]:
dfreviews.columns

Index(['business_id', 'cool', 'date', 'funny', 'review_id', 'stars', 'text',
       'useful', 'user_id'],
      dtype='object')

In [5]:
dfreviews['text'][0]

'Total bill for this horrible service? Over $8Gs. These crooks actually had the nerve to charge us $69 for 3 pills. I checked online the pills can be had for 19 cents EACH! Avoid Hospital ERs at all costs.'

In [6]:
# For simplicity, drop anything that isn't a letter
# Numbers and symbols may have interesting meaning and could be explore later

def lettersOnly(string):
    return re.sub("[^a-zA-Z]", " ", string) 

dfreviews['text'] = dfreviews['text'].apply(lettersOnly)


In [7]:
dfreviews['text'][0]

'Total bill for this horrible service  Over   Gs  These crooks actually had the nerve to charge us     for   pills  I checked online the pills can be had for    cents EACH  Avoid Hospital ERs at all costs '

In [8]:
def review_to_wordlist(string, remove_stopwords=False):
    string = re.sub("[^a-zA-Z]", " ", string) # keep only letters. more complex model possible later
    words =  string.lower().split() # make everything lowercase. split into words
    if remove_stopwords:
        stops = set(stopwords.words('english')) # create a fast lookup for stopwords
        words = [w for w in words if not w in stops] # remove stopwords
    return( words) # return a list of words
    
# dfreviews['text'] = dfreviews['text'].apply(review_to_words) # apply to reviews in dataframe


In [9]:
# Word2Vec expects single sentences, each one as a list of words

# Load the punkt tokenizer
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

# Define a function to split a review into parsed sentences
def review_to_sentences( review, tokenizer, remove_stopwords=False ):
    # Function to split a review into parsed sentences. Returns a 
    # list of sentences, where each sentence is a list of words
    #
    # 1. Use the NLTK tokenizer to split the paragraph into sentences
    raw_sentences = tokenizer.tokenize(review.strip())
    #
    # 2. Loop over each sentence
    sentences = []
    for raw_sentence in raw_sentences:
        # If a sentence is empty, skip it
        if len(raw_sentence) > 0:
            # Otherwise, call review_to_wordlist to get a list of words
            sentences.append( review_to_wordlist( raw_sentence, \
              remove_stopwords ))
    #
    # Return the list of sentences (each sentence is a list of words,
    # so this returns a list of lists
    return sentences

In [10]:
sentences = []  # Initialize an empty list of sentences

print("Parsing sentences")
for review in dfreviews["text"]:
    sentences += review_to_sentences(review, tokenizer)

Parsing sentences


In [11]:
# Import the built-in logging module and configure it so that Word2Vec 
# creates nice output messages
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',\
    level=logging.INFO)

# Set values for various parameters
num_features = 300    # Word vector dimensionality                      
min_word_count = 40   # Minimum word count                        
num_workers = 4       # Number of threads to run in parallel
context = 10          # Context window size                                                                                    
downsampling = 1e-3   # Downsample setting for frequent words

# Initialize and train the model (this will take some time)
from gensim.models import word2vec
print("Training model...")
model = word2vec.Word2Vec(sentences, workers=num_workers, \
            size=num_features, min_count = min_word_count, \
            window = context, sample = downsampling)

# If you don't plan to train the model any further, calling 
# init_sims will make the model much more memory-efficient.
model.init_sims(replace=True)

# It can be helpful to create a meaningful model name and 
# save the model for later use. You can load it later using Word2Vec.load()
model_name = "300features_40minwords_10context"
model.save(model_name)

2020-01-17 17:29:12,471 : INFO : 'pattern' package not found; tag filters are not available for English
2020-01-17 17:29:12,481 : INFO : collecting all words and their counts
2020-01-17 17:29:12,482 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-01-17 17:29:12,667 : INFO : PROGRESS: at sentence #10000, processed 1088334 words, keeping 25539 word types


Training model...


2020-01-17 17:29:12,846 : INFO : PROGRESS: at sentence #20000, processed 2172597 words, keeping 35463 word types
2020-01-17 17:29:13,011 : INFO : PROGRESS: at sentence #30000, processed 3251616 words, keeping 42649 word types
2020-01-17 17:29:13,188 : INFO : PROGRESS: at sentence #40000, processed 4373996 words, keeping 48893 word types
2020-01-17 17:29:13,358 : INFO : PROGRESS: at sentence #50000, processed 5471587 words, keeping 53964 word types
2020-01-17 17:29:13,527 : INFO : PROGRESS: at sentence #60000, processed 6570064 words, keeping 58362 word types
2020-01-17 17:29:13,696 : INFO : PROGRESS: at sentence #70000, processed 7667364 words, keeping 62704 word types
2020-01-17 17:29:13,866 : INFO : PROGRESS: at sentence #80000, processed 8768955 words, keeping 66443 word types
2020-01-17 17:29:14,035 : INFO : PROGRESS: at sentence #90000, processed 9872097 words, keeping 70199 word types
2020-01-17 17:29:14,208 : INFO : collected 73717 word types from a corpus of 10978770 raw words 

2020-01-17 17:29:56,274 : INFO : EPOCH 5 - PROGRESS: at 42.21% examples, 810529 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:29:57,284 : INFO : EPOCH 5 - PROGRESS: at 53.21% examples, 817122 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:29:58,302 : INFO : EPOCH 5 - PROGRESS: at 63.46% examples, 813511 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:29:59,314 : INFO : EPOCH 5 - PROGRESS: at 71.62% examples, 787696 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:30:00,314 : INFO : EPOCH 5 - PROGRESS: at 80.73% examples, 777280 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:30:01,319 : INFO : EPOCH 5 - PROGRESS: at 91.63% examples, 785717 words/s, in_qsize 7, out_qsize 0
2020-01-17 17:30:02,111 : INFO : worker thread finished; awaiting finish of 3 more threads
2020-01-17 17:30:02,120 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-01-17 17:30:02,121 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-01-17 17:30:02,143 : INFO : worker thread fi

In [12]:
model.most_similar('pizza')

  """Entry point for launching an IPython kernel.


[('crust', 0.7031250596046448),
 ('pizzas', 0.6815289855003357),
 ('pepperoni', 0.675153374671936),
 ('margherita', 0.6247320771217346),
 ('calzone', 0.6213208436965942),
 ('lasagna', 0.5487133860588074),
 ('mozzarella', 0.5260012149810791),
 ('pasta', 0.5250520706176758),
 ('dough', 0.521996021270752),
 ('subs', 0.5141575336456299)]

In [13]:
model.most_similar('service')

  """Entry point for launching an IPython kernel.


[('waitstaff', 0.5392504930496216),
 ('staff', 0.4584592878818512),
 ('hostesses', 0.4309860169887543),
 ('hospitality', 0.4167857766151428),
 ('communication', 0.4150824546813965),
 ('bartenders', 0.4137731194496155),
 ('servers', 0.40130314230918884),
 ('value', 0.3976908028125763),
 ('food', 0.3945622742176056),
 ('vibes', 0.3835543394088745)]

In [14]:
model.most_similar('bad')

  """Entry point for launching an IPython kernel.


[('terrible', 0.6269491910934448),
 ('horrible', 0.5781732797622681),
 ('good', 0.5464430451393127),
 ('poor', 0.5298254489898682),
 ('awful', 0.517661988735199),
 ('disappointing', 0.49108368158340454),
 ('alright', 0.4890827238559723),
 ('subpar', 0.46436044573783875),
 ('greatest', 0.4499537944793701),
 ('acceptable', 0.4481072425842285)]

In [15]:
import numpy as np  # Make sure that numpy is imported

def makeFeatureVec(words, model, num_features):
    # Function to average all of the word vectors in a given
    # paragraph
    #
    # Pre-initialize an empty numpy array (for speed)
    featureVec = np.zeros((num_features,),dtype="float32")
    #
    nwords = 0.
    # 
    # WV.Index2word is a list that contains the names of the words in 
    # the model's vocabulary. Convert it to a set, for speed 
    index2word_set = set(model.wv.index2word)
    #
    # Loop over each word in the review and, if it is in the model's
    # vocaublary, add its feature vector to the total
    for word in words:
        if word in index2word_set: 
            nwords = nwords + 1.
            featureVec = np.add(featureVec,model[word])
    # 
    # Divide the result by the number of words to get the average
    featureVec = np.divide(featureVec,nwords)
    return featureVec


def getAvgFeatureVecs(reviews, model, num_features):
    # Given a set of reviews (each one a list of words), calculate 
    # the average feature vector for each one and return a 2D numpy array 
    # 
    # Initialize a counter
    counter = int(0.)
    # 
    # Preallocate a 2D numpy array, for speed
    reviewFeatureVecs = np.zeros((len(reviews),num_features),dtype="float32")
    # 
    # Loop through the reviews
    for review in reviews:
       #
       # Print a status message every 1000th review
       if counter%1000. == 0.:
           print ("Review %d of %d" % (counter, len(reviews)))
       # 
       # Call the function (defined above) that makes average feature vectors
       reviewFeatureVecs[counter] = makeFeatureVec(review, model, \
           num_features)
       #
       # Increment the counter
       counter = counter + 1
    return reviewFeatureVecs

In [16]:
# ****************************************************************
# Calculate average feature vectors
# using the functions we defined above. Notice that we now use stop word
# removal.

clean_reviews = []
for review in dfreviews["text"]:
    clean_reviews.append( review_to_wordlist( review, \
        remove_stopwords=True ))

reviewDataVecs = getAvgFeatureVecs( clean_reviews, model, num_features )

Review 0 of 100000




Review 1000 of 100000
Review 2000 of 100000
Review 3000 of 100000
Review 4000 of 100000
Review 5000 of 100000
Review 6000 of 100000
Review 7000 of 100000
Review 8000 of 100000




Review 9000 of 100000
Review 10000 of 100000
Review 11000 of 100000
Review 12000 of 100000
Review 13000 of 100000
Review 14000 of 100000
Review 15000 of 100000
Review 16000 of 100000
Review 17000 of 100000
Review 18000 of 100000
Review 19000 of 100000
Review 20000 of 100000
Review 21000 of 100000
Review 22000 of 100000
Review 23000 of 100000
Review 24000 of 100000
Review 25000 of 100000
Review 26000 of 100000
Review 27000 of 100000
Review 28000 of 100000
Review 29000 of 100000
Review 30000 of 100000
Review 31000 of 100000
Review 32000 of 100000
Review 33000 of 100000
Review 34000 of 100000
Review 35000 of 100000
Review 36000 of 100000
Review 37000 of 100000
Review 38000 of 100000
Review 39000 of 100000
Review 40000 of 100000
Review 41000 of 100000
Review 42000 of 100000
Review 43000 of 100000
Review 44000 of 100000
Review 45000 of 100000
Review 46000 of 100000
Review 47000 of 100000
Review 48000 of 100000
Review 49000 of 100000
Review 50000 of 100000
Review 51000 of 100000
Review 52000

# Make feature matrix (word2vec, votes, stars)

In [17]:
reviewDataVecs.shape[1]

300

In [18]:
# Add non-text data back to feature matrix
review_features = ['cool', 'funny', 'useful', 'stars' , 'business_id']
all_features_labels = ['w2v{}'.format(idx) for idx in range(reviewDataVecs.shape[1])] + review_features
all_features = np.append(reviewDataVecs, dfreviews[review_features].to_numpy(), 1)


In [19]:
# Create df 
all_features_df = pd.DataFrame(data=all_features, columns=all_features_labels)

# Convert all but business_id to numerical
business_ids = all_features_df['business_id']
all_features_df = all_features_df.iloc[:,:-1].astype('float64')
all_features_df['business_id'] = business_ids
del business_ids

# Group by business_id
all_features_business = all_features_df.groupby(by='business_id').mean()

In [20]:
all_features_business.head()

Unnamed: 0_level_0,w2v0,w2v1,w2v2,w2v3,w2v4,w2v5,w2v6,w2v7,w2v8,w2v9,w2v10,w2v11,w2v12,w2v13,w2v14,w2v15,w2v16,w2v17,w2v18,w2v19,w2v20,w2v21,w2v22,w2v23,w2v24,w2v25,w2v26,w2v27,w2v28,w2v29,w2v30,w2v31,w2v32,w2v33,w2v34,w2v35,w2v36,w2v37,w2v38,w2v39,w2v40,w2v41,w2v42,w2v43,w2v44,w2v45,w2v46,w2v47,w2v48,w2v49,w2v50,w2v51,w2v52,w2v53,w2v54,w2v55,w2v56,w2v57,w2v58,w2v59,w2v60,w2v61,w2v62,w2v63,w2v64,w2v65,w2v66,w2v67,w2v68,w2v69,w2v70,w2v71,w2v72,w2v73,w2v74,w2v75,w2v76,w2v77,w2v78,w2v79,w2v80,w2v81,w2v82,w2v83,w2v84,w2v85,w2v86,w2v87,w2v88,w2v89,w2v90,w2v91,w2v92,w2v93,w2v94,w2v95,w2v96,w2v97,w2v98,w2v99,w2v100,w2v101,w2v102,w2v103,w2v104,w2v105,w2v106,w2v107,w2v108,w2v109,w2v110,w2v111,w2v112,w2v113,w2v114,w2v115,w2v116,w2v117,w2v118,w2v119,w2v120,w2v121,w2v122,w2v123,w2v124,w2v125,w2v126,w2v127,w2v128,w2v129,w2v130,w2v131,w2v132,w2v133,w2v134,w2v135,w2v136,w2v137,w2v138,w2v139,w2v140,w2v141,w2v142,w2v143,w2v144,w2v145,w2v146,w2v147,w2v148,w2v149,w2v150,w2v151,w2v152,w2v153,w2v154,w2v155,w2v156,w2v157,w2v158,w2v159,w2v160,w2v161,w2v162,w2v163,w2v164,w2v165,w2v166,w2v167,w2v168,w2v169,w2v170,w2v171,w2v172,w2v173,w2v174,w2v175,w2v176,w2v177,w2v178,w2v179,w2v180,w2v181,w2v182,w2v183,w2v184,w2v185,w2v186,w2v187,w2v188,w2v189,w2v190,w2v191,w2v192,w2v193,w2v194,w2v195,w2v196,w2v197,w2v198,w2v199,w2v200,w2v201,w2v202,w2v203,w2v204,w2v205,w2v206,w2v207,w2v208,w2v209,w2v210,w2v211,w2v212,w2v213,w2v214,w2v215,w2v216,w2v217,w2v218,w2v219,w2v220,w2v221,w2v222,w2v223,w2v224,w2v225,w2v226,w2v227,w2v228,w2v229,w2v230,w2v231,w2v232,w2v233,w2v234,w2v235,w2v236,w2v237,w2v238,w2v239,w2v240,w2v241,w2v242,w2v243,w2v244,w2v245,w2v246,w2v247,w2v248,w2v249,w2v250,w2v251,w2v252,w2v253,w2v254,w2v255,w2v256,w2v257,w2v258,w2v259,w2v260,w2v261,w2v262,w2v263,w2v264,w2v265,w2v266,w2v267,w2v268,w2v269,w2v270,w2v271,w2v272,w2v273,w2v274,w2v275,w2v276,w2v277,w2v278,w2v279,w2v280,w2v281,w2v282,w2v283,w2v284,w2v285,w2v286,w2v287,w2v288,w2v289,w2v290,w2v291,w2v292,w2v293,w2v294,w2v295,w2v296,w2v297,w2v298,w2v299,cool,funny,useful,stars
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1
--I7YYLada0tSLkORTHb5Q,0.016423,0.028373,-0.017573,0.015987,-0.000105,-0.000613,0.022004,-0.017549,-0.007508,-0.007643,-0.004137,0.005752,0.021792,0.00858,-0.005153,0.016274,0.011641,-0.006097,0.007114,-0.016152,-0.021916,-0.002982,0.006849,-0.020937,-0.001696,0.013037,-0.000854,-0.001098,0.000506,-0.009859,-0.008329,-0.009132,-0.000561,0.030923,-0.028243,0.010541,-0.011148,0.005102,-0.010144,0.020217,-0.001866,0.043708,-0.007104,-0.005223,-0.006323,-0.014395,-0.018785,-0.00872,0.000136,0.008391,0.001354,0.005404,0.009978,-0.00328,0.001135,0.006917,0.009708,0.002898,-0.005693,-0.006635,0.008571,-0.0069,0.021818,0.008268,0.009152,-0.004829,0.015014,0.004731,0.003607,-0.002116,0.038439,-0.007744,-0.006328,-0.002035,0.003954,-0.003805,0.001273,0.003992,-0.004673,-0.006827,-0.003431,-0.006051,0.011142,-0.001584,0.015804,-0.020293,0.01824,0.007635,-0.003957,-0.004237,-0.009845,-0.00238,-0.009501,-0.002669,0.006937,0.002659,-0.016396,0.007884,-0.004637,-0.000481,-0.004326,0.013595,-0.00826,-0.012649,0.012902,0.002506,0.003483,-0.021323,0.014243,0.018884,-0.017765,-0.013973,0.002364,0.018644,0.012008,0.01472,0.006201,0.005151,0.019246,-0.002152,-0.03451,0.00855,0.003693,0.002966,-0.00023,0.00415,-0.00636,-0.008175,-0.003261,-0.007701,-0.012668,0.003262,0.014533,-0.004862,5.7e-05,0.010327,0.01357,-0.006513,0.003236,0.011813,-0.001414,-0.010102,0.010253,-0.00662,0.000912,-0.006159,0.009072,0.014108,0.005114,0.005896,-0.008537,0.004655,-0.001175,0.034869,-0.010275,5.6e-05,0.007967,0.004398,-0.0247,0.001478,-0.005329,-0.008903,-0.005312,0.004596,0.006408,0.013327,-0.000956,-0.005602,-0.018464,0.021665,-0.019262,0.000263,-0.014808,0.000905,-0.006787,-0.008589,0.019641,0.009316,-0.00208,-0.000111,-0.002086,0.004262,-0.007424,0.011197,0.001892,0.017257,0.011649,-0.001445,0.009688,0.001303,0.00728,-0.011114,-0.015809,0.005787,-0.000729,-0.013344,0.002589,0.028851,0.000345,-0.011377,0.000261,0.0127,-0.004975,0.023475,-0.005133,0.02668,-0.002168,0.005035,-0.01226,-0.001136,-0.009256,-0.0221,-0.005014,-0.00075,0.004653,-0.002741,-0.007252,0.009529,-0.006381,0.012016,0.017946,0.003922,0.000784,-0.001312,0.014511,0.011437,0.012514,-0.003473,0.014323,-0.00278,-0.013057,0.014756,0.015251,-0.008106,-0.001643,-0.003443,0.006926,0.008021,0.006439,-0.021857,-0.001257,-0.000578,0.003006,-0.001827,-0.025314,0.016381,0.011534,-0.006476,-0.007396,0.007404,0.00877,0.022904,0.01604,0.020333,0.002619,0.00201,-0.020388,0.002111,0.011524,0.006622,0.006941,0.002426,-0.000127,-0.003453,0.000524,-0.0101,0.008531,-0.02123,-0.017735,-0.02621,-0.005717,-0.013019,-0.003495,-0.001274,0.01131,-0.009379,-0.017453,-0.00117,0.014352,-0.002489,0.019097,0.003758,-0.009179,0.001424,-0.006294,-0.009914,-0.018522,-0.009218,0.004427,-0.0106,-0.013605,0.002903,0.005445,0.01152,0.016746,-0.005915,0.024629,-0.007327,-0.022198,0.001746,0.352941,0.352941,0.823529,3.647059
--U98MNlDym2cLn36BBPgQ,0.009526,0.020788,-0.013227,0.037498,0.006747,-0.004874,0.012273,-0.024117,0.008519,-0.009936,-0.00301,-0.007703,0.018541,0.022424,-0.00855,0.003445,0.014191,0.000926,0.001358,-0.00626,-0.01699,0.007976,-0.001236,-0.029893,-0.004399,0.022837,-0.005918,0.008592,0.012597,-0.005725,0.009391,0.000858,0.003131,0.034826,-0.026948,-0.00441,-0.012987,-0.005924,-0.012023,0.018018,0.005187,0.033649,-0.007751,-0.013777,0.004336,-0.018224,-0.030893,-0.015549,0.008527,0.005289,-0.000202,-0.001845,0.011829,-0.001018,0.005172,0.001364,-0.001056,-0.00733,-0.018776,-0.007521,0.00076,-0.013104,0.003671,-0.007366,-0.001656,0.011103,0.002937,0.001705,0.015391,-0.006962,0.000296,-0.005424,-0.024318,0.011868,0.022309,0.015153,-0.003186,-0.003033,0.002904,-0.014292,0.003285,0.004305,0.017303,0.003647,0.003455,-0.006365,0.00517,0.001712,-0.004432,-0.006427,-0.014719,-0.003795,-0.002669,-0.008352,0.002823,0.005066,-0.005835,0.005526,-0.00072,0.001431,-0.005034,0.006255,0.009896,-0.020154,0.006983,0.001042,-0.009043,-0.007399,0.003431,0.011243,-0.020616,-0.006034,-0.004156,0.004347,0.006449,-0.003178,0.005243,-0.000175,9.5e-05,-0.005957,-0.01748,0.004124,0.006667,0.000624,-0.003606,-0.012922,-0.010647,-0.01105,0.000661,-0.008315,0.005649,0.004742,0.014144,-0.001626,-0.003618,0.004101,-0.007025,0.004233,0.001314,0.007004,0.002882,-0.005765,-0.005438,-0.008123,-0.006626,-0.000191,-0.015236,0.017853,0.002019,0.007784,-0.004522,0.003228,-0.011678,0.015277,-0.000337,-0.006169,0.007778,0.012632,-0.00437,0.005508,-0.005371,0.011217,-0.006899,0.004853,0.021217,0.004433,-0.008672,-0.00017,-0.013791,0.003937,-0.008665,0.002376,-0.005243,-0.003049,-0.011634,0.0051,0.01349,-0.000978,0.000452,-0.00491,0.007104,-0.008656,-0.013201,0.009715,0.00019,0.018675,-0.004282,-0.01235,-0.005294,-0.003098,0.0043,-0.010867,-0.008232,-0.005555,-0.008416,-0.000438,0.006088,0.034031,-0.005951,-0.011274,-0.006163,0.022856,0.00466,0.016616,-0.017315,0.029941,-0.004681,-0.006773,0.003573,0.000269,-0.003365,-0.020757,-0.007967,0.001097,0.001597,-0.014545,0.008238,-0.002162,-0.0011,0.008331,0.006731,-0.012262,0.011174,-0.002553,0.003972,0.011859,-0.002478,0.000185,0.008342,-0.00687,0.009072,0.005089,0.008168,-0.008667,-0.008383,0.007071,0.007966,-0.003892,-0.011247,-0.01095,-0.000924,-0.000457,-0.000581,-0.006893,-0.014215,0.00518,0.012293,-0.005615,-0.00375,0.005397,0.005264,0.009632,0.017666,0.002859,0.005952,0.003424,-0.016514,0.003168,0.005319,0.009492,0.005101,0.00985,0.000228,-0.007808,-0.010544,-0.005459,-0.003861,-0.037872,-0.017202,-0.018502,-0.000997,-0.002113,-0.005247,0.004077,-0.006937,-0.012416,0.001015,0.002865,0.007258,0.016955,0.000729,0.008853,-0.017975,-0.006315,-0.006089,5.3e-05,-0.006383,-0.025554,-0.010488,-0.009348,-0.004467,-0.008314,0.012735,0.017171,0.013198,0.00206,0.010875,-0.012588,-0.019368,-0.005236,0.0,0.0,2.0,3.0
--j-kaNMCo1-DYzddCsA5Q,0.035586,0.022514,-0.006759,-0.034605,-0.0043,-0.001136,0.001259,-0.030018,-0.024447,-0.019531,0.013469,-0.015355,0.014754,-0.012143,0.011463,0.011731,0.034843,0.023824,0.004841,-0.003642,0.03355,0.023334,0.040423,-0.025388,-0.005392,-0.012638,0.007508,-0.019039,-0.013676,0.005799,-0.002171,-0.031701,0.005618,0.007776,-0.01297,0.015142,-0.005067,0.002006,-0.001829,0.037603,0.003339,0.036467,-0.016766,0.020584,0.0084,0.001669,-0.013346,-0.013458,-0.011666,-0.016915,0.003241,0.013855,0.011129,-0.030245,0.008177,-0.018768,0.018454,0.008811,-5.9e-05,-0.003335,-0.006106,-0.033478,0.025956,0.018926,-0.01407,0.005113,0.02893,-0.006716,-0.008166,0.026749,-0.004555,-0.006962,-0.006616,-0.028545,0.008256,-0.011582,0.034356,-0.004741,-0.026255,-0.022443,-0.003017,0.000753,0.00483,0.011377,0.022869,-0.016355,-0.01471,0.012377,-0.015632,-0.002214,-0.030018,0.002821,-0.037887,-0.009241,0.031782,0.017117,-0.011676,-0.028895,-0.016502,-0.038487,-0.010697,-0.020796,-0.027077,-0.015862,-0.010757,0.011843,0.006723,6.8e-05,0.025043,0.001866,0.00963,0.005317,0.003732,0.030519,0.019011,-0.013259,-0.005914,-0.014707,-0.022952,0.026195,-0.041787,0.016181,-0.022299,-0.006052,-0.022288,0.02067,0.010472,-0.006911,0.003276,0.00254,-0.023013,0.009655,0.016446,0.018688,0.020813,0.026653,0.014267,0.003966,0.039087,0.036969,0.008616,0.015359,0.000131,0.018841,-0.03424,-0.047229,0.012753,-0.022866,-0.030703,-0.003877,0.010786,0.00024,0.004964,-0.017763,0.020084,-0.037403,0.020078,0.018008,-0.036033,-0.034225,-0.030038,-0.000533,0.00824,0.007428,0.014448,0.034219,0.03488,-0.003483,-0.016695,0.038513,-0.028679,-2.1e-05,-0.011349,0.004167,0.016462,-0.016115,0.001255,0.0207,-0.006923,-0.016865,0.002612,0.004554,0.021733,-0.005121,-0.011707,0.020933,-0.013895,-0.021148,0.013658,-0.017727,-0.016137,-0.007611,0.001611,0.025292,-0.030883,-0.025738,0.009383,0.014083,0.004231,-0.010233,0.00343,0.017012,-0.02475,0.027743,0.036476,0.018355,-0.00885,-0.010797,-0.003072,0.024055,-0.00457,-0.01121,0.010668,0.01257,0.026892,-0.025917,-0.0182,0.039035,0.020572,0.014087,-0.007038,0.019014,-0.005364,0.002627,0.018733,0.017483,0.014418,-0.024293,0.019427,0.020707,-0.013925,0.005169,0.013683,0.00964,-0.010311,-0.018649,-0.007244,-0.011057,-0.017467,-0.004436,-0.035974,-0.007311,0.009026,0.015625,-0.012384,-0.057747,0.007954,-0.021706,-0.016572,0.025558,-0.000519,0.025739,0.000586,0.010865,0.019225,-0.005994,-0.000922,-0.007387,0.005063,-0.022474,-0.001555,0.001445,-0.02054,-0.004993,-0.004842,-0.016322,-0.001742,-0.014826,0.010301,-0.023069,0.011577,-0.008462,0.000272,0.005402,-0.011265,-0.000371,0.018296,-0.011073,0.001637,-0.024963,0.02022,-0.005851,-0.021839,-0.045366,0.00491,-0.001685,0.008545,-0.054177,-0.001469,-0.014475,0.000184,0.002357,0.01168,0.009636,0.027132,0.029206,0.035708,-0.035958,-0.010643,0.003613,0.0,0.0,0.0,5.0
--wIGbLEhlpl_UeAIyDmZQ,-0.006451,0.005224,-0.009912,0.01222,-0.001507,0.014311,-0.004426,-0.013908,-0.003054,-0.003489,-0.005158,-0.013398,-0.009127,-0.013979,-0.00975,0.015487,0.007607,0.013315,-0.011175,-0.009165,0.002724,-0.008716,-0.013071,-0.0234,0.003305,0.020969,-0.002856,-0.001024,-0.011011,0.005339,-2.6e-05,-0.009941,-0.001624,-0.009941,0.00411,0.010796,0.001066,-0.000427,0.006601,0.025162,0.008402,0.005168,0.008002,-0.001039,0.01567,0.017713,0.00148,-0.007099,-0.009715,0.002707,-0.005726,0.007443,-0.017098,-0.014837,-0.004003,-0.00451,0.007199,-7.2e-05,0.003028,0.004661,-0.001873,-0.009008,-0.001957,-0.012551,-0.001671,0.026746,-0.015515,-0.033926,0.000387,0.013197,-0.04292,0.004932,-0.013605,0.010155,0.01418,0.030974,-0.010119,-0.013864,-0.004738,-0.023016,0.00213,0.012809,0.012343,0.019651,0.004754,-0.006894,-0.005952,0.012059,-0.006827,0.020048,-0.002721,-0.013028,-0.00936,-0.016978,0.018649,0.024557,-0.001547,-0.002572,0.003069,-0.006623,-0.021931,-0.007246,-0.004054,-0.031499,-0.021295,0.02005,-0.005289,0.023503,-0.011998,-0.022676,-8.7e-05,-0.007599,-0.023538,0.009769,-0.017149,-0.030261,-0.011132,-0.00241,-0.030899,0.006051,0.001235,0.000696,-0.009934,-0.021671,-0.020202,-0.007275,0.010149,-0.025803,-0.004109,0.000107,0.001046,0.025114,0.005574,-0.023765,0.006369,0.013491,-0.014562,-0.010531,-0.00315,0.009234,0.011258,0.010218,-0.006969,-0.012741,-0.00551,0.005818,-0.019811,0.00496,-0.006015,-0.007071,0.011693,0.005291,-0.014156,-0.009118,0.000954,0.001818,0.009421,0.006589,0.008889,0.002381,-0.022533,0.022815,-0.007912,-0.02021,0.035407,0.01739,-0.000898,-0.004029,-0.000687,0.011396,0.000525,0.006189,0.011964,-0.001504,-0.007669,-0.002903,-0.001048,0.001073,0.00497,-0.019298,-0.001323,-0.017552,-0.018586,-0.01448,0.012289,0.014894,-0.006163,0.010517,-0.022579,0.010857,-0.001869,-0.002703,-0.008702,-0.005797,-0.008647,-0.005226,0.016426,0.007479,-0.027694,-0.012755,0.016212,0.005117,-0.018269,0.002022,0.008764,0.019119,-0.006846,0.000805,0.006318,0.005565,0.003548,-0.005001,-0.0042,0.004131,-0.008311,-0.019334,-0.005256,-0.002412,-0.000262,0.005065,-0.016332,0.00191,-0.010828,-0.015169,0.009904,0.013332,-0.005678,0.0022,-0.001355,0.001587,0.00186,0.000814,0.003844,-0.00014,-0.002234,-0.000621,0.015916,-0.011911,-0.014774,-0.013549,0.002547,0.010617,-0.003541,-0.001351,0.018963,0.002975,-0.014302,0.01447,0.005929,0.015377,0.015909,-0.001553,0.009135,-0.01686,0.022797,0.001344,-0.001511,-0.00185,0.004181,0.006768,0.017253,0.005129,0.009166,-0.012016,-0.016002,0.004678,-0.024996,-0.026095,0.010083,-0.01217,0.007552,0.002216,-0.003668,-0.01454,-0.013339,0.009703,0.002095,0.011977,-0.003552,0.011336,-0.002475,-0.010721,-0.009994,-0.008097,-0.013916,-0.002644,-0.015977,-0.031741,-0.008459,-0.002852,0.00265,-0.014241,0.017916,0.007721,0.023303,0.010598,0.007041,-0.007397,-0.002891,-0.002526,0.666667,0.166667,3.0,3.833333
-000aQFeK6tqVLndf7xORg,-0.007433,-1.1e-05,-0.009821,0.013711,-0.007697,0.009369,-0.000267,-0.014385,0.013446,-0.006545,-0.004254,-0.00884,0.006065,-0.020545,-0.014723,0.021661,0.007791,0.023143,-0.019433,-0.010503,0.016465,0.003757,-0.001521,-0.019992,0.001518,0.018253,-0.008955,-0.009514,-0.018659,0.013087,-0.008339,-0.016071,0.002549,-0.015986,0.011783,0.0182,0.001336,-0.005553,-0.003634,0.033946,0.020831,0.002149,0.002212,-0.004911,0.015449,0.023007,0.004104,-0.006299,-0.017385,-0.009145,-0.008275,0.01966,-0.010135,-0.007654,0.009172,-0.020482,-0.005627,0.003912,-0.009415,0.001919,-0.004439,-0.014971,0.014464,0.007223,-0.014237,0.027177,-0.003932,-0.02755,-0.009708,0.008284,-0.052534,-0.002317,-0.020593,0.014291,0.041985,0.045631,-0.00363,-0.023346,-0.016834,-0.036259,0.003925,0.019696,0.027176,0.02353,0.00804,-0.015238,-0.004602,0.028189,-0.00878,0.026455,-0.004413,-0.023286,-0.020831,-0.013712,0.02049,0.017521,-0.004392,-0.004432,-0.004666,-0.01965,-0.015279,-0.023229,-0.009894,-0.02654,-0.015425,0.026099,-0.009173,0.025814,-0.002449,-0.024673,0.001656,-0.002645,-0.024472,0.009885,0.002412,-0.021211,-0.011158,-0.004071,-0.039804,0.015876,-0.007912,0.00298,-0.028761,-0.027686,-0.021079,0.0001,0.002395,-0.029705,-0.010856,-0.012114,-0.010032,0.024644,0.008433,-0.023729,-0.007477,0.028917,-0.010986,-0.016497,0.009415,0.018289,0.004225,0.013398,-0.003217,-0.014722,-0.009774,-0.020824,-0.011235,0.010129,0.004694,-0.01414,0.011297,0.00864,0.003554,-0.013905,0.010194,-0.000588,0.016452,0.005398,0.00949,-0.016907,-0.019449,0.019247,-0.018269,-0.022054,0.035479,0.017517,0.014358,-0.00592,-0.005499,0.022577,-0.002098,0.028162,0.028012,-0.009083,-0.010663,0.003411,-0.007584,-0.008702,-0.003066,-0.024061,0.009006,-0.005962,-0.008443,-0.012843,0.014679,0.012292,-0.010667,0.011949,-0.019478,0.004063,-0.001255,-0.01176,-0.01194,-0.015718,-0.016672,-0.022398,0.019692,0.009881,-0.025308,-0.017314,0.027234,0.002567,-0.018215,-0.002731,0.014089,0.018067,-0.008646,-0.002019,0.008941,0.01073,0.011453,-0.008433,-0.005288,0.0013,0.010854,-0.024225,-0.011229,-0.009023,0.00282,0.020369,-0.027245,-0.005153,-0.01309,0.00477,0.013618,0.008284,0.007004,-0.000237,-0.009649,0.006179,-0.003944,-0.002354,0.006194,-0.01096,-0.011094,-0.018331,0.02691,-0.010754,-0.030586,-0.006552,-0.001103,0.014271,0.00219,0.010393,0.018642,-0.003907,-0.015776,0.002287,0.002076,0.017322,0.026583,-0.005787,-0.001431,-0.017098,0.030672,0.003212,-0.00348,-0.009102,0.006372,0.002701,0.032321,0.009328,-0.014496,-0.021993,-0.022041,-0.006105,-0.009757,-0.03687,0.018777,-0.028428,0.005373,-0.006823,-0.002413,-0.016446,-0.007992,0.013408,0.008006,0.021931,-0.015675,0.015636,0.001517,-0.024616,-0.013544,-0.026681,-0.004654,0.006673,-0.019194,-0.039296,-0.002843,0.004196,-0.007143,-0.012022,0.017546,0.011267,0.038164,0.014851,0.016622,-0.022971,0.00502,0.014055,0.666667,0.0,0.0,5.0


In [21]:
all_features_business.describe()

Unnamed: 0,w2v0,w2v1,w2v2,w2v3,w2v4,w2v5,w2v6,w2v7,w2v8,w2v9,w2v10,w2v11,w2v12,w2v13,w2v14,w2v15,w2v16,w2v17,w2v18,w2v19,w2v20,w2v21,w2v22,w2v23,w2v24,w2v25,w2v26,w2v27,w2v28,w2v29,w2v30,w2v31,w2v32,w2v33,w2v34,w2v35,w2v36,w2v37,w2v38,w2v39,w2v40,w2v41,w2v42,w2v43,w2v44,w2v45,w2v46,w2v47,w2v48,w2v49,w2v50,w2v51,w2v52,w2v53,w2v54,w2v55,w2v56,w2v57,w2v58,w2v59,w2v60,w2v61,w2v62,w2v63,w2v64,w2v65,w2v66,w2v67,w2v68,w2v69,w2v70,w2v71,w2v72,w2v73,w2v74,w2v75,w2v76,w2v77,w2v78,w2v79,w2v80,w2v81,w2v82,w2v83,w2v84,w2v85,w2v86,w2v87,w2v88,w2v89,w2v90,w2v91,w2v92,w2v93,w2v94,w2v95,w2v96,w2v97,w2v98,w2v99,w2v100,w2v101,w2v102,w2v103,w2v104,w2v105,w2v106,w2v107,w2v108,w2v109,w2v110,w2v111,w2v112,w2v113,w2v114,w2v115,w2v116,w2v117,w2v118,w2v119,w2v120,w2v121,w2v122,w2v123,w2v124,w2v125,w2v126,w2v127,w2v128,w2v129,w2v130,w2v131,w2v132,w2v133,w2v134,w2v135,w2v136,w2v137,w2v138,w2v139,w2v140,w2v141,w2v142,w2v143,w2v144,w2v145,w2v146,w2v147,w2v148,w2v149,w2v150,w2v151,w2v152,w2v153,w2v154,w2v155,w2v156,w2v157,w2v158,w2v159,w2v160,w2v161,w2v162,w2v163,w2v164,w2v165,w2v166,w2v167,w2v168,w2v169,w2v170,w2v171,w2v172,w2v173,w2v174,w2v175,w2v176,w2v177,w2v178,w2v179,w2v180,w2v181,w2v182,w2v183,w2v184,w2v185,w2v186,w2v187,w2v188,w2v189,w2v190,w2v191,w2v192,w2v193,w2v194,w2v195,w2v196,w2v197,w2v198,w2v199,w2v200,w2v201,w2v202,w2v203,w2v204,w2v205,w2v206,w2v207,w2v208,w2v209,w2v210,w2v211,w2v212,w2v213,w2v214,w2v215,w2v216,w2v217,w2v218,w2v219,w2v220,w2v221,w2v222,w2v223,w2v224,w2v225,w2v226,w2v227,w2v228,w2v229,w2v230,w2v231,w2v232,w2v233,w2v234,w2v235,w2v236,w2v237,w2v238,w2v239,w2v240,w2v241,w2v242,w2v243,w2v244,w2v245,w2v246,w2v247,w2v248,w2v249,w2v250,w2v251,w2v252,w2v253,w2v254,w2v255,w2v256,w2v257,w2v258,w2v259,w2v260,w2v261,w2v262,w2v263,w2v264,w2v265,w2v266,w2v267,w2v268,w2v269,w2v270,w2v271,w2v272,w2v273,w2v274,w2v275,w2v276,w2v277,w2v278,w2v279,w2v280,w2v281,w2v282,w2v283,w2v284,w2v285,w2v286,w2v287,w2v288,w2v289,w2v290,w2v291,w2v292,w2v293,w2v294,w2v295,w2v296,w2v297,w2v298,w2v299,cool,funny,useful,stars
count,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13942.0,13943.0,13943.0,13943.0,13943.0
mean,0.00708,0.016276,-0.00861,0.01315,-0.003761,0.006367,0.001918,-0.015486,-0.008427,-0.006332,-0.001224,-0.009088,0.006823,-0.003458,-0.001781,0.00985,0.012324,0.002345,-0.001627,-0.010833,-0.007057,-0.003667,0.000156,-0.02262,0.003001,0.00915,2e-05,0.003259,-0.00792,0.001406,0.001464,-0.010276,0.000689,0.007157,-0.007346,0.008118,-0.005013,0.002505,-0.003501,0.028295,0.003829,0.021981,-0.001375,0.00079,0.005797,0.006338,-0.007441,-0.008883,-0.005429,0.00631,-0.000772,0.009688,-0.003079,-0.008413,-0.002097,0.001448,0.006949,-0.001421,0.000679,-0.001419,-0.000607,-0.007912,0.006895,0.002954,-0.0041,0.010578,0.002674,-0.008185,0.002699,0.005281,-0.012502,-0.001784,-0.01189,0.001734,0.015885,0.018122,0.003417,-0.011557,-0.004681,-0.021999,0.003706,0.003798,0.016942,0.007771,0.007176,-0.014233,0.00188,0.011009,-0.005856,0.008408,-0.009572,-0.009041,-0.013584,-0.009588,0.018393,0.009898,-0.006749,-0.003798,-0.00755,-0.007971,-0.013756,-0.006189,-0.004499,-0.024044,-0.000163,0.011761,-0.006992,0.003578,-0.001222,0.000494,-0.004307,-0.004001,-0.007968,0.007368,0.00054,-0.011424,-0.001708,-0.001467,-0.012017,0.005076,-0.019545,0.005926,-0.005925,-0.010029,-0.013698,-0.002896,0.00694,-0.013513,-0.004695,-0.00535,-0.007539,0.003802,0.009327,-0.001799,0.00577,0.008267,0.00442,-0.000656,0.007464,0.015549,0.004121,0.003838,0.000477,-0.005486,-0.007678,-0.008654,-0.00241,0.001636,-0.003619,-0.000561,-0.001216,0.004758,-0.005786,0.006756,0.000536,-0.001066,0.006807,0.008629,-0.009434,-0.004191,-0.010386,0.00019,-0.007693,-0.005236,0.021587,0.015174,0.002902,-0.00119,-0.010483,0.020371,-0.011047,0.004426,-0.001689,0.005912,-0.005544,-0.007094,0.004604,0.002548,-0.001939,-0.013544,0.001678,-0.007041,-0.003344,0.000105,0.003676,0.015041,-0.002582,-0.001886,-0.004488,8.5e-05,0.002616,-0.006907,-0.004124,0.003174,-0.008474,-0.010699,0.005971,0.01447,-0.009027,-0.008953,0.006632,0.007808,-0.006246,0.011021,0.008277,0.01753,-0.0045,-0.00152,0.001503,0.0042,-0.002666,-0.010925,-0.002316,0.000238,0.004751,-0.014014,-0.00122,0.007348,0.005666,0.011454,-0.004344,0.000411,-0.000996,-0.003935,0.011022,0.008361,0.006664,-0.002435,0.006728,0.001503,-0.003206,0.001798,0.006072,-0.001957,-0.002032,-0.003813,0.004001,-0.001422,-0.008597,-0.00552,-0.004068,-0.00396,0.000302,0.002353,-0.005202,-0.004718,-0.001702,-0.004553,-0.002179,0.00811,0.006996,0.012458,0.009381,0.004185,0.013738,0.001664,-0.005699,-0.000613,0.011776,-0.003071,0.012224,-0.001016,-0.004084,-0.002856,-0.007834,-0.007086,-0.000776,-0.022162,0.004548,-0.0225,-0.001235,-0.006713,-0.006693,-0.006064,-0.001142,0.006474,-0.000473,0.001907,9.3e-05,-0.001416,0.006326,-0.006995,-0.009883,-0.009184,-0.001242,-0.007976,-0.010904,-0.023398,7.2e-05,-0.005066,-0.005431,-0.008934,0.013828,0.007931,0.018126,0.002902,0.01278,-0.01457,-0.009464,0.004033,0.486991,0.423987,1.434996,3.615964
std,0.01554,0.012944,0.014396,0.024892,0.018633,0.008587,0.014418,0.012549,0.014241,0.014275,0.011323,0.018902,0.016522,0.014457,0.011241,0.012871,0.012133,0.014223,0.014394,0.012505,0.021097,0.012976,0.013166,0.010789,0.010887,0.01297,0.010908,0.01561,0.018832,0.012652,0.012791,0.013271,0.009449,0.023644,0.016305,0.009109,0.013197,0.010634,0.00962,0.016298,0.010442,0.018374,0.011872,0.016207,0.014109,0.019247,0.018482,0.011185,0.012628,0.011145,0.011179,0.010322,0.014105,0.012166,0.011652,0.013261,0.011474,0.009926,0.016424,0.013972,0.012145,0.012775,0.015313,0.013606,0.012361,0.01706,0.017344,0.014976,0.011866,0.010228,0.035195,0.010535,0.009503,0.015888,0.014605,0.016007,0.013366,0.013903,0.010848,0.019386,0.009937,0.015049,0.011696,0.015794,0.01031,0.01095,0.016392,0.010794,0.009374,0.013694,0.014104,0.012822,0.012926,0.01097,0.011121,0.012688,0.012223,0.011184,0.011422,0.010881,0.012089,0.017347,0.013951,0.009968,0.02109,0.012478,0.013139,0.018532,0.011308,0.020974,0.014435,0.014603,0.015573,0.011801,0.014269,0.020078,0.011412,0.009769,0.02347,0.013076,0.015318,0.010506,0.015334,0.016753,0.012342,0.011507,0.012253,0.011813,0.012568,0.01102,0.012791,0.020324,0.011668,0.012997,0.010705,0.011115,0.020405,0.010665,0.011943,0.010815,0.009642,0.012547,0.015502,0.013033,0.014308,0.016949,0.015739,0.012597,0.012026,0.010656,0.012473,0.008579,0.009712,0.025141,0.011263,0.010982,0.010948,0.012613,0.016618,0.013034,0.011657,0.014051,0.009597,0.016784,0.01706,0.011917,0.012122,0.010558,0.010284,0.013051,0.012348,0.012194,0.017468,0.012232,0.012201,0.009339,0.013646,0.015415,0.014238,0.013318,0.010144,0.013928,0.012263,0.017296,0.01218,0.010917,0.009973,0.011236,0.015456,0.011805,0.010789,0.010742,0.013287,0.012901,0.013441,0.013453,0.012671,0.012586,0.013346,0.008135,0.011634,0.010483,0.013667,0.014035,0.016936,0.013052,0.008937,0.013963,0.010976,0.009924,0.010746,0.012216,0.00914,0.009201,0.009377,0.018228,0.013173,0.013113,0.009897,0.012455,0.019198,0.010446,0.010071,0.012717,0.009698,0.010861,0.01058,0.010723,0.012418,0.011543,0.012662,0.013282,0.013665,0.010959,0.011469,0.012618,0.016341,0.014178,0.015233,0.013011,0.014132,0.013156,0.010402,0.012915,0.018353,0.019377,0.014747,0.012588,0.008189,0.011664,0.011047,0.013569,0.01002,0.019384,0.01437,0.010235,0.014106,0.011783,0.010792,0.014057,0.008939,0.011734,0.012359,0.013888,0.014459,0.010247,0.014035,0.013745,0.018519,0.011383,0.011817,0.012363,0.008507,0.010786,0.013457,0.016902,0.013264,0.013824,0.013779,0.018325,0.012649,0.010987,0.010749,0.015136,0.013619,0.012081,0.010711,0.017327,0.01241,0.012849,0.0107,0.012065,0.012741,0.009886,0.013959,0.013635,0.010917,0.012594,0.013478,0.010081,1.299472,1.070148,2.371442,1.277067
min,-0.103618,-0.045055,-0.075968,-0.127295,-0.112152,-0.042858,-0.056598,-0.092377,-0.095111,-0.072498,-0.092387,-0.087607,-0.082485,-0.072333,-0.063792,-0.064,-0.053064,-0.078378,-0.097343,-0.136704,-0.1052,-0.084593,-0.120736,-0.087313,-0.063458,-0.073761,-0.059174,-0.065158,-0.090041,-0.045856,-0.066871,-0.086646,-0.059475,-0.106642,-0.066367,-0.045645,-0.07807,-0.050698,-0.079332,-0.06296,-0.067056,-0.051013,-0.063255,-0.069444,-0.059402,-0.073442,-0.095784,-0.082363,-0.083671,-0.054808,-0.099475,-0.047754,-0.112132,-0.086075,-0.074882,-0.062622,-0.058505,-0.059984,-0.083755,-0.070361,-0.053528,-0.0845,-0.069703,-0.060945,-0.055774,-0.062324,-0.091822,-0.085864,-0.112898,-0.050365,-0.170294,-0.06101,-0.078989,-0.067864,-0.039826,-0.060033,-0.063719,-0.087302,-0.064281,-0.137183,-0.099569,-0.064263,-0.034066,-0.085719,-0.065574,-0.071229,-0.064761,-0.051776,-0.071258,-0.077935,-0.073157,-0.104003,-0.101127,-0.053284,-0.034526,-0.099386,-0.077449,-0.08179,-0.08296,-0.062836,-0.085478,-0.090717,-0.079087,-0.099886,-0.086761,-0.036569,-0.090925,-0.069638,-0.060877,-0.083271,-0.071947,-0.121076,-0.100731,-0.082547,-0.086388,-0.205143,-0.05253,-0.055837,-0.123214,-0.068826,-0.095751,-0.054564,-0.078897,-0.098213,-0.067406,-0.058335,-0.049733,-0.083261,-0.080157,-0.0918,-0.080154,-0.127224,-0.050707,-0.068445,-0.043721,-0.052711,-0.070757,-0.069941,-0.0561,-0.037294,-0.05356,-0.08602,-0.064207,-0.095615,-0.071017,-0.089112,-0.080603,-0.07603,-0.0604,-0.073701,-0.066647,-0.045027,-0.064417,-0.084018,-0.103859,-0.056374,-0.051457,-0.045655,-0.08022,-0.071723,-0.071078,-0.076113,-0.057384,-0.076607,-0.084004,-0.049747,-0.057535,-0.072358,-0.077753,-0.051394,-0.080225,-0.07946,-0.064835,-0.061663,-0.071053,-0.058201,-0.071262,-0.143739,-0.080362,-0.083135,-0.066462,-0.101439,-0.105547,-0.095254,-0.053898,-0.091993,-0.070884,-0.093493,-0.065206,-0.076702,-0.072725,-0.086644,-0.10578,-0.069267,-0.082581,-0.114005,-0.058528,-0.052293,-0.068289,-0.054587,-0.047399,-0.041402,-0.086042,-0.079317,-0.099696,-0.083044,-0.053776,-0.059664,-0.047459,-0.047634,-0.074241,-0.070547,-0.107206,-0.045781,-0.042802,-0.082568,-0.067473,-0.060342,-0.046472,-0.072932,-0.080924,-0.067067,-0.071455,-0.075315,-0.07042,-0.042362,-0.064709,-0.057385,-0.059824,-0.083577,-0.066776,-0.053623,-0.054674,-0.080062,-0.069821,-0.150502,-0.077648,-0.056168,-0.093858,-0.078418,-0.078086,-0.074645,-0.060113,-0.056906,-0.068529,-0.088199,-0.058778,-0.073292,-0.051392,-0.081261,-0.064837,-0.057044,-0.053119,-0.076014,-0.077758,-0.081067,-0.070684,-0.066701,-0.048821,-0.062845,-0.034603,-0.12612,-0.067773,-0.081341,-0.066832,-0.087815,-0.061064,-0.083551,-0.076387,-0.133455,-0.073133,-0.096779,-0.054399,-0.079442,-0.068382,-0.071915,-0.07853,-0.082726,-0.06683,-0.098292,-0.045751,-0.05953,-0.094589,-0.06977,-0.075182,-0.067318,-0.067113,-0.109475,-0.06015,-0.063299,-0.08951,-0.066356,-0.054066,-0.059693,-0.055659,-0.089693,-0.069629,-0.085765,-0.074833,-0.054541,0.0,0.0,0.0,1.0
25%,-0.003049,0.008878,-0.017373,-0.001154,-0.014281,0.001081,-0.008472,-0.023362,-0.017819,-0.014199,-0.00657,-0.020912,-0.004226,-0.013518,-0.008543,0.002766,0.005113,-0.006937,-0.01131,-0.015789,-0.022003,-0.011228,-0.007945,-0.02923,-0.003556,0.001063,-0.006911,-0.005223,-0.01985,-0.007864,-0.006044,-0.017825,-0.00464,-0.009734,-0.02044,0.002461,-0.01329,-0.003698,-0.009307,0.018063,-0.002151,0.008665,-0.008956,-0.008702,-0.004088,-0.010374,-0.020366,-0.015534,-0.012358,0.000123,-0.007319,0.00323,-0.011628,-0.015701,-0.008993,-0.008041,0.000171,-0.007421,-0.009831,-0.009234,-0.008518,-0.015154,-0.003227,-0.005853,-0.011905,-0.001462,-0.007849,-0.018864,-0.004162,-0.001902,-0.039084,-0.008164,-0.017527,-0.009234,0.005724,0.007169,-0.004884,-0.020816,-0.011161,-0.036189,-0.002196,-0.006568,0.009868,-0.004152,0.001125,-0.02075,-0.009906,0.004801,-0.011217,-0.001388,-0.018383,-0.016654,-0.020993,-0.016911,0.010938,0.001323,-0.014502,-0.010703,-0.014764,-0.014992,-0.020777,-0.018549,-0.012831,-0.029956,-0.013911,0.004628,-0.013551,-0.010797,-0.008154,-0.015972,-0.013987,-0.01146,-0.018707,0.001213,-0.008514,-0.022348,-0.009143,-0.00702,-0.030804,-0.003311,-0.029949,-6.8e-05,-0.016749,-0.022606,-0.021739,-0.010477,-0.000917,-0.02066,-0.012322,-0.011636,-0.01498,-0.00904,0.001787,-0.009231,-0.000712,0.001368,-0.010994,-0.007437,0.000212,0.008541,-0.00154,-0.003166,-0.01064,-0.014029,-0.017258,-0.017894,-0.011822,-0.006017,-0.011379,-0.007676,-0.009241,-0.000436,-0.01141,-0.013204,-0.006341,-0.007543,0.000315,0.000909,-0.022061,-0.012036,-0.01791,-0.009225,-0.013376,-0.016527,0.010562,0.007652,-0.004643,-0.007222,-0.017052,0.012468,-0.018598,-0.003511,-0.015514,-0.000977,-0.013118,-0.012547,-0.003718,-0.007011,-0.011107,-0.023056,-0.00407,-0.015612,-0.010464,-0.0121,-0.003463,0.009,-0.009043,-0.008015,-0.016124,-0.006316,-0.003686,-0.012864,-0.012266,-0.005107,-0.017325,-0.01734,-0.002361,0.005798,-0.016829,-0.013622,-0.001092,0.001614,-0.014136,0.001468,-0.002932,0.010135,-0.009779,-0.0102,-0.006526,-0.001736,-0.009417,-0.019344,-0.007577,-0.005031,-0.000751,-0.026496,-0.00993,-0.000981,-0.000567,0.004118,-0.019548,-0.00581,-0.007497,-0.011755,0.005272,0.001735,0.000444,-0.008876,-0.000516,-0.005214,-0.010778,-0.007675,-0.002835,-0.008809,-0.008558,-0.010975,-0.006534,-0.011239,-0.018359,-0.013947,-0.013957,-0.012375,-0.005802,-0.006352,-0.020712,-0.017131,-0.013023,-0.012603,-0.007173,0.000549,0.000179,0.004303,0.003743,-0.010314,0.004728,-0.003353,-0.015927,-0.008066,0.005403,-0.012468,0.006824,-0.007837,-0.011391,-0.012554,-0.018332,-0.013582,-0.010497,-0.030854,-0.01074,-0.028994,-0.008928,-0.013531,-0.011934,-0.012354,-0.010292,-0.005782,-0.008423,-0.005912,-0.008987,-0.012468,-0.001471,-0.013982,-0.016087,-0.019789,-0.009807,-0.015265,-0.017459,-0.034436,-0.00786,-0.01414,-0.012112,-0.016504,0.005949,0.001847,0.009976,-0.006281,0.006503,-0.022626,-0.018209,-0.002484,0.0,0.0,0.15251,3.0
50%,0.008021,0.016514,-0.010473,0.014938,-0.002578,0.00624,0.002227,-0.015916,-0.009443,-0.00628,-0.00093,-0.01058,0.006891,-0.00353,-0.00214,0.010038,0.012895,0.002814,-0.000573,-0.010121,-0.010503,-0.003701,0.000256,-0.022559,0.00307,0.009321,0.000627,0.002765,-0.006142,3.5e-05,0.002023,-0.009728,0.000801,0.008086,-0.004729,0.00762,-0.005854,0.003014,-0.003416,0.027902,0.003124,0.021524,-0.002078,-0.000731,0.005229,0.006836,-0.008094,-0.009679,-0.00473,0.006551,0.000109,0.009305,-0.002989,-0.008359,-0.001948,0.000665,0.007697,-0.001365,-0.000185,-0.002311,-0.001734,-0.007296,0.007288,0.002636,-0.003825,0.009494,0.003945,-0.007847,0.003008,0.005105,-0.015038,-0.002398,-0.011439,0.000908,0.015389,0.017148,0.002682,-0.010436,-0.004514,-0.023908,0.003955,0.003744,0.016719,0.007166,0.007218,-0.013954,-0.000435,0.010563,-0.006191,0.009018,-0.010322,-0.009014,-0.013091,-0.010061,0.017296,0.010131,-0.007407,-0.003292,-0.008101,-0.007251,-0.01272,-0.006396,-0.004154,-0.023753,-0.000841,0.010772,-0.005804,0.004816,-0.000994,-0.002503,-0.00497,-0.003342,-0.006644,0.007309,0.00098,-0.011854,-0.001052,-0.00141,-0.014626,0.00404,-0.020466,0.006465,-0.005314,-0.010918,-0.013108,-0.003183,0.005496,-0.013364,-0.00479,-0.005681,-0.008065,0.005603,0.009325,-8.6e-05,0.005608,0.007851,0.005309,-0.00078,0.006547,0.014848,0.003858,0.004256,0.00266,-0.005352,-0.008244,-0.006791,-0.00252,0.002139,-0.003372,-0.000883,-0.001489,0.004962,-0.005588,0.003308,0.000245,-0.000627,0.006856,0.008582,-0.010968,-0.002727,-0.010003,-0.000709,-0.008224,-0.006082,0.022361,0.014239,0.002011,-0.001106,-0.011387,0.020319,-0.011027,0.004154,-0.003825,0.006502,-0.00662,-0.007523,0.004425,0.003293,-0.002899,-0.013977,0.001427,-0.006377,-0.003255,0.001706,0.003245,0.015681,-0.003339,-0.001453,-0.004753,0.000565,0.002815,-0.006593,-0.005069,0.002927,-0.007481,-0.009422,0.005467,0.014471,-0.007815,-0.009429,0.006147,0.007764,-0.00515,0.010706,0.007483,0.018091,-0.004173,-0.001212,0.001938,0.003661,-0.003032,-0.011679,-0.002414,0.000287,0.004712,-0.015171,-0.001563,0.008032,0.004849,0.011782,-0.00489,2.9e-05,-0.000241,-0.00363,0.011156,0.008082,0.00679,-0.001692,0.00584,0.001497,-0.003543,0.002661,0.00587,-0.002671,-0.002545,-0.002616,0.002769,-0.002981,-0.009268,-0.006416,-0.003203,-0.004013,-5.7e-05,0.001864,-0.004305,-0.005317,-0.000356,-0.006095,-0.00238,0.008474,0.005951,0.013818,0.009769,0.004383,0.015356,0.0016,-0.005227,-0.000326,0.011384,-0.002024,0.011844,-0.000937,-0.003854,-0.003002,-0.008524,-0.00748,-0.001337,-0.022247,0.006582,-0.022542,-0.001978,-0.006416,-0.006966,-0.006087,-0.000925,0.006776,-0.000509,0.00303,0.00163,-0.001535,0.006585,-0.006175,-0.010009,-0.011462,-0.002172,-0.008036,-0.011178,-0.023804,-0.000447,-0.005665,-0.00526,-0.008489,0.013584,0.00816,0.01813,0.003084,0.012662,-0.014746,-0.009338,0.003525,0.076923,0.0,1.0,4.0
75%,0.017971,0.023925,-0.001195,0.028679,0.008159,0.011726,0.012906,-0.007534,0.000479,0.001247,0.004878,0.000685,0.018849,0.00692,0.004444,0.017006,0.019683,0.011747,0.008672,-0.004263,0.008635,0.003749,0.008072,-0.01615,0.009774,0.0174,0.007095,0.011023,0.005337,0.010191,0.009433,-0.002009,0.006156,0.025693,0.004568,0.0134,0.002855,0.009049,0.002213,0.037945,0.009328,0.036996,0.005547,0.009021,0.015201,0.020589,0.004265,-0.002788,0.00221,0.012604,0.006132,0.015733,0.006225,-0.001157,0.004737,0.011118,0.014049,0.004273,0.010183,0.005437,0.006791,-0.000143,0.017223,0.010961,0.00411,0.021997,0.014458,0.002576,0.009843,0.012168,0.017199,0.004575,-0.006068,0.011987,0.024981,0.027843,0.011021,-0.001231,0.002118,-0.00666,0.010012,0.013871,0.023582,0.019637,0.013253,-0.007315,0.013679,0.016979,-0.000645,0.017617,-0.001679,-0.001244,-0.005863,-0.002444,0.025433,0.018404,0.000505,0.003091,-0.00057,-0.000652,-0.005983,0.007123,0.004003,-0.017803,0.012132,0.017666,0.000959,0.017128,0.006153,0.018226,0.004603,0.005192,0.003643,0.013782,0.009647,0.001805,0.005623,0.00419,0.007924,0.01264,-0.009398,0.012336,0.005975,0.00311,-0.005294,0.004297,0.013497,-0.006159,0.003096,0.000998,4e-06,0.01776,0.016995,0.006659,0.012098,0.014741,0.019524,0.006187,0.013886,0.021938,0.009838,0.011649,0.01182,0.002608,0.001878,0.002268,0.006141,0.009702,0.004026,0.006716,0.006882,0.010048,-0.000105,0.028836,0.007489,0.005363,0.013526,0.016109,0.002098,0.004316,-0.00319,0.009665,-0.002481,0.005416,0.033499,0.02206,0.009538,0.004951,-0.00464,0.027876,-0.003275,0.012597,0.011294,0.013557,0.001396,-0.001791,0.01256,0.011676,0.006676,-0.003844,0.007383,0.001806,0.003329,0.013285,0.010433,0.022143,0.003665,0.004525,0.006895,0.007492,0.008924,-0.000201,0.003176,0.011021,0.001099,-0.002561,0.014006,0.023716,-0.000447,-0.004671,0.013468,0.013765,0.002444,0.021028,0.018462,0.025919,0.0009,0.007245,0.009364,0.009724,0.003733,-0.002946,0.002865,0.005462,0.009949,-0.00282,0.00667,0.016209,0.011282,0.018914,0.011391,0.006144,0.005753,0.003944,0.016538,0.014927,0.01267,0.004381,0.012822,0.008509,0.00463,0.011125,0.014624,0.004537,0.003803,0.004495,0.01394,0.008542,0.000833,0.002598,0.005335,0.004618,0.005854,0.010943,0.008764,0.00694,0.009245,0.002908,0.002778,0.01581,0.013225,0.021301,0.015186,0.019542,0.023493,0.007147,0.003994,0.006683,0.017629,0.006604,0.017411,0.006192,0.003213,0.006996,0.002048,-0.000683,0.008711,-0.014177,0.0182,-0.015521,0.0056,0.000862,-0.001803,0.000208,0.007248,0.018501,0.007632,0.010658,0.009575,0.010409,0.014448,8.3e-05,-0.00389,0.001235,0.006852,-0.000797,-0.004537,-0.012575,0.00697,0.00379,0.001403,-0.001034,0.02189,0.014212,0.026409,0.012294,0.018618,-0.006419,-0.000558,0.010082,0.555556,0.5,1.833333,5.0
max,0.081535,0.1007,0.068078,0.169776,0.082428,0.064519,0.070305,0.044968,0.062329,0.114934,0.054392,0.156033,0.079815,0.071259,0.071192,0.084164,0.083976,0.079643,0.069839,0.040289,0.090283,0.066703,0.080682,0.03676,0.058032,0.083448,0.079849,0.128898,0.153188,0.062167,0.138373,0.060888,0.06154,0.126812,0.070833,0.073267,0.068609,0.051958,0.055928,0.106403,0.082233,0.107392,0.065957,0.138778,0.07598,0.094691,0.076018,0.055059,0.056716,0.074626,0.07193,0.059026,0.075542,0.057998,0.057853,0.070691,0.075323,0.055434,0.080604,0.119928,0.072082,0.063461,0.084855,0.090998,0.093355,0.106459,0.082302,0.066787,0.085631,0.063116,0.120154,0.054934,0.038418,0.098375,0.104108,0.129858,0.085674,0.054118,0.048753,0.051256,0.064655,0.100952,0.116618,0.070395,0.068484,0.033876,0.090183,0.092564,0.05741,0.100071,0.097549,0.064437,0.106818,0.055395,0.077313,0.062921,0.063113,0.051957,0.102409,0.045852,0.060838,0.063346,0.06788,0.033245,0.170846,0.116013,0.070139,0.074734,0.049472,0.080979,0.111988,0.065356,0.065273,0.074337,0.096214,0.066765,0.070463,0.06245,0.073965,0.1141,0.058498,0.055849,0.111234,0.070819,0.05495,0.062944,0.089963,0.06636,0.071427,0.051468,0.060691,0.082385,0.073454,0.067823,0.076033,0.121409,0.101478,0.067328,0.07562,0.086071,0.057776,0.059426,0.066149,0.073711,0.080891,0.065154,0.125979,0.100346,0.095122,0.054199,0.067666,0.054086,0.09814,0.113755,0.06848,0.064036,0.071647,0.088584,0.06393,0.051442,0.059486,0.083332,0.05103,0.094777,0.090311,0.088035,0.06737,0.075321,0.061797,0.097326,0.04895,0.067989,0.07643,0.09152,0.053098,0.065218,0.100392,0.086583,0.072474,0.045176,0.07072,0.076529,0.085263,0.075063,0.090625,0.138663,0.063092,0.055064,0.063273,0.069804,0.075678,0.058551,0.081246,0.066855,0.040847,0.051507,0.081053,0.062498,0.0623,0.045877,0.07271,0.116293,0.072662,0.079799,0.129186,0.0791,0.100046,0.100007,0.044277,0.07384,0.085775,0.070637,0.073525,0.073885,0.065696,0.118324,0.077269,0.067456,0.06655,0.080646,0.074419,0.055164,0.062465,0.07078,0.071505,0.06814,0.070111,0.059136,0.10713,0.097505,0.055258,0.065429,0.073778,0.07022,0.077175,0.058598,0.08023,0.100485,0.086195,0.084971,0.061179,0.084592,0.069911,0.067144,0.065526,0.125514,0.065901,0.076743,0.058297,0.08344,0.089709,0.077259,0.0568,0.099308,0.069184,0.057723,0.068885,0.06232,0.120086,0.11553,0.070521,0.078076,0.0553,0.066142,0.06577,0.04972,0.084665,0.076444,0.075465,0.062432,0.063511,0.058876,0.051876,0.066375,0.101633,0.077186,0.078061,0.058588,0.072591,0.072471,0.065776,0.046388,0.063315,0.073939,0.134511,0.064663,0.051912,0.07625,0.095185,0.067552,0.049244,0.054444,0.079775,0.051219,0.105447,0.075161,0.085323,0.045838,0.06568,0.067878,56.0,28.0,75.0,5.0


# Create Label y (Business categories)

In [22]:
def load_business_df(): 
    filename = r'../../data/business.json'
    new_list = []
    for line in open(filename):
       new_list.append(json.loads(line))
    return pd.DataFrame.from_records(new_list)

dfbusiness = load_business_df()

In [23]:
dfbusiness.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,85016,5,3.0,AZ
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128,2.5,ON
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170,4.0,NC
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,85338,3,5.0,AZ
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1,35.190012,-80.887223,Queen City Plumbing,28217,4,4.0,NC


# Join x,y (feature matrix, category) using business_id

In [24]:
dfbusiness.columns

Index(['address', 'attributes', 'business_id', 'categories', 'city', 'hours',
       'is_open', 'latitude', 'longitude', 'name', 'postal_code',
       'review_count', 'stars', 'state'],
      dtype='object')

In [25]:
len(dfbusiness['stars'].unique())

9

In [26]:
# Add business details to features df
keep_cols = ['business_id', 'categories', 'review_count']
all_features_business = all_features_business.merge(dfbusiness[keep_cols], how='left', on='business_id') 

In [27]:
all_features_business.head()

Unnamed: 0,business_id,w2v0,w2v1,w2v2,w2v3,w2v4,w2v5,w2v6,w2v7,w2v8,w2v9,w2v10,w2v11,w2v12,w2v13,w2v14,w2v15,w2v16,w2v17,w2v18,w2v19,w2v20,w2v21,w2v22,w2v23,w2v24,w2v25,w2v26,w2v27,w2v28,w2v29,w2v30,w2v31,w2v32,w2v33,w2v34,w2v35,w2v36,w2v37,w2v38,w2v39,w2v40,w2v41,w2v42,w2v43,w2v44,w2v45,w2v46,w2v47,w2v48,w2v49,w2v50,w2v51,w2v52,w2v53,w2v54,w2v55,w2v56,w2v57,w2v58,w2v59,w2v60,w2v61,w2v62,w2v63,w2v64,w2v65,w2v66,w2v67,w2v68,w2v69,w2v70,w2v71,w2v72,w2v73,w2v74,w2v75,w2v76,w2v77,w2v78,w2v79,w2v80,w2v81,w2v82,w2v83,w2v84,w2v85,w2v86,w2v87,w2v88,w2v89,w2v90,w2v91,w2v92,w2v93,w2v94,w2v95,w2v96,w2v97,w2v98,w2v99,w2v100,w2v101,w2v102,w2v103,w2v104,w2v105,w2v106,w2v107,w2v108,w2v109,w2v110,w2v111,w2v112,w2v113,w2v114,w2v115,w2v116,w2v117,w2v118,w2v119,w2v120,w2v121,w2v122,w2v123,w2v124,w2v125,w2v126,w2v127,w2v128,w2v129,w2v130,w2v131,w2v132,w2v133,w2v134,w2v135,w2v136,w2v137,w2v138,w2v139,w2v140,w2v141,w2v142,w2v143,w2v144,w2v145,w2v146,w2v147,w2v148,w2v149,w2v150,w2v151,w2v152,w2v153,w2v154,w2v155,w2v156,w2v157,w2v158,w2v159,w2v160,w2v161,w2v162,w2v163,w2v164,w2v165,w2v166,w2v167,w2v168,w2v169,w2v170,w2v171,w2v172,w2v173,w2v174,w2v175,w2v176,w2v177,w2v178,w2v179,w2v180,w2v181,w2v182,w2v183,w2v184,w2v185,w2v186,w2v187,w2v188,w2v189,w2v190,w2v191,w2v192,w2v193,w2v194,w2v195,w2v196,w2v197,w2v198,w2v199,w2v200,w2v201,w2v202,w2v203,w2v204,w2v205,w2v206,w2v207,w2v208,w2v209,w2v210,w2v211,w2v212,w2v213,w2v214,w2v215,w2v216,w2v217,w2v218,w2v219,w2v220,w2v221,w2v222,w2v223,w2v224,w2v225,w2v226,w2v227,w2v228,w2v229,w2v230,w2v231,w2v232,w2v233,w2v234,w2v235,w2v236,w2v237,w2v238,w2v239,w2v240,w2v241,w2v242,w2v243,w2v244,w2v245,w2v246,w2v247,w2v248,w2v249,w2v250,w2v251,w2v252,w2v253,w2v254,w2v255,w2v256,w2v257,w2v258,w2v259,w2v260,w2v261,w2v262,w2v263,w2v264,w2v265,w2v266,w2v267,w2v268,w2v269,w2v270,w2v271,w2v272,w2v273,w2v274,w2v275,w2v276,w2v277,w2v278,w2v279,w2v280,w2v281,w2v282,w2v283,w2v284,w2v285,w2v286,w2v287,w2v288,w2v289,w2v290,w2v291,w2v292,w2v293,w2v294,w2v295,w2v296,w2v297,w2v298,w2v299,cool,funny,useful,stars,categories,review_count
0,--I7YYLada0tSLkORTHb5Q,0.016423,0.028373,-0.017573,0.015987,-0.000105,-0.000613,0.022004,-0.017549,-0.007508,-0.007643,-0.004137,0.005752,0.021792,0.00858,-0.005153,0.016274,0.011641,-0.006097,0.007114,-0.016152,-0.021916,-0.002982,0.006849,-0.020937,-0.001696,0.013037,-0.000854,-0.001098,0.000506,-0.009859,-0.008329,-0.009132,-0.000561,0.030923,-0.028243,0.010541,-0.011148,0.005102,-0.010144,0.020217,-0.001866,0.043708,-0.007104,-0.005223,-0.006323,-0.014395,-0.018785,-0.00872,0.000136,0.008391,0.001354,0.005404,0.009978,-0.00328,0.001135,0.006917,0.009708,0.002898,-0.005693,-0.006635,0.008571,-0.0069,0.021818,0.008268,0.009152,-0.004829,0.015014,0.004731,0.003607,-0.002116,0.038439,-0.007744,-0.006328,-0.002035,0.003954,-0.003805,0.001273,0.003992,-0.004673,-0.006827,-0.003431,-0.006051,0.011142,-0.001584,0.015804,-0.020293,0.01824,0.007635,-0.003957,-0.004237,-0.009845,-0.00238,-0.009501,-0.002669,0.006937,0.002659,-0.016396,0.007884,-0.004637,-0.000481,-0.004326,0.013595,-0.00826,-0.012649,0.012902,0.002506,0.003483,-0.021323,0.014243,0.018884,-0.017765,-0.013973,0.002364,0.018644,0.012008,0.01472,0.006201,0.005151,0.019246,-0.002152,-0.03451,0.00855,0.003693,0.002966,-0.00023,0.00415,-0.00636,-0.008175,-0.003261,-0.007701,-0.012668,0.003262,0.014533,-0.004862,5.7e-05,0.010327,0.01357,-0.006513,0.003236,0.011813,-0.001414,-0.010102,0.010253,-0.00662,0.000912,-0.006159,0.009072,0.014108,0.005114,0.005896,-0.008537,0.004655,-0.001175,0.034869,-0.010275,5.6e-05,0.007967,0.004398,-0.0247,0.001478,-0.005329,-0.008903,-0.005312,0.004596,0.006408,0.013327,-0.000956,-0.005602,-0.018464,0.021665,-0.019262,0.000263,-0.014808,0.000905,-0.006787,-0.008589,0.019641,0.009316,-0.00208,-0.000111,-0.002086,0.004262,-0.007424,0.011197,0.001892,0.017257,0.011649,-0.001445,0.009688,0.001303,0.00728,-0.011114,-0.015809,0.005787,-0.000729,-0.013344,0.002589,0.028851,0.000345,-0.011377,0.000261,0.0127,-0.004975,0.023475,-0.005133,0.02668,-0.002168,0.005035,-0.01226,-0.001136,-0.009256,-0.0221,-0.005014,-0.00075,0.004653,-0.002741,-0.007252,0.009529,-0.006381,0.012016,0.017946,0.003922,0.000784,-0.001312,0.014511,0.011437,0.012514,-0.003473,0.014323,-0.00278,-0.013057,0.014756,0.015251,-0.008106,-0.001643,-0.003443,0.006926,0.008021,0.006439,-0.021857,-0.001257,-0.000578,0.003006,-0.001827,-0.025314,0.016381,0.011534,-0.006476,-0.007396,0.007404,0.00877,0.022904,0.01604,0.020333,0.002619,0.00201,-0.020388,0.002111,0.011524,0.006622,0.006941,0.002426,-0.000127,-0.003453,0.000524,-0.0101,0.008531,-0.02123,-0.017735,-0.02621,-0.005717,-0.013019,-0.003495,-0.001274,0.01131,-0.009379,-0.017453,-0.00117,0.014352,-0.002489,0.019097,0.003758,-0.009179,0.001424,-0.006294,-0.009914,-0.018522,-0.009218,0.004427,-0.0106,-0.013605,0.002903,0.005445,0.01152,0.016746,-0.005915,0.024629,-0.007327,-0.022198,0.001746,0.352941,0.352941,0.823529,3.647059,"Nightlife, Sports Bars, Restaurants, Bars, Ame...",96
1,--U98MNlDym2cLn36BBPgQ,0.009526,0.020788,-0.013227,0.037498,0.006747,-0.004874,0.012273,-0.024117,0.008519,-0.009936,-0.00301,-0.007703,0.018541,0.022424,-0.00855,0.003445,0.014191,0.000926,0.001358,-0.00626,-0.01699,0.007976,-0.001236,-0.029893,-0.004399,0.022837,-0.005918,0.008592,0.012597,-0.005725,0.009391,0.000858,0.003131,0.034826,-0.026948,-0.00441,-0.012987,-0.005924,-0.012023,0.018018,0.005187,0.033649,-0.007751,-0.013777,0.004336,-0.018224,-0.030893,-0.015549,0.008527,0.005289,-0.000202,-0.001845,0.011829,-0.001018,0.005172,0.001364,-0.001056,-0.00733,-0.018776,-0.007521,0.00076,-0.013104,0.003671,-0.007366,-0.001656,0.011103,0.002937,0.001705,0.015391,-0.006962,0.000296,-0.005424,-0.024318,0.011868,0.022309,0.015153,-0.003186,-0.003033,0.002904,-0.014292,0.003285,0.004305,0.017303,0.003647,0.003455,-0.006365,0.00517,0.001712,-0.004432,-0.006427,-0.014719,-0.003795,-0.002669,-0.008352,0.002823,0.005066,-0.005835,0.005526,-0.00072,0.001431,-0.005034,0.006255,0.009896,-0.020154,0.006983,0.001042,-0.009043,-0.007399,0.003431,0.011243,-0.020616,-0.006034,-0.004156,0.004347,0.006449,-0.003178,0.005243,-0.000175,9.5e-05,-0.005957,-0.01748,0.004124,0.006667,0.000624,-0.003606,-0.012922,-0.010647,-0.01105,0.000661,-0.008315,0.005649,0.004742,0.014144,-0.001626,-0.003618,0.004101,-0.007025,0.004233,0.001314,0.007004,0.002882,-0.005765,-0.005438,-0.008123,-0.006626,-0.000191,-0.015236,0.017853,0.002019,0.007784,-0.004522,0.003228,-0.011678,0.015277,-0.000337,-0.006169,0.007778,0.012632,-0.00437,0.005508,-0.005371,0.011217,-0.006899,0.004853,0.021217,0.004433,-0.008672,-0.00017,-0.013791,0.003937,-0.008665,0.002376,-0.005243,-0.003049,-0.011634,0.0051,0.01349,-0.000978,0.000452,-0.00491,0.007104,-0.008656,-0.013201,0.009715,0.00019,0.018675,-0.004282,-0.01235,-0.005294,-0.003098,0.0043,-0.010867,-0.008232,-0.005555,-0.008416,-0.000438,0.006088,0.034031,-0.005951,-0.011274,-0.006163,0.022856,0.00466,0.016616,-0.017315,0.029941,-0.004681,-0.006773,0.003573,0.000269,-0.003365,-0.020757,-0.007967,0.001097,0.001597,-0.014545,0.008238,-0.002162,-0.0011,0.008331,0.006731,-0.012262,0.011174,-0.002553,0.003972,0.011859,-0.002478,0.000185,0.008342,-0.00687,0.009072,0.005089,0.008168,-0.008667,-0.008383,0.007071,0.007966,-0.003892,-0.011247,-0.01095,-0.000924,-0.000457,-0.000581,-0.006893,-0.014215,0.00518,0.012293,-0.005615,-0.00375,0.005397,0.005264,0.009632,0.017666,0.002859,0.005952,0.003424,-0.016514,0.003168,0.005319,0.009492,0.005101,0.00985,0.000228,-0.007808,-0.010544,-0.005459,-0.003861,-0.037872,-0.017202,-0.018502,-0.000997,-0.002113,-0.005247,0.004077,-0.006937,-0.012416,0.001015,0.002865,0.007258,0.016955,0.000729,0.008853,-0.017975,-0.006315,-0.006089,5.3e-05,-0.006383,-0.025554,-0.010488,-0.009348,-0.004467,-0.008314,0.012735,0.017171,0.013198,0.00206,0.010875,-0.012588,-0.019368,-0.005236,0.0,0.0,2.0,3.0,"Pizza, Restaurants",4
2,--j-kaNMCo1-DYzddCsA5Q,0.035586,0.022514,-0.006759,-0.034605,-0.0043,-0.001136,0.001259,-0.030018,-0.024447,-0.019531,0.013469,-0.015355,0.014754,-0.012143,0.011463,0.011731,0.034843,0.023824,0.004841,-0.003642,0.03355,0.023334,0.040423,-0.025388,-0.005392,-0.012638,0.007508,-0.019039,-0.013676,0.005799,-0.002171,-0.031701,0.005618,0.007776,-0.01297,0.015142,-0.005067,0.002006,-0.001829,0.037603,0.003339,0.036467,-0.016766,0.020584,0.0084,0.001669,-0.013346,-0.013458,-0.011666,-0.016915,0.003241,0.013855,0.011129,-0.030245,0.008177,-0.018768,0.018454,0.008811,-5.9e-05,-0.003335,-0.006106,-0.033478,0.025956,0.018926,-0.01407,0.005113,0.02893,-0.006716,-0.008166,0.026749,-0.004555,-0.006962,-0.006616,-0.028545,0.008256,-0.011582,0.034356,-0.004741,-0.026255,-0.022443,-0.003017,0.000753,0.00483,0.011377,0.022869,-0.016355,-0.01471,0.012377,-0.015632,-0.002214,-0.030018,0.002821,-0.037887,-0.009241,0.031782,0.017117,-0.011676,-0.028895,-0.016502,-0.038487,-0.010697,-0.020796,-0.027077,-0.015862,-0.010757,0.011843,0.006723,6.8e-05,0.025043,0.001866,0.00963,0.005317,0.003732,0.030519,0.019011,-0.013259,-0.005914,-0.014707,-0.022952,0.026195,-0.041787,0.016181,-0.022299,-0.006052,-0.022288,0.02067,0.010472,-0.006911,0.003276,0.00254,-0.023013,0.009655,0.016446,0.018688,0.020813,0.026653,0.014267,0.003966,0.039087,0.036969,0.008616,0.015359,0.000131,0.018841,-0.03424,-0.047229,0.012753,-0.022866,-0.030703,-0.003877,0.010786,0.00024,0.004964,-0.017763,0.020084,-0.037403,0.020078,0.018008,-0.036033,-0.034225,-0.030038,-0.000533,0.00824,0.007428,0.014448,0.034219,0.03488,-0.003483,-0.016695,0.038513,-0.028679,-2.1e-05,-0.011349,0.004167,0.016462,-0.016115,0.001255,0.0207,-0.006923,-0.016865,0.002612,0.004554,0.021733,-0.005121,-0.011707,0.020933,-0.013895,-0.021148,0.013658,-0.017727,-0.016137,-0.007611,0.001611,0.025292,-0.030883,-0.025738,0.009383,0.014083,0.004231,-0.010233,0.00343,0.017012,-0.02475,0.027743,0.036476,0.018355,-0.00885,-0.010797,-0.003072,0.024055,-0.00457,-0.01121,0.010668,0.01257,0.026892,-0.025917,-0.0182,0.039035,0.020572,0.014087,-0.007038,0.019014,-0.005364,0.002627,0.018733,0.017483,0.014418,-0.024293,0.019427,0.020707,-0.013925,0.005169,0.013683,0.00964,-0.010311,-0.018649,-0.007244,-0.011057,-0.017467,-0.004436,-0.035974,-0.007311,0.009026,0.015625,-0.012384,-0.057747,0.007954,-0.021706,-0.016572,0.025558,-0.000519,0.025739,0.000586,0.010865,0.019225,-0.005994,-0.000922,-0.007387,0.005063,-0.022474,-0.001555,0.001445,-0.02054,-0.004993,-0.004842,-0.016322,-0.001742,-0.014826,0.010301,-0.023069,0.011577,-0.008462,0.000272,0.005402,-0.011265,-0.000371,0.018296,-0.011073,0.001637,-0.024963,0.02022,-0.005851,-0.021839,-0.045366,0.00491,-0.001685,0.008545,-0.054177,-0.001469,-0.014475,0.000184,0.002357,0.01168,0.009636,0.027132,0.029206,0.035708,-0.035958,-0.010643,0.003613,0.0,0.0,0.0,5.0,"Hair Removal, Nail Technicians, Beauty & Spas,...",4
3,--wIGbLEhlpl_UeAIyDmZQ,-0.006451,0.005224,-0.009912,0.01222,-0.001507,0.014311,-0.004426,-0.013908,-0.003054,-0.003489,-0.005158,-0.013398,-0.009127,-0.013979,-0.00975,0.015487,0.007607,0.013315,-0.011175,-0.009165,0.002724,-0.008716,-0.013071,-0.0234,0.003305,0.020969,-0.002856,-0.001024,-0.011011,0.005339,-2.6e-05,-0.009941,-0.001624,-0.009941,0.00411,0.010796,0.001066,-0.000427,0.006601,0.025162,0.008402,0.005168,0.008002,-0.001039,0.01567,0.017713,0.00148,-0.007099,-0.009715,0.002707,-0.005726,0.007443,-0.017098,-0.014837,-0.004003,-0.00451,0.007199,-7.2e-05,0.003028,0.004661,-0.001873,-0.009008,-0.001957,-0.012551,-0.001671,0.026746,-0.015515,-0.033926,0.000387,0.013197,-0.04292,0.004932,-0.013605,0.010155,0.01418,0.030974,-0.010119,-0.013864,-0.004738,-0.023016,0.00213,0.012809,0.012343,0.019651,0.004754,-0.006894,-0.005952,0.012059,-0.006827,0.020048,-0.002721,-0.013028,-0.00936,-0.016978,0.018649,0.024557,-0.001547,-0.002572,0.003069,-0.006623,-0.021931,-0.007246,-0.004054,-0.031499,-0.021295,0.02005,-0.005289,0.023503,-0.011998,-0.022676,-8.7e-05,-0.007599,-0.023538,0.009769,-0.017149,-0.030261,-0.011132,-0.00241,-0.030899,0.006051,0.001235,0.000696,-0.009934,-0.021671,-0.020202,-0.007275,0.010149,-0.025803,-0.004109,0.000107,0.001046,0.025114,0.005574,-0.023765,0.006369,0.013491,-0.014562,-0.010531,-0.00315,0.009234,0.011258,0.010218,-0.006969,-0.012741,-0.00551,0.005818,-0.019811,0.00496,-0.006015,-0.007071,0.011693,0.005291,-0.014156,-0.009118,0.000954,0.001818,0.009421,0.006589,0.008889,0.002381,-0.022533,0.022815,-0.007912,-0.02021,0.035407,0.01739,-0.000898,-0.004029,-0.000687,0.011396,0.000525,0.006189,0.011964,-0.001504,-0.007669,-0.002903,-0.001048,0.001073,0.00497,-0.019298,-0.001323,-0.017552,-0.018586,-0.01448,0.012289,0.014894,-0.006163,0.010517,-0.022579,0.010857,-0.001869,-0.002703,-0.008702,-0.005797,-0.008647,-0.005226,0.016426,0.007479,-0.027694,-0.012755,0.016212,0.005117,-0.018269,0.002022,0.008764,0.019119,-0.006846,0.000805,0.006318,0.005565,0.003548,-0.005001,-0.0042,0.004131,-0.008311,-0.019334,-0.005256,-0.002412,-0.000262,0.005065,-0.016332,0.00191,-0.010828,-0.015169,0.009904,0.013332,-0.005678,0.0022,-0.001355,0.001587,0.00186,0.000814,0.003844,-0.00014,-0.002234,-0.000621,0.015916,-0.011911,-0.014774,-0.013549,0.002547,0.010617,-0.003541,-0.001351,0.018963,0.002975,-0.014302,0.01447,0.005929,0.015377,0.015909,-0.001553,0.009135,-0.01686,0.022797,0.001344,-0.001511,-0.00185,0.004181,0.006768,0.017253,0.005129,0.009166,-0.012016,-0.016002,0.004678,-0.024996,-0.026095,0.010083,-0.01217,0.007552,0.002216,-0.003668,-0.01454,-0.013339,0.009703,0.002095,0.011977,-0.003552,0.011336,-0.002475,-0.010721,-0.009994,-0.008097,-0.013916,-0.002644,-0.015977,-0.031741,-0.008459,-0.002852,0.00265,-0.014241,0.017916,0.007721,0.023303,0.010598,0.007041,-0.007397,-0.002891,-0.002526,0.666667,0.166667,3.0,3.833333,"Electronics, Professional Services, Local Serv...",14
4,-000aQFeK6tqVLndf7xORg,-0.007433,-1.1e-05,-0.009821,0.013711,-0.007697,0.009369,-0.000267,-0.014385,0.013446,-0.006545,-0.004254,-0.00884,0.006065,-0.020545,-0.014723,0.021661,0.007791,0.023143,-0.019433,-0.010503,0.016465,0.003757,-0.001521,-0.019992,0.001518,0.018253,-0.008955,-0.009514,-0.018659,0.013087,-0.008339,-0.016071,0.002549,-0.015986,0.011783,0.0182,0.001336,-0.005553,-0.003634,0.033946,0.020831,0.002149,0.002212,-0.004911,0.015449,0.023007,0.004104,-0.006299,-0.017385,-0.009145,-0.008275,0.01966,-0.010135,-0.007654,0.009172,-0.020482,-0.005627,0.003912,-0.009415,0.001919,-0.004439,-0.014971,0.014464,0.007223,-0.014237,0.027177,-0.003932,-0.02755,-0.009708,0.008284,-0.052534,-0.002317,-0.020593,0.014291,0.041985,0.045631,-0.00363,-0.023346,-0.016834,-0.036259,0.003925,0.019696,0.027176,0.02353,0.00804,-0.015238,-0.004602,0.028189,-0.00878,0.026455,-0.004413,-0.023286,-0.020831,-0.013712,0.02049,0.017521,-0.004392,-0.004432,-0.004666,-0.01965,-0.015279,-0.023229,-0.009894,-0.02654,-0.015425,0.026099,-0.009173,0.025814,-0.002449,-0.024673,0.001656,-0.002645,-0.024472,0.009885,0.002412,-0.021211,-0.011158,-0.004071,-0.039804,0.015876,-0.007912,0.00298,-0.028761,-0.027686,-0.021079,0.0001,0.002395,-0.029705,-0.010856,-0.012114,-0.010032,0.024644,0.008433,-0.023729,-0.007477,0.028917,-0.010986,-0.016497,0.009415,0.018289,0.004225,0.013398,-0.003217,-0.014722,-0.009774,-0.020824,-0.011235,0.010129,0.004694,-0.01414,0.011297,0.00864,0.003554,-0.013905,0.010194,-0.000588,0.016452,0.005398,0.00949,-0.016907,-0.019449,0.019247,-0.018269,-0.022054,0.035479,0.017517,0.014358,-0.00592,-0.005499,0.022577,-0.002098,0.028162,0.028012,-0.009083,-0.010663,0.003411,-0.007584,-0.008702,-0.003066,-0.024061,0.009006,-0.005962,-0.008443,-0.012843,0.014679,0.012292,-0.010667,0.011949,-0.019478,0.004063,-0.001255,-0.01176,-0.01194,-0.015718,-0.016672,-0.022398,0.019692,0.009881,-0.025308,-0.017314,0.027234,0.002567,-0.018215,-0.002731,0.014089,0.018067,-0.008646,-0.002019,0.008941,0.01073,0.011453,-0.008433,-0.005288,0.0013,0.010854,-0.024225,-0.011229,-0.009023,0.00282,0.020369,-0.027245,-0.005153,-0.01309,0.00477,0.013618,0.008284,0.007004,-0.000237,-0.009649,0.006179,-0.003944,-0.002354,0.006194,-0.01096,-0.011094,-0.018331,0.02691,-0.010754,-0.030586,-0.006552,-0.001103,0.014271,0.00219,0.010393,0.018642,-0.003907,-0.015776,0.002287,0.002076,0.017322,0.026583,-0.005787,-0.001431,-0.017098,0.030672,0.003212,-0.00348,-0.009102,0.006372,0.002701,0.032321,0.009328,-0.014496,-0.021993,-0.022041,-0.006105,-0.009757,-0.03687,0.018777,-0.028428,0.005373,-0.006823,-0.002413,-0.016446,-0.007992,0.013408,0.008006,0.021931,-0.015675,0.015636,0.001517,-0.024616,-0.013544,-0.026681,-0.004654,0.006673,-0.019194,-0.039296,-0.002843,0.004196,-0.007143,-0.012022,0.017546,0.011267,0.038164,0.014851,0.016622,-0.022971,0.00502,0.014055,0.666667,0.0,0.0,5.0,"Automotive, Auto Repair",7


In [28]:
all_features_business['categories'][0]

'Nightlife, Sports Bars, Restaurants, Bars, American (Traditional)'

In [29]:
all_features_business.head()

Unnamed: 0,business_id,w2v0,w2v1,w2v2,w2v3,w2v4,w2v5,w2v6,w2v7,w2v8,w2v9,w2v10,w2v11,w2v12,w2v13,w2v14,w2v15,w2v16,w2v17,w2v18,w2v19,w2v20,w2v21,w2v22,w2v23,w2v24,w2v25,w2v26,w2v27,w2v28,w2v29,w2v30,w2v31,w2v32,w2v33,w2v34,w2v35,w2v36,w2v37,w2v38,w2v39,w2v40,w2v41,w2v42,w2v43,w2v44,w2v45,w2v46,w2v47,w2v48,w2v49,w2v50,w2v51,w2v52,w2v53,w2v54,w2v55,w2v56,w2v57,w2v58,w2v59,w2v60,w2v61,w2v62,w2v63,w2v64,w2v65,w2v66,w2v67,w2v68,w2v69,w2v70,w2v71,w2v72,w2v73,w2v74,w2v75,w2v76,w2v77,w2v78,w2v79,w2v80,w2v81,w2v82,w2v83,w2v84,w2v85,w2v86,w2v87,w2v88,w2v89,w2v90,w2v91,w2v92,w2v93,w2v94,w2v95,w2v96,w2v97,w2v98,w2v99,w2v100,w2v101,w2v102,w2v103,w2v104,w2v105,w2v106,w2v107,w2v108,w2v109,w2v110,w2v111,w2v112,w2v113,w2v114,w2v115,w2v116,w2v117,w2v118,w2v119,w2v120,w2v121,w2v122,w2v123,w2v124,w2v125,w2v126,w2v127,w2v128,w2v129,w2v130,w2v131,w2v132,w2v133,w2v134,w2v135,w2v136,w2v137,w2v138,w2v139,w2v140,w2v141,w2v142,w2v143,w2v144,w2v145,w2v146,w2v147,w2v148,w2v149,w2v150,w2v151,w2v152,w2v153,w2v154,w2v155,w2v156,w2v157,w2v158,w2v159,w2v160,w2v161,w2v162,w2v163,w2v164,w2v165,w2v166,w2v167,w2v168,w2v169,w2v170,w2v171,w2v172,w2v173,w2v174,w2v175,w2v176,w2v177,w2v178,w2v179,w2v180,w2v181,w2v182,w2v183,w2v184,w2v185,w2v186,w2v187,w2v188,w2v189,w2v190,w2v191,w2v192,w2v193,w2v194,w2v195,w2v196,w2v197,w2v198,w2v199,w2v200,w2v201,w2v202,w2v203,w2v204,w2v205,w2v206,w2v207,w2v208,w2v209,w2v210,w2v211,w2v212,w2v213,w2v214,w2v215,w2v216,w2v217,w2v218,w2v219,w2v220,w2v221,w2v222,w2v223,w2v224,w2v225,w2v226,w2v227,w2v228,w2v229,w2v230,w2v231,w2v232,w2v233,w2v234,w2v235,w2v236,w2v237,w2v238,w2v239,w2v240,w2v241,w2v242,w2v243,w2v244,w2v245,w2v246,w2v247,w2v248,w2v249,w2v250,w2v251,w2v252,w2v253,w2v254,w2v255,w2v256,w2v257,w2v258,w2v259,w2v260,w2v261,w2v262,w2v263,w2v264,w2v265,w2v266,w2v267,w2v268,w2v269,w2v270,w2v271,w2v272,w2v273,w2v274,w2v275,w2v276,w2v277,w2v278,w2v279,w2v280,w2v281,w2v282,w2v283,w2v284,w2v285,w2v286,w2v287,w2v288,w2v289,w2v290,w2v291,w2v292,w2v293,w2v294,w2v295,w2v296,w2v297,w2v298,w2v299,cool,funny,useful,stars,categories,review_count
0,--I7YYLada0tSLkORTHb5Q,0.016423,0.028373,-0.017573,0.015987,-0.000105,-0.000613,0.022004,-0.017549,-0.007508,-0.007643,-0.004137,0.005752,0.021792,0.00858,-0.005153,0.016274,0.011641,-0.006097,0.007114,-0.016152,-0.021916,-0.002982,0.006849,-0.020937,-0.001696,0.013037,-0.000854,-0.001098,0.000506,-0.009859,-0.008329,-0.009132,-0.000561,0.030923,-0.028243,0.010541,-0.011148,0.005102,-0.010144,0.020217,-0.001866,0.043708,-0.007104,-0.005223,-0.006323,-0.014395,-0.018785,-0.00872,0.000136,0.008391,0.001354,0.005404,0.009978,-0.00328,0.001135,0.006917,0.009708,0.002898,-0.005693,-0.006635,0.008571,-0.0069,0.021818,0.008268,0.009152,-0.004829,0.015014,0.004731,0.003607,-0.002116,0.038439,-0.007744,-0.006328,-0.002035,0.003954,-0.003805,0.001273,0.003992,-0.004673,-0.006827,-0.003431,-0.006051,0.011142,-0.001584,0.015804,-0.020293,0.01824,0.007635,-0.003957,-0.004237,-0.009845,-0.00238,-0.009501,-0.002669,0.006937,0.002659,-0.016396,0.007884,-0.004637,-0.000481,-0.004326,0.013595,-0.00826,-0.012649,0.012902,0.002506,0.003483,-0.021323,0.014243,0.018884,-0.017765,-0.013973,0.002364,0.018644,0.012008,0.01472,0.006201,0.005151,0.019246,-0.002152,-0.03451,0.00855,0.003693,0.002966,-0.00023,0.00415,-0.00636,-0.008175,-0.003261,-0.007701,-0.012668,0.003262,0.014533,-0.004862,5.7e-05,0.010327,0.01357,-0.006513,0.003236,0.011813,-0.001414,-0.010102,0.010253,-0.00662,0.000912,-0.006159,0.009072,0.014108,0.005114,0.005896,-0.008537,0.004655,-0.001175,0.034869,-0.010275,5.6e-05,0.007967,0.004398,-0.0247,0.001478,-0.005329,-0.008903,-0.005312,0.004596,0.006408,0.013327,-0.000956,-0.005602,-0.018464,0.021665,-0.019262,0.000263,-0.014808,0.000905,-0.006787,-0.008589,0.019641,0.009316,-0.00208,-0.000111,-0.002086,0.004262,-0.007424,0.011197,0.001892,0.017257,0.011649,-0.001445,0.009688,0.001303,0.00728,-0.011114,-0.015809,0.005787,-0.000729,-0.013344,0.002589,0.028851,0.000345,-0.011377,0.000261,0.0127,-0.004975,0.023475,-0.005133,0.02668,-0.002168,0.005035,-0.01226,-0.001136,-0.009256,-0.0221,-0.005014,-0.00075,0.004653,-0.002741,-0.007252,0.009529,-0.006381,0.012016,0.017946,0.003922,0.000784,-0.001312,0.014511,0.011437,0.012514,-0.003473,0.014323,-0.00278,-0.013057,0.014756,0.015251,-0.008106,-0.001643,-0.003443,0.006926,0.008021,0.006439,-0.021857,-0.001257,-0.000578,0.003006,-0.001827,-0.025314,0.016381,0.011534,-0.006476,-0.007396,0.007404,0.00877,0.022904,0.01604,0.020333,0.002619,0.00201,-0.020388,0.002111,0.011524,0.006622,0.006941,0.002426,-0.000127,-0.003453,0.000524,-0.0101,0.008531,-0.02123,-0.017735,-0.02621,-0.005717,-0.013019,-0.003495,-0.001274,0.01131,-0.009379,-0.017453,-0.00117,0.014352,-0.002489,0.019097,0.003758,-0.009179,0.001424,-0.006294,-0.009914,-0.018522,-0.009218,0.004427,-0.0106,-0.013605,0.002903,0.005445,0.01152,0.016746,-0.005915,0.024629,-0.007327,-0.022198,0.001746,0.352941,0.352941,0.823529,3.647059,"Nightlife, Sports Bars, Restaurants, Bars, Ame...",96
1,--U98MNlDym2cLn36BBPgQ,0.009526,0.020788,-0.013227,0.037498,0.006747,-0.004874,0.012273,-0.024117,0.008519,-0.009936,-0.00301,-0.007703,0.018541,0.022424,-0.00855,0.003445,0.014191,0.000926,0.001358,-0.00626,-0.01699,0.007976,-0.001236,-0.029893,-0.004399,0.022837,-0.005918,0.008592,0.012597,-0.005725,0.009391,0.000858,0.003131,0.034826,-0.026948,-0.00441,-0.012987,-0.005924,-0.012023,0.018018,0.005187,0.033649,-0.007751,-0.013777,0.004336,-0.018224,-0.030893,-0.015549,0.008527,0.005289,-0.000202,-0.001845,0.011829,-0.001018,0.005172,0.001364,-0.001056,-0.00733,-0.018776,-0.007521,0.00076,-0.013104,0.003671,-0.007366,-0.001656,0.011103,0.002937,0.001705,0.015391,-0.006962,0.000296,-0.005424,-0.024318,0.011868,0.022309,0.015153,-0.003186,-0.003033,0.002904,-0.014292,0.003285,0.004305,0.017303,0.003647,0.003455,-0.006365,0.00517,0.001712,-0.004432,-0.006427,-0.014719,-0.003795,-0.002669,-0.008352,0.002823,0.005066,-0.005835,0.005526,-0.00072,0.001431,-0.005034,0.006255,0.009896,-0.020154,0.006983,0.001042,-0.009043,-0.007399,0.003431,0.011243,-0.020616,-0.006034,-0.004156,0.004347,0.006449,-0.003178,0.005243,-0.000175,9.5e-05,-0.005957,-0.01748,0.004124,0.006667,0.000624,-0.003606,-0.012922,-0.010647,-0.01105,0.000661,-0.008315,0.005649,0.004742,0.014144,-0.001626,-0.003618,0.004101,-0.007025,0.004233,0.001314,0.007004,0.002882,-0.005765,-0.005438,-0.008123,-0.006626,-0.000191,-0.015236,0.017853,0.002019,0.007784,-0.004522,0.003228,-0.011678,0.015277,-0.000337,-0.006169,0.007778,0.012632,-0.00437,0.005508,-0.005371,0.011217,-0.006899,0.004853,0.021217,0.004433,-0.008672,-0.00017,-0.013791,0.003937,-0.008665,0.002376,-0.005243,-0.003049,-0.011634,0.0051,0.01349,-0.000978,0.000452,-0.00491,0.007104,-0.008656,-0.013201,0.009715,0.00019,0.018675,-0.004282,-0.01235,-0.005294,-0.003098,0.0043,-0.010867,-0.008232,-0.005555,-0.008416,-0.000438,0.006088,0.034031,-0.005951,-0.011274,-0.006163,0.022856,0.00466,0.016616,-0.017315,0.029941,-0.004681,-0.006773,0.003573,0.000269,-0.003365,-0.020757,-0.007967,0.001097,0.001597,-0.014545,0.008238,-0.002162,-0.0011,0.008331,0.006731,-0.012262,0.011174,-0.002553,0.003972,0.011859,-0.002478,0.000185,0.008342,-0.00687,0.009072,0.005089,0.008168,-0.008667,-0.008383,0.007071,0.007966,-0.003892,-0.011247,-0.01095,-0.000924,-0.000457,-0.000581,-0.006893,-0.014215,0.00518,0.012293,-0.005615,-0.00375,0.005397,0.005264,0.009632,0.017666,0.002859,0.005952,0.003424,-0.016514,0.003168,0.005319,0.009492,0.005101,0.00985,0.000228,-0.007808,-0.010544,-0.005459,-0.003861,-0.037872,-0.017202,-0.018502,-0.000997,-0.002113,-0.005247,0.004077,-0.006937,-0.012416,0.001015,0.002865,0.007258,0.016955,0.000729,0.008853,-0.017975,-0.006315,-0.006089,5.3e-05,-0.006383,-0.025554,-0.010488,-0.009348,-0.004467,-0.008314,0.012735,0.017171,0.013198,0.00206,0.010875,-0.012588,-0.019368,-0.005236,0.0,0.0,2.0,3.0,"Pizza, Restaurants",4
2,--j-kaNMCo1-DYzddCsA5Q,0.035586,0.022514,-0.006759,-0.034605,-0.0043,-0.001136,0.001259,-0.030018,-0.024447,-0.019531,0.013469,-0.015355,0.014754,-0.012143,0.011463,0.011731,0.034843,0.023824,0.004841,-0.003642,0.03355,0.023334,0.040423,-0.025388,-0.005392,-0.012638,0.007508,-0.019039,-0.013676,0.005799,-0.002171,-0.031701,0.005618,0.007776,-0.01297,0.015142,-0.005067,0.002006,-0.001829,0.037603,0.003339,0.036467,-0.016766,0.020584,0.0084,0.001669,-0.013346,-0.013458,-0.011666,-0.016915,0.003241,0.013855,0.011129,-0.030245,0.008177,-0.018768,0.018454,0.008811,-5.9e-05,-0.003335,-0.006106,-0.033478,0.025956,0.018926,-0.01407,0.005113,0.02893,-0.006716,-0.008166,0.026749,-0.004555,-0.006962,-0.006616,-0.028545,0.008256,-0.011582,0.034356,-0.004741,-0.026255,-0.022443,-0.003017,0.000753,0.00483,0.011377,0.022869,-0.016355,-0.01471,0.012377,-0.015632,-0.002214,-0.030018,0.002821,-0.037887,-0.009241,0.031782,0.017117,-0.011676,-0.028895,-0.016502,-0.038487,-0.010697,-0.020796,-0.027077,-0.015862,-0.010757,0.011843,0.006723,6.8e-05,0.025043,0.001866,0.00963,0.005317,0.003732,0.030519,0.019011,-0.013259,-0.005914,-0.014707,-0.022952,0.026195,-0.041787,0.016181,-0.022299,-0.006052,-0.022288,0.02067,0.010472,-0.006911,0.003276,0.00254,-0.023013,0.009655,0.016446,0.018688,0.020813,0.026653,0.014267,0.003966,0.039087,0.036969,0.008616,0.015359,0.000131,0.018841,-0.03424,-0.047229,0.012753,-0.022866,-0.030703,-0.003877,0.010786,0.00024,0.004964,-0.017763,0.020084,-0.037403,0.020078,0.018008,-0.036033,-0.034225,-0.030038,-0.000533,0.00824,0.007428,0.014448,0.034219,0.03488,-0.003483,-0.016695,0.038513,-0.028679,-2.1e-05,-0.011349,0.004167,0.016462,-0.016115,0.001255,0.0207,-0.006923,-0.016865,0.002612,0.004554,0.021733,-0.005121,-0.011707,0.020933,-0.013895,-0.021148,0.013658,-0.017727,-0.016137,-0.007611,0.001611,0.025292,-0.030883,-0.025738,0.009383,0.014083,0.004231,-0.010233,0.00343,0.017012,-0.02475,0.027743,0.036476,0.018355,-0.00885,-0.010797,-0.003072,0.024055,-0.00457,-0.01121,0.010668,0.01257,0.026892,-0.025917,-0.0182,0.039035,0.020572,0.014087,-0.007038,0.019014,-0.005364,0.002627,0.018733,0.017483,0.014418,-0.024293,0.019427,0.020707,-0.013925,0.005169,0.013683,0.00964,-0.010311,-0.018649,-0.007244,-0.011057,-0.017467,-0.004436,-0.035974,-0.007311,0.009026,0.015625,-0.012384,-0.057747,0.007954,-0.021706,-0.016572,0.025558,-0.000519,0.025739,0.000586,0.010865,0.019225,-0.005994,-0.000922,-0.007387,0.005063,-0.022474,-0.001555,0.001445,-0.02054,-0.004993,-0.004842,-0.016322,-0.001742,-0.014826,0.010301,-0.023069,0.011577,-0.008462,0.000272,0.005402,-0.011265,-0.000371,0.018296,-0.011073,0.001637,-0.024963,0.02022,-0.005851,-0.021839,-0.045366,0.00491,-0.001685,0.008545,-0.054177,-0.001469,-0.014475,0.000184,0.002357,0.01168,0.009636,0.027132,0.029206,0.035708,-0.035958,-0.010643,0.003613,0.0,0.0,0.0,5.0,"Hair Removal, Nail Technicians, Beauty & Spas,...",4
3,--wIGbLEhlpl_UeAIyDmZQ,-0.006451,0.005224,-0.009912,0.01222,-0.001507,0.014311,-0.004426,-0.013908,-0.003054,-0.003489,-0.005158,-0.013398,-0.009127,-0.013979,-0.00975,0.015487,0.007607,0.013315,-0.011175,-0.009165,0.002724,-0.008716,-0.013071,-0.0234,0.003305,0.020969,-0.002856,-0.001024,-0.011011,0.005339,-2.6e-05,-0.009941,-0.001624,-0.009941,0.00411,0.010796,0.001066,-0.000427,0.006601,0.025162,0.008402,0.005168,0.008002,-0.001039,0.01567,0.017713,0.00148,-0.007099,-0.009715,0.002707,-0.005726,0.007443,-0.017098,-0.014837,-0.004003,-0.00451,0.007199,-7.2e-05,0.003028,0.004661,-0.001873,-0.009008,-0.001957,-0.012551,-0.001671,0.026746,-0.015515,-0.033926,0.000387,0.013197,-0.04292,0.004932,-0.013605,0.010155,0.01418,0.030974,-0.010119,-0.013864,-0.004738,-0.023016,0.00213,0.012809,0.012343,0.019651,0.004754,-0.006894,-0.005952,0.012059,-0.006827,0.020048,-0.002721,-0.013028,-0.00936,-0.016978,0.018649,0.024557,-0.001547,-0.002572,0.003069,-0.006623,-0.021931,-0.007246,-0.004054,-0.031499,-0.021295,0.02005,-0.005289,0.023503,-0.011998,-0.022676,-8.7e-05,-0.007599,-0.023538,0.009769,-0.017149,-0.030261,-0.011132,-0.00241,-0.030899,0.006051,0.001235,0.000696,-0.009934,-0.021671,-0.020202,-0.007275,0.010149,-0.025803,-0.004109,0.000107,0.001046,0.025114,0.005574,-0.023765,0.006369,0.013491,-0.014562,-0.010531,-0.00315,0.009234,0.011258,0.010218,-0.006969,-0.012741,-0.00551,0.005818,-0.019811,0.00496,-0.006015,-0.007071,0.011693,0.005291,-0.014156,-0.009118,0.000954,0.001818,0.009421,0.006589,0.008889,0.002381,-0.022533,0.022815,-0.007912,-0.02021,0.035407,0.01739,-0.000898,-0.004029,-0.000687,0.011396,0.000525,0.006189,0.011964,-0.001504,-0.007669,-0.002903,-0.001048,0.001073,0.00497,-0.019298,-0.001323,-0.017552,-0.018586,-0.01448,0.012289,0.014894,-0.006163,0.010517,-0.022579,0.010857,-0.001869,-0.002703,-0.008702,-0.005797,-0.008647,-0.005226,0.016426,0.007479,-0.027694,-0.012755,0.016212,0.005117,-0.018269,0.002022,0.008764,0.019119,-0.006846,0.000805,0.006318,0.005565,0.003548,-0.005001,-0.0042,0.004131,-0.008311,-0.019334,-0.005256,-0.002412,-0.000262,0.005065,-0.016332,0.00191,-0.010828,-0.015169,0.009904,0.013332,-0.005678,0.0022,-0.001355,0.001587,0.00186,0.000814,0.003844,-0.00014,-0.002234,-0.000621,0.015916,-0.011911,-0.014774,-0.013549,0.002547,0.010617,-0.003541,-0.001351,0.018963,0.002975,-0.014302,0.01447,0.005929,0.015377,0.015909,-0.001553,0.009135,-0.01686,0.022797,0.001344,-0.001511,-0.00185,0.004181,0.006768,0.017253,0.005129,0.009166,-0.012016,-0.016002,0.004678,-0.024996,-0.026095,0.010083,-0.01217,0.007552,0.002216,-0.003668,-0.01454,-0.013339,0.009703,0.002095,0.011977,-0.003552,0.011336,-0.002475,-0.010721,-0.009994,-0.008097,-0.013916,-0.002644,-0.015977,-0.031741,-0.008459,-0.002852,0.00265,-0.014241,0.017916,0.007721,0.023303,0.010598,0.007041,-0.007397,-0.002891,-0.002526,0.666667,0.166667,3.0,3.833333,"Electronics, Professional Services, Local Serv...",14
4,-000aQFeK6tqVLndf7xORg,-0.007433,-1.1e-05,-0.009821,0.013711,-0.007697,0.009369,-0.000267,-0.014385,0.013446,-0.006545,-0.004254,-0.00884,0.006065,-0.020545,-0.014723,0.021661,0.007791,0.023143,-0.019433,-0.010503,0.016465,0.003757,-0.001521,-0.019992,0.001518,0.018253,-0.008955,-0.009514,-0.018659,0.013087,-0.008339,-0.016071,0.002549,-0.015986,0.011783,0.0182,0.001336,-0.005553,-0.003634,0.033946,0.020831,0.002149,0.002212,-0.004911,0.015449,0.023007,0.004104,-0.006299,-0.017385,-0.009145,-0.008275,0.01966,-0.010135,-0.007654,0.009172,-0.020482,-0.005627,0.003912,-0.009415,0.001919,-0.004439,-0.014971,0.014464,0.007223,-0.014237,0.027177,-0.003932,-0.02755,-0.009708,0.008284,-0.052534,-0.002317,-0.020593,0.014291,0.041985,0.045631,-0.00363,-0.023346,-0.016834,-0.036259,0.003925,0.019696,0.027176,0.02353,0.00804,-0.015238,-0.004602,0.028189,-0.00878,0.026455,-0.004413,-0.023286,-0.020831,-0.013712,0.02049,0.017521,-0.004392,-0.004432,-0.004666,-0.01965,-0.015279,-0.023229,-0.009894,-0.02654,-0.015425,0.026099,-0.009173,0.025814,-0.002449,-0.024673,0.001656,-0.002645,-0.024472,0.009885,0.002412,-0.021211,-0.011158,-0.004071,-0.039804,0.015876,-0.007912,0.00298,-0.028761,-0.027686,-0.021079,0.0001,0.002395,-0.029705,-0.010856,-0.012114,-0.010032,0.024644,0.008433,-0.023729,-0.007477,0.028917,-0.010986,-0.016497,0.009415,0.018289,0.004225,0.013398,-0.003217,-0.014722,-0.009774,-0.020824,-0.011235,0.010129,0.004694,-0.01414,0.011297,0.00864,0.003554,-0.013905,0.010194,-0.000588,0.016452,0.005398,0.00949,-0.016907,-0.019449,0.019247,-0.018269,-0.022054,0.035479,0.017517,0.014358,-0.00592,-0.005499,0.022577,-0.002098,0.028162,0.028012,-0.009083,-0.010663,0.003411,-0.007584,-0.008702,-0.003066,-0.024061,0.009006,-0.005962,-0.008443,-0.012843,0.014679,0.012292,-0.010667,0.011949,-0.019478,0.004063,-0.001255,-0.01176,-0.01194,-0.015718,-0.016672,-0.022398,0.019692,0.009881,-0.025308,-0.017314,0.027234,0.002567,-0.018215,-0.002731,0.014089,0.018067,-0.008646,-0.002019,0.008941,0.01073,0.011453,-0.008433,-0.005288,0.0013,0.010854,-0.024225,-0.011229,-0.009023,0.00282,0.020369,-0.027245,-0.005153,-0.01309,0.00477,0.013618,0.008284,0.007004,-0.000237,-0.009649,0.006179,-0.003944,-0.002354,0.006194,-0.01096,-0.011094,-0.018331,0.02691,-0.010754,-0.030586,-0.006552,-0.001103,0.014271,0.00219,0.010393,0.018642,-0.003907,-0.015776,0.002287,0.002076,0.017322,0.026583,-0.005787,-0.001431,-0.017098,0.030672,0.003212,-0.00348,-0.009102,0.006372,0.002701,0.032321,0.009328,-0.014496,-0.021993,-0.022041,-0.006105,-0.009757,-0.03687,0.018777,-0.028428,0.005373,-0.006823,-0.002413,-0.016446,-0.007992,0.013408,0.008006,0.021931,-0.015675,0.015636,0.001517,-0.024616,-0.013544,-0.026681,-0.004654,0.006673,-0.019194,-0.039296,-0.002843,0.004196,-0.007143,-0.012022,0.017546,0.011267,0.038164,0.014851,0.016622,-0.022971,0.00502,0.014055,0.666667,0.0,0.0,5.0,"Automotive, Auto Repair",7


In [30]:
def stringDFColToBinaryCols(df, series_name):
    # Create list of all categories
    all_cats = []
    for string in df[series_name]:
        string = str(string)
        cats = string.strip().replace(' ', '').split(',')
        for cat in cats:
            if cat not in all_cats:
                all_cats.append(cat)
    # Make binary for each cat for each row
    for cat in all_cats:
        df[cat] = df[series_name].str.strip().str.replace(' ', '').str.contains(cat)
        # This technique will have some problems. 'Golf' may appear in non-Golf categories (ie 'Disc Golf')
        # Can be fixed with regular expressions: ',Golf,' OR 'BOF Golf,' OR ',Golf EOF'
    
    return df, all_cats
        
all_features_business, all_cats = stringDFColToBinaryCols(all_features_business, 'categories')

  if sys.path[0] == '':


In [31]:
print(all_cats)

['Nightlife', 'SportsBars', 'Restaurants', 'Bars', 'American(Traditional)', 'Pizza', 'HairRemoval', 'NailTechnicians', 'Beauty&Spas', 'NailSalons', 'Waxing', 'DaySpas', 'Electronics', 'ProfessionalServices', 'LocalServices', 'ElectronicsRepair', 'Computers', 'Shopping', 'Automotive', 'AutoRepair', 'Chinese', 'EyelashService', 'TobaccoShops', 'VapeShops', 'CarDealers', 'UsedCarDealers', 'Dentists', 'GeneralDentistry', 'CosmeticDentists', 'PediatricDentists', 'Health&Medical', 'Tex-Mex', 'Mexican', 'Arts&Entertainment', 'Festivals', 'Food', 'FoodTrucks', 'FarmersMarket', 'Portuguese', 'Bakeries', 'ChickenShop', 'Barbeque', 'EventPlanning&Services', 'EventPhotography', 'Photographers', 'SessionPhotography', 'SkinCare', 'Antiques', 'IceCream&FrozenYogurt', 'Donuts', 'SpecialtyFood', 'WebDesign', 'GraphicDesign', 'Marketing', 'RecyclingCenter', 'Caterers', 'Southern', 'ComfortFood', 'Breakfast&Brunch', 'French', 'American(New)', 'Burgers', 'Sandwiches', 'Coffee&Tea', 'Brasseries', 'Gyms', '

In [32]:
print(
    len(all_features_business[all_features_business['Golf']==True]), 
    len(all_features_business[all_features_business['DiscGolf']==True]), 
)

61 1


In [48]:
print(all_features_business[all_features_business['DiscGolf']==True]['categories'].values)
print('Should not have a True value for Golf, but does. Problem to deal with in the future.')
print(all_features_business[all_features_business['DiscGolf']==True]['Golf'].values)

['Sporting Goods, Active Life, Bike Rentals, Disc Golf, Shopping']
Should not have a True value for Golf, but does. Problem to deal with in the future.
[True]


In [35]:
all_features_business.head()

Unnamed: 0,business_id,w2v0,w2v1,w2v2,w2v3,w2v4,w2v5,w2v6,w2v7,w2v8,w2v9,w2v10,w2v11,w2v12,w2v13,w2v14,w2v15,w2v16,w2v17,w2v18,w2v19,w2v20,w2v21,w2v22,w2v23,w2v24,w2v25,w2v26,w2v27,w2v28,w2v29,w2v30,w2v31,w2v32,w2v33,w2v34,w2v35,w2v36,w2v37,w2v38,w2v39,w2v40,w2v41,w2v42,w2v43,w2v44,w2v45,w2v46,w2v47,w2v48,w2v49,w2v50,w2v51,w2v52,w2v53,w2v54,w2v55,w2v56,w2v57,w2v58,w2v59,w2v60,w2v61,w2v62,w2v63,w2v64,w2v65,w2v66,w2v67,w2v68,w2v69,w2v70,w2v71,w2v72,w2v73,w2v74,w2v75,w2v76,w2v77,w2v78,w2v79,w2v80,w2v81,w2v82,w2v83,w2v84,w2v85,w2v86,w2v87,w2v88,w2v89,w2v90,w2v91,w2v92,w2v93,w2v94,w2v95,w2v96,w2v97,w2v98,w2v99,w2v100,w2v101,w2v102,w2v103,w2v104,w2v105,w2v106,w2v107,w2v108,w2v109,w2v110,w2v111,w2v112,w2v113,w2v114,w2v115,w2v116,w2v117,w2v118,w2v119,w2v120,w2v121,w2v122,w2v123,w2v124,w2v125,w2v126,w2v127,w2v128,w2v129,w2v130,w2v131,w2v132,w2v133,w2v134,w2v135,w2v136,w2v137,w2v138,w2v139,w2v140,w2v141,w2v142,w2v143,w2v144,w2v145,w2v146,w2v147,w2v148,w2v149,w2v150,w2v151,w2v152,w2v153,w2v154,w2v155,w2v156,w2v157,w2v158,w2v159,w2v160,w2v161,w2v162,w2v163,w2v164,w2v165,w2v166,w2v167,w2v168,w2v169,w2v170,w2v171,w2v172,w2v173,w2v174,w2v175,w2v176,w2v177,w2v178,w2v179,w2v180,w2v181,w2v182,w2v183,w2v184,w2v185,w2v186,w2v187,w2v188,w2v189,w2v190,w2v191,w2v192,w2v193,w2v194,w2v195,w2v196,w2v197,w2v198,w2v199,w2v200,w2v201,w2v202,w2v203,w2v204,w2v205,w2v206,w2v207,w2v208,w2v209,w2v210,w2v211,w2v212,w2v213,w2v214,w2v215,w2v216,w2v217,w2v218,w2v219,w2v220,w2v221,w2v222,w2v223,w2v224,w2v225,w2v226,w2v227,w2v228,w2v229,w2v230,w2v231,w2v232,w2v233,w2v234,w2v235,w2v236,w2v237,w2v238,w2v239,w2v240,w2v241,w2v242,w2v243,w2v244,w2v245,w2v246,w2v247,w2v248,w2v249,w2v250,w2v251,w2v252,w2v253,w2v254,w2v255,w2v256,w2v257,w2v258,w2v259,w2v260,w2v261,w2v262,w2v263,w2v264,w2v265,w2v266,w2v267,w2v268,w2v269,w2v270,w2v271,w2v272,w2v273,w2v274,w2v275,w2v276,w2v277,w2v278,w2v279,w2v280,w2v281,w2v282,w2v283,w2v284,w2v285,w2v286,w2v287,w2v288,w2v289,w2v290,w2v291,w2v292,w2v293,w2v294,w2v295,w2v296,w2v297,w2v298,w2v299,cool,funny,useful,stars,categories,review_count,Nightlife,SportsBars,Restaurants,Bars,American(Traditional),Pizza,HairRemoval,NailTechnicians,Beauty&Spas,NailSalons,Waxing,DaySpas,Electronics,ProfessionalServices,LocalServices,ElectronicsRepair,Computers,Shopping,Automotive,AutoRepair,Chinese,EyelashService,TobaccoShops,VapeShops,CarDealers,UsedCarDealers,Dentists,GeneralDentistry,CosmeticDentists,PediatricDentists,Health&Medical,Tex-Mex,Mexican,Arts&Entertainment,Festivals,Food,FoodTrucks,FarmersMarket,Portuguese,Bakeries,ChickenShop,Barbeque,EventPlanning&Services,EventPhotography,Photographers,SessionPhotography,SkinCare,Antiques,IceCream&FrozenYogurt,Donuts,SpecialtyFood,WebDesign,GraphicDesign,Marketing,RecyclingCenter,Caterers,Southern,ComfortFood,Breakfast&Brunch,French,American(New),Burgers,Sandwiches,Coffee&Tea,Brasseries,Gyms,ChildCare&DayCare,LeisureCenters,Fitness&Instruction,ActiveLife,HardwareStores,Home&Garden,RealEstate,Condominiums,Hotels,HomeServices,ShoppingCenters,Hotels&Travel,HairSalons,EthnicFood,Turkish,InternationalGrocery,TapasBars,ShippingCenters,PrintingServices,Massage,MassageTherapy,Reflexology,Buffets,Korean,SushiBars,Japanese,Cafes,Soup,Golf,Venues&EventSpaces,AutoDetailing,BodyShops,AutoCustomization,Towing,Trainers,WeightLossCenters,FoodDeliveryServices,FastFood,Delis,Ethiopian,Vegetarian,Painters,DrywallInstallation&Repair,StuccoServices,Orthodontists,Periodontists,OralSurgeons,Piercing,Tattoo,Chiropractors,Optometrists,Italian,Couriers&DeliveryServices,PublicServices&Government,SportingGoods,Fashion,GolfEquipment,Bikes,Ski&SnowboardShops,SportsWear,BikeRepair/Maintenance,Filipino,PetGroomers,Veterinarians,PetSitting,Pets,PetServices,AutoGlassServices,RealEstateServices,RealEstateAgents,Pakistani,Indian,CardioClasses,DanceStudios,ChickenWings,Cosmetics&BeautySupply,Desserts,Sewing&Alterations,Arts&Crafts,Wheel&RimRepair,Tires,AutoParts&Supplies,Colonics,Saunas,Doctors,MedicalSpas,Naturopathic/Holistic,MeditationCenters,Reiki,SpiritualShop,Orthopedists,SportsMedicine,Surgeons,Grocery,MedicalCenters,InteriorDesign,Rugs,FurnitureStores,HomeDecor,Mattresses,Women'sClothing,Men'sClothing,ShoeStores,JuiceBars&Smoothies,Acupuncture,LaserHairRemoval,FamilyPractice,UrgentCare,Thai,AsianFusion,Vietnamese,Laotian,HomeCleaning,CarpetCleaning,Accessories,Barbers,Gluten-Free,SpeechTherapists,PhysicalTherapy,OccupationalTherapy,Seafood,Steakhouses,Wholesalers,DiscountStore,PartySupplies,DepartmentStores,...,Gelato,TelevisionServiceProviders,Fences&Gates,MetalFabricators,ScubaDiving,Diving,DiveShops,WatchRepair,Halotherapy,CulturalCenter,Lakes,Macarons,CustomCakes,Aquariums,BusinessConsulting,BotanicalGardens,PaintStores,Moroccan,Persian/Iranian,DataRecovery,Cajun/Creole,PartyEquipmentRentals,CarBrokers,BootCamps,Musicians,PartyCharacters,MusicProductionServices,Cuban,PuertoRican,RVDealers,RVRental,Bowling,Venezuelan,SummerCamps,PetAdoption,RefinishingServices,PublicTransportation,CommercialTruckDealers,CommercialTruckRepair,FoodStands,CommercialRealEstate,OutletStores,Campgrounds,RVParks,Resorts,TalentAgencies,GutterServices,UsedBookstore,AdultEducation,StripteaseDancers,DanceSchools,Wallpapering,GoldBuyers,PawnShops,Videographers,Arabian,DonationCenter,TravelAgents,Basque,Spanish,WaterDelivery,WaterStores,Kosher,SkateParks,Izakaya,Poutineries,BailBondsmen,PressureWashers,Herbs&Spices,PhotoBoothRentals,CannabisDispensaries,Poke,ArtClasses,Teppanyaki,Oncologist,HotPot,Szechuan,IrishPub,CyclingClasses,MountainBiking,ShoeRepair,ShoeShine,Cupcakes,SafeStores,Hunting&FishingSupplies,RehabilitationCenter,BasketballCourts,CountryClubs,Endocrinologists,Neurologist,Irish,PetCremationServices,PersonalInjuryLaw,Divorce&FamilyLaw,BankruptcyLaw,Immunodermatologists,RetirementHomes,Cantonese,PoleDancingClasses,Rodeo,VinylRecords,Props,Delicatessen,EthnicGrocery,GuestHouses,YelpEvents,RestaurantSupplies,PatioCoverings,Masonry/Concrete,DigitizingServices,Framing,TestPreparation,PrivateTutors,Skydiving,HomeHealthCare,MedicalSupplies,Psychologists,ModernEuropean,Shutters,FabricStores,SouvenirShops,Russian,CheeseShops,CarWindowTinting,FireProtectionServices,FacePainting,Tuscan,Gastroenterologist,Butcher,Blood&PlasmaDonationCenters,German,Keys&Locksmiths,DUILaw,CriminalDefenseLaw,Investing,SmogCheckStations,CarInspectors,BrewingSupplies,HongKongStyleCafe,PublicMarkets,VehicleWraps,Airports,TeethWhitening,RVRepair,CountertopInstallation,MortuaryServices,SnowRemoval,EstatePlanningLaw,Wills,Trusts,&Probates,BusinessLaw,Airlines,Estheticians,Engraving,TrophyShops,CandleStores,PopcornShops,Fishing,TrailerDealers,BeachBars,BeachVolleyball,ArtificialTurf,PanAsian,DJs,Paintball,MiniGolf,GoKarts,Wigs,GolfLessons,Opera&Ballet,Jazz&Blues,Waffles,SolarInstallation,HomeEnergyAuditors,CannabisClinics,Uzbek,Prenatal/PerinatalCare,Hypnosis/Hypnotherapy,Eatertainment,Afghan,HealthInsuranceOffices,BeverageStore,Tiling,Sicilian,Bartenders,SpineSurgeons,Carpenters,Singaporean,SkilledNursing,Live/RawFood,SepticServices,PrintMedia,SkatingRinks,InternetCafes,WineTours,Boating,DemolitionServices,ProductDesign,3DPrinting,RoadsideAssistance,Himalayan/Nepalese,Officiants,Kickboxing,Boxing,CookingClasses,CookingSchools,PersonalChefs,Indonesian,AquariumServices,Brazilian,LaboratoryTesting,HockeyEquipment,SkateShops,RealEstatePhotography,Video/FilmProduction,Sandblasting,Perfume,PrivateJetCharter,SoulFood,Bookbinding,TanningBeds,RealEstateLaw,EmergencyPetHospital,BoatCharters,Rafting/Kayaking,BoudoirPhotography,Argentine,SocialClubs,OutdoorFurnitureStores,SouthAfrican,AcaiBowls,LactationServices,PlacentaEncapsulations,Observatories,Ukrainian,Planetarium,Cabaret,Hakka,Sailing,FireplaceServices,Gunsmith,UniversityHousing,IndoorPlaycentre,Embassy,OliveOil,Karate,LocalFishStores,MotorsportVehicleRepairs,Synagogues,GuitarStores,MobileDentRepair,Paddleboarding,Distilleries,PostOffices,PetTransportation,CurrencyExchange,PastaShops,Smokehouse,Hydrotherapy,Pop-upShops,Videos&VideoGameRental,OxygenBars,ExcavationServices,MobileHomeRepair,PickYourOwnFarms,Farms,Scottish,British,Passport&VisaServices,PianoBars,PoliceDepartments,WeddingChapels,RegistrationServices,FloatSpa,DayCamps,TrainStations,Prosthodontists,MedicalCannabisReferrals,Mongolian,Orthotics,ChristmasTrees,ClubCrawl,ScreenPrinting,HazardousWasteDisposal,EnvironmentalAbatement,LawnServices,HennaArtists,KidsHairSalons,Zoos,EmploymentLaw,DebtReliefServices,VehicleShipping,Hats,BusTours,DinnerTheater,EstateLiquidation,GeneralLitigation,Coffee&TeaSupplies,Soccer,TrailerRepair,Awnings,Pretzels,ArtSpaceRentals,EditorialServices,Honduran,Nicaraguan,Marinas,CareerCounseling,TeamBuildingActivities,TownCarService,PayrollServices,AerialFitness,CremationServices,GolfCartRentals,GolfCartDealers,LivestockFeed&Supply,UltrasoundImagingCenters,GrillingEquipment,LightingStores,Donairs,Falafel,CannabisTours,PersonalAssistants,AcneTreatment,Clowns,Magicians,InstallmentLoans,Prosthetics,ParentingClasses,FoodBanks,StreetArt,Buses,DialysisClinics,Newspapers&Magazines,Cideries,AutoSecurity,TrailerRental,TabletopGames,MedicalTransportation,SoftwareDevelopment,HolidayDecoratingServices,HolidayDecorations,Cambodian,BirdShops,LanguageSchools,SeniorCenters,OsteopathicPhysicians,PetHospice,TrafficSchools,TrafficTicketingLaw,Urologists,Taekwondo,FarmEquipmentRepair,Coffeeshops,Sunglasses,AnimalPhysicalTherapy,Rheumatologists,PartyBikeRentals,Bangladeshi,Vocational&TechnicalSchool,PetWasteRemoval,Pathologists,Aestheticians,PsychicMediums,TastingClasses,WineTastingClasses,BodyContouring,PumpkinPatches,GeneratorInstallation/Repair,AddictionMedicine,VacationRentalAgents,AppraisalServices,Snorkeling,Dominican,Gemstones&Minerals,Cryotherapy,Trinidadian,ImmigrationLaw,SupperClubs,Burmese,AssistedLivingFacilities,PianoServices,HomeownerAssociation,ScavengerHunts,WalkingTours,BeerTours,BartendingSchools,Carousels,ConciergeMedicine,Matchmakers,WellDrilling,SriLankan,Trains,FurnitureRental,Badminton,PetPhotography,TitleLoans,DanceWear,IVHydration,CPRClasses,BikeSharing,NannyServices,Cafeteria,MistingSystemServices,HorseBoarding,Recording&RehearsalStudios,DisabilityLaw,SocialSecurityLaw,HabilitativeServices,CSA,RetinaSpecialists,BoatDealers,HearingAidProviders,PowderCoating,CircuitTrainingGyms,RotisserieChicken,EnvironmentalTesting,BingoHalls,ValetServices,SugarShacks,Austrian,Races&Competitions,Anesthesiologists,HouseSitters,TikiBars,CarShareServices,Squash,VisitorCenters,CheeseTastingClasses,FleaMarkets,WorkersCompensationLaw,Mosques,HolisticAnimalCare,Firewood,FoodTours,VascularMedicine,Tableware,Hydroponics,HighFidelityAudioEquipment,BarCrawl,BounceHouseRentals,BuddhistTemples,DIYAutoShop,HerbalShops,LANCenters,ConveyorBeltSushi,Egyptian,ReligiousSchools,HairLossCenters,Armenian,MotorcycleGear,ElderCarePlanning,BoatTours,BusRental,RacingExperience,HomeStaging,ReligiousItems,Ziplining,Colombian,Rolfing,Haitian,WildlifeControl,ConceptShops,DiscGolf,Drive-InTheater,TaiChi,International,TenantandEvictionLaw,Doulas,Neurotologists,Belgian,EthicalGrocery,Shanghainese,Machine&ToolRental,FirstAidClasses,HealthRetreats,Empanadas,AirportTerminals,RoofInspectors,Airsoft,VocalCoach,TelevisionStations,IceDelivery,Gerontologists,CustomsBrokers,MotorsportVehicleDealers,FlightInstruction,Cheerleading,RockClimbing,BalloonServices,ATVRentals/Tours,MassageSchools,Pool&Billiards,PettingZoos,Toxicologists,WaterParks,AirportLounges,Australian
0,--I7YYLada0tSLkORTHb5Q,0.016423,0.028373,-0.017573,0.015987,-0.000105,-0.000613,0.022004,-0.017549,-0.007508,-0.007643,-0.004137,0.005752,0.021792,0.00858,-0.005153,0.016274,0.011641,-0.006097,0.007114,-0.016152,-0.021916,-0.002982,0.006849,-0.020937,-0.001696,0.013037,-0.000854,-0.001098,0.000506,-0.009859,-0.008329,-0.009132,-0.000561,0.030923,-0.028243,0.010541,-0.011148,0.005102,-0.010144,0.020217,-0.001866,0.043708,-0.007104,-0.005223,-0.006323,-0.014395,-0.018785,-0.00872,0.000136,0.008391,0.001354,0.005404,0.009978,-0.00328,0.001135,0.006917,0.009708,0.002898,-0.005693,-0.006635,0.008571,-0.0069,0.021818,0.008268,0.009152,-0.004829,0.015014,0.004731,0.003607,-0.002116,0.038439,-0.007744,-0.006328,-0.002035,0.003954,-0.003805,0.001273,0.003992,-0.004673,-0.006827,-0.003431,-0.006051,0.011142,-0.001584,0.015804,-0.020293,0.01824,0.007635,-0.003957,-0.004237,-0.009845,-0.00238,-0.009501,-0.002669,0.006937,0.002659,-0.016396,0.007884,-0.004637,-0.000481,-0.004326,0.013595,-0.00826,-0.012649,0.012902,0.002506,0.003483,-0.021323,0.014243,0.018884,-0.017765,-0.013973,0.002364,0.018644,0.012008,0.01472,0.006201,0.005151,0.019246,-0.002152,-0.03451,0.00855,0.003693,0.002966,-0.00023,0.00415,-0.00636,-0.008175,-0.003261,-0.007701,-0.012668,0.003262,0.014533,-0.004862,5.7e-05,0.010327,0.01357,-0.006513,0.003236,0.011813,-0.001414,-0.010102,0.010253,-0.00662,0.000912,-0.006159,0.009072,0.014108,0.005114,0.005896,-0.008537,0.004655,-0.001175,0.034869,-0.010275,5.6e-05,0.007967,0.004398,-0.0247,0.001478,-0.005329,-0.008903,-0.005312,0.004596,0.006408,0.013327,-0.000956,-0.005602,-0.018464,0.021665,-0.019262,0.000263,-0.014808,0.000905,-0.006787,-0.008589,0.019641,0.009316,-0.00208,-0.000111,-0.002086,0.004262,-0.007424,0.011197,0.001892,0.017257,0.011649,-0.001445,0.009688,0.001303,0.00728,-0.011114,-0.015809,0.005787,-0.000729,-0.013344,0.002589,0.028851,0.000345,-0.011377,0.000261,0.0127,-0.004975,0.023475,-0.005133,0.02668,-0.002168,0.005035,-0.01226,-0.001136,-0.009256,-0.0221,-0.005014,-0.00075,0.004653,-0.002741,-0.007252,0.009529,-0.006381,0.012016,0.017946,0.003922,0.000784,-0.001312,0.014511,0.011437,0.012514,-0.003473,0.014323,-0.00278,-0.013057,0.014756,0.015251,-0.008106,-0.001643,-0.003443,0.006926,0.008021,0.006439,-0.021857,-0.001257,-0.000578,0.003006,-0.001827,-0.025314,0.016381,0.011534,-0.006476,-0.007396,0.007404,0.00877,0.022904,0.01604,0.020333,0.002619,0.00201,-0.020388,0.002111,0.011524,0.006622,0.006941,0.002426,-0.000127,-0.003453,0.000524,-0.0101,0.008531,-0.02123,-0.017735,-0.02621,-0.005717,-0.013019,-0.003495,-0.001274,0.01131,-0.009379,-0.017453,-0.00117,0.014352,-0.002489,0.019097,0.003758,-0.009179,0.001424,-0.006294,-0.009914,-0.018522,-0.009218,0.004427,-0.0106,-0.013605,0.002903,0.005445,0.01152,0.016746,-0.005915,0.024629,-0.007327,-0.022198,0.001746,0.352941,0.352941,0.823529,3.647059,"Nightlife, Sports Bars, Restaurants, Bars, Ame...",96,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,--U98MNlDym2cLn36BBPgQ,0.009526,0.020788,-0.013227,0.037498,0.006747,-0.004874,0.012273,-0.024117,0.008519,-0.009936,-0.00301,-0.007703,0.018541,0.022424,-0.00855,0.003445,0.014191,0.000926,0.001358,-0.00626,-0.01699,0.007976,-0.001236,-0.029893,-0.004399,0.022837,-0.005918,0.008592,0.012597,-0.005725,0.009391,0.000858,0.003131,0.034826,-0.026948,-0.00441,-0.012987,-0.005924,-0.012023,0.018018,0.005187,0.033649,-0.007751,-0.013777,0.004336,-0.018224,-0.030893,-0.015549,0.008527,0.005289,-0.000202,-0.001845,0.011829,-0.001018,0.005172,0.001364,-0.001056,-0.00733,-0.018776,-0.007521,0.00076,-0.013104,0.003671,-0.007366,-0.001656,0.011103,0.002937,0.001705,0.015391,-0.006962,0.000296,-0.005424,-0.024318,0.011868,0.022309,0.015153,-0.003186,-0.003033,0.002904,-0.014292,0.003285,0.004305,0.017303,0.003647,0.003455,-0.006365,0.00517,0.001712,-0.004432,-0.006427,-0.014719,-0.003795,-0.002669,-0.008352,0.002823,0.005066,-0.005835,0.005526,-0.00072,0.001431,-0.005034,0.006255,0.009896,-0.020154,0.006983,0.001042,-0.009043,-0.007399,0.003431,0.011243,-0.020616,-0.006034,-0.004156,0.004347,0.006449,-0.003178,0.005243,-0.000175,9.5e-05,-0.005957,-0.01748,0.004124,0.006667,0.000624,-0.003606,-0.012922,-0.010647,-0.01105,0.000661,-0.008315,0.005649,0.004742,0.014144,-0.001626,-0.003618,0.004101,-0.007025,0.004233,0.001314,0.007004,0.002882,-0.005765,-0.005438,-0.008123,-0.006626,-0.000191,-0.015236,0.017853,0.002019,0.007784,-0.004522,0.003228,-0.011678,0.015277,-0.000337,-0.006169,0.007778,0.012632,-0.00437,0.005508,-0.005371,0.011217,-0.006899,0.004853,0.021217,0.004433,-0.008672,-0.00017,-0.013791,0.003937,-0.008665,0.002376,-0.005243,-0.003049,-0.011634,0.0051,0.01349,-0.000978,0.000452,-0.00491,0.007104,-0.008656,-0.013201,0.009715,0.00019,0.018675,-0.004282,-0.01235,-0.005294,-0.003098,0.0043,-0.010867,-0.008232,-0.005555,-0.008416,-0.000438,0.006088,0.034031,-0.005951,-0.011274,-0.006163,0.022856,0.00466,0.016616,-0.017315,0.029941,-0.004681,-0.006773,0.003573,0.000269,-0.003365,-0.020757,-0.007967,0.001097,0.001597,-0.014545,0.008238,-0.002162,-0.0011,0.008331,0.006731,-0.012262,0.011174,-0.002553,0.003972,0.011859,-0.002478,0.000185,0.008342,-0.00687,0.009072,0.005089,0.008168,-0.008667,-0.008383,0.007071,0.007966,-0.003892,-0.011247,-0.01095,-0.000924,-0.000457,-0.000581,-0.006893,-0.014215,0.00518,0.012293,-0.005615,-0.00375,0.005397,0.005264,0.009632,0.017666,0.002859,0.005952,0.003424,-0.016514,0.003168,0.005319,0.009492,0.005101,0.00985,0.000228,-0.007808,-0.010544,-0.005459,-0.003861,-0.037872,-0.017202,-0.018502,-0.000997,-0.002113,-0.005247,0.004077,-0.006937,-0.012416,0.001015,0.002865,0.007258,0.016955,0.000729,0.008853,-0.017975,-0.006315,-0.006089,5.3e-05,-0.006383,-0.025554,-0.010488,-0.009348,-0.004467,-0.008314,0.012735,0.017171,0.013198,0.00206,0.010875,-0.012588,-0.019368,-0.005236,0.0,0.0,2.0,3.0,"Pizza, Restaurants",4,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,--j-kaNMCo1-DYzddCsA5Q,0.035586,0.022514,-0.006759,-0.034605,-0.0043,-0.001136,0.001259,-0.030018,-0.024447,-0.019531,0.013469,-0.015355,0.014754,-0.012143,0.011463,0.011731,0.034843,0.023824,0.004841,-0.003642,0.03355,0.023334,0.040423,-0.025388,-0.005392,-0.012638,0.007508,-0.019039,-0.013676,0.005799,-0.002171,-0.031701,0.005618,0.007776,-0.01297,0.015142,-0.005067,0.002006,-0.001829,0.037603,0.003339,0.036467,-0.016766,0.020584,0.0084,0.001669,-0.013346,-0.013458,-0.011666,-0.016915,0.003241,0.013855,0.011129,-0.030245,0.008177,-0.018768,0.018454,0.008811,-5.9e-05,-0.003335,-0.006106,-0.033478,0.025956,0.018926,-0.01407,0.005113,0.02893,-0.006716,-0.008166,0.026749,-0.004555,-0.006962,-0.006616,-0.028545,0.008256,-0.011582,0.034356,-0.004741,-0.026255,-0.022443,-0.003017,0.000753,0.00483,0.011377,0.022869,-0.016355,-0.01471,0.012377,-0.015632,-0.002214,-0.030018,0.002821,-0.037887,-0.009241,0.031782,0.017117,-0.011676,-0.028895,-0.016502,-0.038487,-0.010697,-0.020796,-0.027077,-0.015862,-0.010757,0.011843,0.006723,6.8e-05,0.025043,0.001866,0.00963,0.005317,0.003732,0.030519,0.019011,-0.013259,-0.005914,-0.014707,-0.022952,0.026195,-0.041787,0.016181,-0.022299,-0.006052,-0.022288,0.02067,0.010472,-0.006911,0.003276,0.00254,-0.023013,0.009655,0.016446,0.018688,0.020813,0.026653,0.014267,0.003966,0.039087,0.036969,0.008616,0.015359,0.000131,0.018841,-0.03424,-0.047229,0.012753,-0.022866,-0.030703,-0.003877,0.010786,0.00024,0.004964,-0.017763,0.020084,-0.037403,0.020078,0.018008,-0.036033,-0.034225,-0.030038,-0.000533,0.00824,0.007428,0.014448,0.034219,0.03488,-0.003483,-0.016695,0.038513,-0.028679,-2.1e-05,-0.011349,0.004167,0.016462,-0.016115,0.001255,0.0207,-0.006923,-0.016865,0.002612,0.004554,0.021733,-0.005121,-0.011707,0.020933,-0.013895,-0.021148,0.013658,-0.017727,-0.016137,-0.007611,0.001611,0.025292,-0.030883,-0.025738,0.009383,0.014083,0.004231,-0.010233,0.00343,0.017012,-0.02475,0.027743,0.036476,0.018355,-0.00885,-0.010797,-0.003072,0.024055,-0.00457,-0.01121,0.010668,0.01257,0.026892,-0.025917,-0.0182,0.039035,0.020572,0.014087,-0.007038,0.019014,-0.005364,0.002627,0.018733,0.017483,0.014418,-0.024293,0.019427,0.020707,-0.013925,0.005169,0.013683,0.00964,-0.010311,-0.018649,-0.007244,-0.011057,-0.017467,-0.004436,-0.035974,-0.007311,0.009026,0.015625,-0.012384,-0.057747,0.007954,-0.021706,-0.016572,0.025558,-0.000519,0.025739,0.000586,0.010865,0.019225,-0.005994,-0.000922,-0.007387,0.005063,-0.022474,-0.001555,0.001445,-0.02054,-0.004993,-0.004842,-0.016322,-0.001742,-0.014826,0.010301,-0.023069,0.011577,-0.008462,0.000272,0.005402,-0.011265,-0.000371,0.018296,-0.011073,0.001637,-0.024963,0.02022,-0.005851,-0.021839,-0.045366,0.00491,-0.001685,0.008545,-0.054177,-0.001469,-0.014475,0.000184,0.002357,0.01168,0.009636,0.027132,0.029206,0.035708,-0.035958,-0.010643,0.003613,0.0,0.0,0.0,5.0,"Hair Removal, Nail Technicians, Beauty & Spas,...",4,False,False,False,False,False,False,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,--wIGbLEhlpl_UeAIyDmZQ,-0.006451,0.005224,-0.009912,0.01222,-0.001507,0.014311,-0.004426,-0.013908,-0.003054,-0.003489,-0.005158,-0.013398,-0.009127,-0.013979,-0.00975,0.015487,0.007607,0.013315,-0.011175,-0.009165,0.002724,-0.008716,-0.013071,-0.0234,0.003305,0.020969,-0.002856,-0.001024,-0.011011,0.005339,-2.6e-05,-0.009941,-0.001624,-0.009941,0.00411,0.010796,0.001066,-0.000427,0.006601,0.025162,0.008402,0.005168,0.008002,-0.001039,0.01567,0.017713,0.00148,-0.007099,-0.009715,0.002707,-0.005726,0.007443,-0.017098,-0.014837,-0.004003,-0.00451,0.007199,-7.2e-05,0.003028,0.004661,-0.001873,-0.009008,-0.001957,-0.012551,-0.001671,0.026746,-0.015515,-0.033926,0.000387,0.013197,-0.04292,0.004932,-0.013605,0.010155,0.01418,0.030974,-0.010119,-0.013864,-0.004738,-0.023016,0.00213,0.012809,0.012343,0.019651,0.004754,-0.006894,-0.005952,0.012059,-0.006827,0.020048,-0.002721,-0.013028,-0.00936,-0.016978,0.018649,0.024557,-0.001547,-0.002572,0.003069,-0.006623,-0.021931,-0.007246,-0.004054,-0.031499,-0.021295,0.02005,-0.005289,0.023503,-0.011998,-0.022676,-8.7e-05,-0.007599,-0.023538,0.009769,-0.017149,-0.030261,-0.011132,-0.00241,-0.030899,0.006051,0.001235,0.000696,-0.009934,-0.021671,-0.020202,-0.007275,0.010149,-0.025803,-0.004109,0.000107,0.001046,0.025114,0.005574,-0.023765,0.006369,0.013491,-0.014562,-0.010531,-0.00315,0.009234,0.011258,0.010218,-0.006969,-0.012741,-0.00551,0.005818,-0.019811,0.00496,-0.006015,-0.007071,0.011693,0.005291,-0.014156,-0.009118,0.000954,0.001818,0.009421,0.006589,0.008889,0.002381,-0.022533,0.022815,-0.007912,-0.02021,0.035407,0.01739,-0.000898,-0.004029,-0.000687,0.011396,0.000525,0.006189,0.011964,-0.001504,-0.007669,-0.002903,-0.001048,0.001073,0.00497,-0.019298,-0.001323,-0.017552,-0.018586,-0.01448,0.012289,0.014894,-0.006163,0.010517,-0.022579,0.010857,-0.001869,-0.002703,-0.008702,-0.005797,-0.008647,-0.005226,0.016426,0.007479,-0.027694,-0.012755,0.016212,0.005117,-0.018269,0.002022,0.008764,0.019119,-0.006846,0.000805,0.006318,0.005565,0.003548,-0.005001,-0.0042,0.004131,-0.008311,-0.019334,-0.005256,-0.002412,-0.000262,0.005065,-0.016332,0.00191,-0.010828,-0.015169,0.009904,0.013332,-0.005678,0.0022,-0.001355,0.001587,0.00186,0.000814,0.003844,-0.00014,-0.002234,-0.000621,0.015916,-0.011911,-0.014774,-0.013549,0.002547,0.010617,-0.003541,-0.001351,0.018963,0.002975,-0.014302,0.01447,0.005929,0.015377,0.015909,-0.001553,0.009135,-0.01686,0.022797,0.001344,-0.001511,-0.00185,0.004181,0.006768,0.017253,0.005129,0.009166,-0.012016,-0.016002,0.004678,-0.024996,-0.026095,0.010083,-0.01217,0.007552,0.002216,-0.003668,-0.01454,-0.013339,0.009703,0.002095,0.011977,-0.003552,0.011336,-0.002475,-0.010721,-0.009994,-0.008097,-0.013916,-0.002644,-0.015977,-0.031741,-0.008459,-0.002852,0.00265,-0.014241,0.017916,0.007721,0.023303,0.010598,0.007041,-0.007397,-0.002891,-0.002526,0.666667,0.166667,3.0,3.833333,"Electronics, Professional Services, Local Serv...",14,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,-000aQFeK6tqVLndf7xORg,-0.007433,-1.1e-05,-0.009821,0.013711,-0.007697,0.009369,-0.000267,-0.014385,0.013446,-0.006545,-0.004254,-0.00884,0.006065,-0.020545,-0.014723,0.021661,0.007791,0.023143,-0.019433,-0.010503,0.016465,0.003757,-0.001521,-0.019992,0.001518,0.018253,-0.008955,-0.009514,-0.018659,0.013087,-0.008339,-0.016071,0.002549,-0.015986,0.011783,0.0182,0.001336,-0.005553,-0.003634,0.033946,0.020831,0.002149,0.002212,-0.004911,0.015449,0.023007,0.004104,-0.006299,-0.017385,-0.009145,-0.008275,0.01966,-0.010135,-0.007654,0.009172,-0.020482,-0.005627,0.003912,-0.009415,0.001919,-0.004439,-0.014971,0.014464,0.007223,-0.014237,0.027177,-0.003932,-0.02755,-0.009708,0.008284,-0.052534,-0.002317,-0.020593,0.014291,0.041985,0.045631,-0.00363,-0.023346,-0.016834,-0.036259,0.003925,0.019696,0.027176,0.02353,0.00804,-0.015238,-0.004602,0.028189,-0.00878,0.026455,-0.004413,-0.023286,-0.020831,-0.013712,0.02049,0.017521,-0.004392,-0.004432,-0.004666,-0.01965,-0.015279,-0.023229,-0.009894,-0.02654,-0.015425,0.026099,-0.009173,0.025814,-0.002449,-0.024673,0.001656,-0.002645,-0.024472,0.009885,0.002412,-0.021211,-0.011158,-0.004071,-0.039804,0.015876,-0.007912,0.00298,-0.028761,-0.027686,-0.021079,0.0001,0.002395,-0.029705,-0.010856,-0.012114,-0.010032,0.024644,0.008433,-0.023729,-0.007477,0.028917,-0.010986,-0.016497,0.009415,0.018289,0.004225,0.013398,-0.003217,-0.014722,-0.009774,-0.020824,-0.011235,0.010129,0.004694,-0.01414,0.011297,0.00864,0.003554,-0.013905,0.010194,-0.000588,0.016452,0.005398,0.00949,-0.016907,-0.019449,0.019247,-0.018269,-0.022054,0.035479,0.017517,0.014358,-0.00592,-0.005499,0.022577,-0.002098,0.028162,0.028012,-0.009083,-0.010663,0.003411,-0.007584,-0.008702,-0.003066,-0.024061,0.009006,-0.005962,-0.008443,-0.012843,0.014679,0.012292,-0.010667,0.011949,-0.019478,0.004063,-0.001255,-0.01176,-0.01194,-0.015718,-0.016672,-0.022398,0.019692,0.009881,-0.025308,-0.017314,0.027234,0.002567,-0.018215,-0.002731,0.014089,0.018067,-0.008646,-0.002019,0.008941,0.01073,0.011453,-0.008433,-0.005288,0.0013,0.010854,-0.024225,-0.011229,-0.009023,0.00282,0.020369,-0.027245,-0.005153,-0.01309,0.00477,0.013618,0.008284,0.007004,-0.000237,-0.009649,0.006179,-0.003944,-0.002354,0.006194,-0.01096,-0.011094,-0.018331,0.02691,-0.010754,-0.030586,-0.006552,-0.001103,0.014271,0.00219,0.010393,0.018642,-0.003907,-0.015776,0.002287,0.002076,0.017322,0.026583,-0.005787,-0.001431,-0.017098,0.030672,0.003212,-0.00348,-0.009102,0.006372,0.002701,0.032321,0.009328,-0.014496,-0.021993,-0.022041,-0.006105,-0.009757,-0.03687,0.018777,-0.028428,0.005373,-0.006823,-0.002413,-0.016446,-0.007992,0.013408,0.008006,0.021931,-0.015675,0.015636,0.001517,-0.024616,-0.013544,-0.026681,-0.004654,0.006673,-0.019194,-0.039296,-0.002843,0.004196,-0.007143,-0.012022,0.017546,0.011267,0.038164,0.014851,0.016622,-0.022971,0.00502,0.014055,0.666667,0.0,0.0,5.0,"Automotive, Auto Repair",7,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [49]:
# Clean

# Remove rows with NaNs
print('Before: ', len(all_features_business))
all_features_business = all_features_business.dropna(axis=0)
print('After:  ', len(all_features_business))

Before:  13922
After:   13922


In [50]:
# Create final y and x 

y_df = all_features_business[all_cats]
x_cols = [ele for ele in all_features_business.columns if ele not in all_cats+['categories', 'business_id']]
# May also want to remove from x_cols: 'cool', 'funny', 'useful', 'stars', 'categories', 'review_count' 

x_df = all_features_business[x_cols]

x = x_df.values
y = y_df.values

# Classifier wants 1/0, not T/F
y = y.astype(int)

In [51]:
y

array([[1, 1, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])

In [82]:
# Train/Test Split

from sklearn.model_selection import train_test_split

X_train, X_trest, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Category Prediction

In [83]:
# Multilabel Classification
# RandomForestClassifier supports multilabel classification

# Most other classifiers will require use of 
    # sklearn.multioutput.MultiOutputClassifier to run a separate classifier model for each targe
    
from sklearn.ensemble import RandomForestClassifier

In [84]:
rfc = RandomForestClassifier(n_estimators=10, n_jobs=-1)

In [85]:
rfc.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

## Plot ROC curve to assess

In our case we want a TPR (True Positive Rate, plotted on the y-axis) close to 1 since we want to Recall ALL correct categories. 

We also want a FPR (False Positive Rate, plotted on the x-axis) that is nonzero, but probably the smallest value that maximizes TPR. This is because we want some FPs (False Positives) because they are WHAT WE ARE RECOMMENDING. 

In [94]:
from sklearn.metrics import plot_roc_curve

ax = plt.gca()
rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax)
plt.show()

ImportError: cannot import name 'plot_roc_curve' from 'sklearn.metrics' (/Users/daviderickson/anaconda3/lib/python3.7/site-packages/sklearn/metrics/__init__.py)

In [91]:
break

SyntaxError: 'break' outside loop (<ipython-input-91-6aaf1f276005>, line 4)

In [56]:
predict0 = rfc.predict(x[0].reshape(1,-1))
predict0

array([[1., 1., 1., ..., 0., 0., 0.]])

In [71]:
all_cats_ser = pd.Series(data=all_cats)
y0cats = all_cats_ser.loc[y[0]==1]
list(y0cats)

['Nightlife', 'SportsBars', 'Restaurants', 'Bars']

In [72]:
all_cats_ser.loc[range(5)]

0                Nightlife
1               SportsBars
2              Restaurants
3                     Bars
4    American(Traditional)
dtype: object

In [None]:
all_features_business.iloc[0]

In [None]:
plt.figure(figsize=(15,5))
plt.bar(range(predict0.shape[1]), predict0[0])

In [None]:
plt.figure(figsize=(15,5))
plt.bar(range(predict0.shape[1]), y[0])

In [None]:
break

In [None]:
# Cluster users using K-means
from sklearn.cluster import MiniBatchKMeans

max_clusters = 200 # 10
kmeans_cost = []
for num_clusters in range(1,max_clusters):
    k_means_clutering = MiniBatchKMeans(n_clusters=num_clusters)
    k_means_clutering.fit(reviewDataVecs[~np.isnan(reviewDataVecs).any(axis=1)]) # Drop rows that have NAN
    kmeans_cost.append(k_means_clutering.inertia_)
    

In [None]:
# Determine the best value of K to use (the number of clusters)
# plot the cost against K values 
plt.plot(range(1, max_clusters), kmeans_cost, color ='g', linewidth ='3') 
plt.title('20 Clusters of Reviews:', fontsize=20)
plt.xlabel("Value of K") 
plt.ylabel("Sqaured Error (Cost)") 
plt.show() # clear the plot 

In [None]:
# Cluster users using K-means
# Interpret user clusters

from sklearn.cluster import KMeans

num_clusters = 20
k_means_clutering = KMeans(n_clusters=num_clusters)
k_means_clutering.fit(reviewDataVecs[~np.isnan(reviewDataVecs).any(axis=1)])

In [None]:
cluster_centers_df = pd.DataFrame(data=k_means_clutering.cluster_centers_, columns=all_features_df.iloc[:,:-4].columns)
cluster_centers_df.head()

In [None]:
cluster_centers_df = cluster_centers_df.merge(right=all_features_df.dropna()[['cool', 'funny', 'useful', 'stars']].groupby(by=k_means_clutering.labels_).mean(), right_index=True, left_index=True)


In [None]:
cluster_centers_df.head()

In [None]:
cluster_centers_df.iloc[0]['stars']

In [None]:
from wordcloud import WordCloud

for cluster in range(len(cluster_centers_df)):
#     series = user_cluster_centers_df.iloc[cluster,:-4] #Use relevant row, drop non-word cols
#     wc_dict = series.to_dict()
    series = model.wv.similar_by_vector(cluster_centers_df.iloc[cluster,:-4].values, topn=100)
    wc_dict = {}
    for key, val in series:
        wc_dict[key] = val
    wordcloud = WordCloud().generate_from_frequencies(wc_dict)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.title('Cluster{0:3d}: {1:.2f}stars, {2:.2f}useful'.format(cluster, cluster_centers_df.iloc[cluster]['stars'], 
                                                    cluster_centers_df.iloc[cluster]['useful']), fontsize=20)
    plt.axis('off')
    plt.show()
    

In [None]:
print('The most similar words to the avg vector describing each user cluster:\n')
for cluster in range(len(cluster_centers_df)):
    print('Cluster {}'.format(cluster))
    display(
        model.wv.similar_by_vector(cluster_centers_df.iloc[cluster,:].values, topn=10)
    )

# Cluster with metadata (useful, cool, funny, stars)

In [None]:
# Cluster users using K-means
from sklearn.cluster import MiniBatchKMeans

max_clusters = 200 # 10
kmeans_cost = []
for num_clusters in range(1,max_clusters):
    k_means_clutering = MiniBatchKMeans(n_clusters=num_clusters)
    k_means_clutering.fit(all_features_df.dropna()) # Drop rows that have NAN
    kmeans_cost.append(k_means_clutering.inertia_)
    

In [None]:
# Determine the best value of K to use (the number of clusters)
# plot the cost against K values 
plt.plot(range(1, max_clusters), kmeans_cost, color ='g', linewidth ='3') 
plt.title('20 Clusters of Reviews:', fontsize=20)
plt.xlabel("Value of K") 
plt.ylabel("Sqaured Error (Cost)") 
plt.show() # clear the plot 

In [None]:
# Cluster users using K-means
# Interpret user clusters

from sklearn.cluster import KMeans

num_clusters = 20
k_means_clutering = KMeans(n_clusters=num_clusters)
k_means_clutering.fit(all_features_df.dropna())

In [None]:
cluster_centers_df = pd.DataFrame(data=k_means_clutering.cluster_centers_, columns=all_features_df.iloc[:,:].columns)
# cluster_centers_df = cluster_centers_df.merge(right=all_features_df.dropna()[['cool', 'funny', 'useful', 'stars']].groupby(by=k_means_clutering.labels_).mean(), right_index=True, left_index=True)
cluster_centers_df.head()


In [None]:
from wordcloud import WordCloud

for cluster in range(len(cluster_centers_df)):
#     series = user_cluster_centers_df.iloc[cluster,:-4] #Use relevant row, drop non-word cols
#     wc_dict = series.to_dict()
    series = model.wv.similar_by_vector(cluster_centers_df.iloc[cluster,:-4].values, topn=100)
    wc_dict = {}
    for key, val in series:
        wc_dict[key] = val
    wordcloud = WordCloud().generate_from_frequencies(wc_dict)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.title('Cluster{0:3d}: {1:.2f}stars, {2:.2f}useful'.format(cluster, cluster_centers_df.iloc[cluster]['stars'], 
                                                    cluster_centers_df.iloc[cluster]['useful']), fontsize=20)
    plt.axis('off')
    plt.show()
    

In [None]:
for col in ['cool', 'funny', 'useful', 'stars']:
    plt.bar(x=range(len(cluster_centers_df)), height=cluster_centers_df[col])
    plt.title('{}'.format(col))
    plt.show()
# display(cluster_centers_df[['cool', 'funny', 'useful', 'stars']])