In [2]:
import json
import os
import time

import numpy as np
import pandas as pd
import pymysql
import requests
# Query-string fragment carrying the Etsy API key; the request helpers below
# append it right after the endpoint path. The key is read from the
# ETSY_API_KEY environment variable so it does not have to be stored in the
# notebook; the original placeholder value is kept as the fallback.
api_key = '?api_key=' + os.environ.get('ETSY_API_KEY', '***')

def getAllItems (prefix, store_name, suffix, flags = '' ):
    """Fetch every result of a paginated Etsy endpoint into one DataFrame.

    The total item count is requested first, then the results are fetched
    100 per page, one request per page.

    Parameters
    ----------
    prefix : str
        Leading path segment, e.g. ``'/v2/shops/'``.
    store_name : str
        Identifier inserted after ``prefix`` (a shop name or a user id).
    suffix : str
        Trailing path segment, e.g. ``'/listings/active'``.
    flags : str, optional
        Extra query-string text passed through to the request helpers.

    Returns
    -------
    pandas.DataFrame
        All pages stacked in page order. Row labels are not renumbered, so
        they restart on every page.
    """
    num_of_items = getItemCount(prefix, store_name, suffix, flags)
    print ('=== Fetching ' + str(num_of_items) + " items of " + suffix + " for " + store_name + ' ===')
    num_of_pages = int(np.ceil(num_of_items / 100))
    # Page 1 is always requested, even when the endpoint reports zero items.
    pages = [getPageItems(prefix, store_name, suffix, flags, page = 1)]
    for page in range(2, num_of_pages + 1):
        pages.append(getPageItems(prefix, store_name, suffix, flags, page))
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every call; collect the pages and concatenate them once.
    df_result = pd.concat(pages) if len(pages) > 1 else pages[0]
    print ('Done')
    return df_result

def getPageItems (prefix, store_name, suffix, flags , page):
    """Fetch one page (up to 100 rows) of an Etsy endpoint as a DataFrame.

    Parameters
    ----------
    prefix, store_name, suffix : str
        Concatenated, in that order, onto ``https://openapi.etsy.com`` to
        form the endpoint path.
    flags : str
        Extra query-string text appended verbatim after the API key.
    page : int
        1-based page number; sent as ``offset = (page - 1) * 100``.

    Returns
    -------
    pandas.DataFrame
        Built from the ``results`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    print("Getting page " + str(page))
    # Pause one second before every request.
    time.sleep(1)
    offset = (page - 1) * 100
    url = ('https://openapi.etsy.com' + prefix + store_name + suffix +
           api_key + flags + '&limit=100&offset=' + str(offset))
    response = requests.get(url)
    # Stop here with the HTTP status rather than with a confusing JSON or
    # KeyError further down when the API returns an error response.
    response.raise_for_status()
    r = response.json()
    return pd.DataFrame(r['results'])

def getItemCount (prefix, store_name, suffix, flags):
    """Return the total number of items an Etsy endpoint reports.

    Parameters
    ----------
    prefix, store_name, suffix : str
        Concatenated, in that order, onto ``https://openapi.etsy.com`` to
        form the endpoint path.
    flags : str
        Extra query-string text appended verbatim after the API key.

    Returns
    -------
    The ``count`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # limit=1: only the `count` field of the response is used.
    url = ('https://openapi.etsy.com' + prefix + store_name + suffix +
           api_key + flags + '&limit=1')
    response = requests.get(url)
    # Surface HTTP failures directly instead of as a JSON or KeyError below.
    response.raise_for_status()
    r = response.json()
    return r['count']

def getUserFromStoreName( store_name):
    """Look up the user id associated with an Etsy shop.

    Parameters
    ----------
    store_name : str
        Shop name inserted into the ``/v2/shops/<store_name>`` path.

    Returns
    -------
    str
        ``user_id`` of the first entry in the response's ``results`` list,
        converted to a string.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    url = ('https://openapi.etsy.com' + '/v2/shops/' + store_name +
           api_key + '&fields=user_id&limit=1')
    response = requests.get(url)
    # Surface HTTP failures directly instead of as a JSON or KeyError below.
    response.raise_for_status()
    r = response.json()
    return str(r['results'][0]['user_id'])

def getPerformanceForStore (store_name):
    """Download a shop's profile and active listings and summarise them.

    Side effects: writes the listings to ``./<store_name>_<YYYY-MM-DD>.csv``
    and prints the output path.

    Parameters
    ----------
    store_name : str
        Etsy shop name.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_sales, df_listings)``. ``df_sales`` is the result of the
        ``/v2/users/<user_id>/profile`` request extended with the columns
        ``views``, ``favorites`` and ``sales`` (all strings), ``store_name``
        and ``timestamp``. ``df_listings`` is the active-listings frame as
        fetched.
    """
    user_id = getUserFromStoreName(store_name)
    df_sales = getAllItems( '/v2/users/', user_id, '/profile')
    df_listings = getAllItems('/v2/shops/', store_name, '/listings/active' )

    output_filename = './' + store_name + '_' + time.strftime("%Y-%m-%d") +'.csv'
    df_listings.to_csv(output_filename)
    print('Wrote listings data to ' + output_filename)

    # str() works whether sum() yields a NumPy scalar or a plain Python
    # number; the NumPy-only .astype(str) fails on the latter.
    df_sales['views'] = str(df_listings['views'].sum())
    df_sales['favorites'] = str(df_listings['num_favorers'].sum())
    df_sales['sales'] = df_sales['transaction_sold_count'].astype(str)
    df_sales['store_name'] = store_name
    # Epoch seconds truncated down to a multiple of 1000.
    # NOTE(review): possibly meant milliseconds (time.time() * 1000) - confirm.
    df_sales['timestamp'] = (int(time.time() / 1000) * 1000)
    # A local `df_perf` summary frame used to be assembled here; it was never
    # returned and relied on DataFrame.append (removed in pandas 2.0), so the
    # function crashed before reaching this return on current pandas.
    return (df_sales, df_listings)

# df_sales, df_listings = getPerformanceForStore('StoreName')

In [3]:
import math
def tf( word, line):
    """Return the term frequency of ``word`` within ``line``.

    Parameters
    ----------
    word : object
        Term to look for.
    line : sequence
        One document: any sequence supporting ``count`` and ``len``
        (for example a list of tags).

    Returns
    -------
    float
        Occurrences of ``word`` divided by the length of ``line``, or
        ``0.0`` when ``line`` is empty.
    """
    # An empty document contains no terms; avoid dividing by zero.
    if len(line) == 0:
        return 0.0
    return line.count(word) / len(line)

def n_containing(word, lines):
    """Count the documents in ``lines`` that contain ``word``.

    Membership is tested with ``in``, so for list documents it is an exact
    element match and for string documents a substring match.

    Parameters
    ----------
    word : object
        Term to look for.
    lines : iterable
        Collection of documents.

    Returns
    -------
    int
        Number of documents for which ``word in document`` is true.
    """
    matches = 0
    for document in lines:
        if word in document:
            matches += 1
    return matches

def idf (word, lines):
    """Return the inverse document frequency of ``word`` across ``lines``.

    Computed as ``log(N / (n + 1))``, where ``N`` is ``len(lines)`` and ``n``
    is the number of documents containing ``word``. Because of the ``+ 1``,
    the result is negative when ``word`` occurs in every document.

    Parameters
    ----------
    word : object
        Term to score.
    lines : sized iterable
        Collection of documents.

    Returns
    -------
    float
        Natural logarithm of the ratio described above.
    """
    total_documents = len(lines)
    smoothed_matches = n_containing(word, lines) + 1
    return math.log(total_documents / smoothed_matches)

def tfidf (word, line, lines):
    """Return the TF-IDF score of ``word`` for one document.

    Parameters
    ----------
    word : object
        Term to score.
    line : sequence
        The document the term frequency is measured in.
    lines : sized iterable
        The collection of documents the inverse document frequency is
        measured over.

    Returns
    -------
    float
        ``tf(word, line)`` multiplied by ``idf(word, lines)``.
    """
    term_frequency = tf(word, line)
    inverse_document_frequency = idf(word, lines)
    return term_frequency * inverse_document_frequency

def calc_tags(line, lines, num_of_tags = 3):
    """Return the highest-scoring TF-IDF terms of one document.

    Parameters
    ----------
    line : iterable
        The document whose terms are ranked; repeated terms are scored once
        in the ranking.
    lines : sized iterable
        The collection of documents used for the IDF part of the score.
    num_of_tags : int, optional
        Maximum number of terms to return (default 3).

    Returns
    -------
    list
        Up to ``num_of_tags`` terms, highest score first. Terms with equal
        scores keep the order in which they first appear in ``line``.
    """
    weights = {}
    for term in line:
        weights[term] = tfidf(term, line, lines)
    ranked = list(weights.items())
    # list.sort is stable, so ties stay in first-appearance order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [term for term, _ in ranked[:num_of_tags]]

# NOTE(review): `df` is not defined in the visible cells; presumably it is the
# listings frame returned by getPerformanceForStore - confirm before running.
# Keep the top-scoring TF-IDF tags of each row (calc_tags default: 3).
df['unique_tags'] = df['tags'].apply( lambda line: calc_tags(line, df['tags']))
# .copy() makes df_favorites an independent frame, so adding fav_rate below
# does not assign onto a slice of `df` (SettingWithCopyWarning).
df_favorites = df[['listing_id','title', 'unique_tags', 'views', 'num_favorers']].copy()
# Favorites per view; with numeric columns, rows with zero views give inf/NaN.
df_favorites['fav_rate'] = df_favorites['num_favorers']/df_favorites['views']
# Ascending order: lowest favorite rate first.
df_sorted = df_favorites.sort_values('fav_rate')

In [15]:
from collections import defaultdict

# Flatten the per-row tag collections into one list (duplicates kept).
all_tags = [item for row in df['tags'] for item in row]
# Count how many times each tag occurs.
d = defaultdict(int)
for word in all_tags:
    d[word] += 1
# One row per tag, with its count in a `freq` column. Passing `columns` to
# from_dict also works when there are no tags at all, whereas assigning
# `.columns` afterwards fails with a length mismatch on the empty frame.
all_tags_df = pd.DataFrame.from_dict(d, orient='index', columns=['freq'])

In [None]:
from wordcloud import WordCloud
# Join every tag occurrence into one comma-separated string; `all_tags` must
# already exist in the kernel.
text_for_wordcloud = ",".join(all_tags)

# Generate a word cloud image with WordCloud's default settings.
wordcloud = WordCloud().generate(text_for_wordcloud)

# Draw the image with matplotlib on pyplot's current figure and hide the axes.
import matplotlib.pyplot as plt
plt.imshow(wordcloud)
plt.axis("off")

# Second cloud with max_font_size lowered to 30, drawn on a new figure.
# Note: this rebinds `wordcloud`, so the first image object is no longer
# reachable by name afterwards.
wordcloud = WordCloud(max_font_size=30).generate(text_for_wordcloud)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
# Display the open figures.
plt.show()