## Import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
%matplotlib inline
# Never truncate the column list when a DataFrame is rendered in the notebook.
pd.set_option("display.max_columns", None)

## Pre-Processing

In [2]:
# Directory the raw Shopify CSV files are read from.
dataPath = Path("../data/shopify/raw/")
# Display every entry found directly inside that directory.
list(dataPath.glob('*'))

[PosixPath('../data/shopify/raw/apps.csv'),
 PosixPath('../data/shopify/raw/apps_categories.csv'),
 PosixPath('../data/shopify/raw/categories.csv'),
 PosixPath('../data/shopify/raw/key_benefits.csv'),
 PosixPath('../data/shopify/raw/pricing_plans.csv'),
 PosixPath('../data/shopify/raw/pricing_plan_features.csv'),
 PosixPath('../data/shopify/raw/reviews.csv')]

In [3]:
# Load the app listing, expose its `id` column as `app_id`, and keep only the
# identifier, URL and free-text columns.
apps_df = (
    pd.read_csv(dataPath / "apps.csv")
    .rename(columns={"id": "app_id"})
    [["app_id", "url", "title", "description", "tagline"]]
)
print(apps_df.shape)
apps_df.head()

(4750, 5)


Unnamed: 0,app_id,url,title,description,tagline
0,9e4748a9-7eda-4814-83b6-0537d44152b1,https://apps.shopify.com/translate,Panda Language Translate,Panda Language Translation improve your busine...,Translate your store into multiple languages
1,d1476138-a608-4bb9-8d39-b30f3ca7617d,https://apps.shopify.com/instant-brand-page,Instant Brand Page,Having a brand page gives your shoppers a quic...,A-Z Brand Index Page and Favourites Slider
2,d6e49a3c-2f9f-4bfa-8c26-5d024faf2241,https://apps.shopify.com/powr-faq,FAQ Accordion | Help Center,"Create, organize and display Frequently Asked ...","FAQ page, FAQ accordion menu for product Info ..."
3,0ef0087f-3ae5-4dbc-84e0-193b576d82ed,https://apps.shopify.com/mps-promote-me,Promote Me | Many apps in one,Promote Me app is a bundle of apps that includ...,"Spin Wheel,Currency Converter,Quick ATC Button..."
4,7aac2a1f-ff03-4f38-aeb7-7619403a6f05,https://apps.shopify.com/installify-boost-mobi...,Instalify,We are trusted by hundreds of Shopify and Shop...,Supercharge Your Mobile App Installs


In [8]:
## Getting apps and their categories
# apps_categories.csv links apps to category ids; categories.csv supplies the
# title for each category id (renamed here to `category_id` / `category`).
appCategories_df = pd.read_csv(dataPath / "apps_categories.csv")
categories_df = pd.read_csv(dataPath / "categories.csv").rename(
    columns={"id": "category_id", "title": "category"}
)
# Join on an explicitly named key instead of whichever columns happen to
# overlap, and fail loudly if a category id is duplicated in the lookup table
# (a duplicate would silently multiply the app/category rows).
appCategories_df = appCategories_df.merge(
    categories_df,
    on="category_id",
    how="inner",
    validate="many_to_one",
).drop(columns=["category_id"])
# The lookup table is not needed once the titles have been attached.
del categories_df
print(appCategories_df.shape)
appCategories_df.head()

(7376, 2)


Unnamed: 0,app_id,category
0,9e4748a9-7eda-4814-83b6-0537d44152b1,Store design
1,d1476138-a608-4bb9-8d39-b30f3ca7617d,Store design
2,d6e49a3c-2f9f-4bfa-8c26-5d024faf2241,Store design
3,0ef0087f-3ae5-4dbc-84e0-193b576d82ed,Store design
4,7aac2a1f-ff03-4f38-aeb7-7619403a6f05,Store design


In [9]:
appCategories_df.category.value_counts()

Store design                         2087
Sales and conversion optimization    1294
Marketing                            1068
Orders and shipping                   858
Customer support                      409
Reporting                             350
Inventory management                  342
Productivity                          282
Finding and adding products           252
Finances                              163
Trust and security                    144
Places to sell                        127
Name: category, dtype: int64

In [10]:
# Collapse to one row per app: all of an app's category titles are joined into
# a single comma-separated string.
appCategories_df = (
    appCategories_df
    .groupby("app_id", as_index=False)
    .agg({"category": ",".join})
)
appCategories_df.head()

Unnamed: 0,app_id,category
0,00014403-ad17-4c9b-bbcf-5adc27ba18a9,"Inventory management,Reporting"
1,000270a8-af0b-4862-82df-25decbcc8d86,Store design
2,0006d5af-c461-4bdb-b32e-a4fecd00846b,Store design
3,001ce7ab-14dc-45ce-98f1-1b42b61c86c1,"Store design,Sales and conversion optimization"
4,00215fb3-ff7c-4111-8f0c-8e7e2a9027fb,Marketing


In [11]:
# Attach the comma-separated category string to each app. The join uses the
# columns the two frames have in common (pandas' default inner merge).
apps_df = pd.merge(apps_df, appCategories_df)
print(apps_df.shape)
apps_df.head()

(4750, 6)


Unnamed: 0,app_id,url,title,description,tagline,category
0,9e4748a9-7eda-4814-83b6-0537d44152b1,https://apps.shopify.com/translate,Panda Language Translate,Panda Language Translation improve your busine...,Translate your store into multiple languages,Store design
1,d1476138-a608-4bb9-8d39-b30f3ca7617d,https://apps.shopify.com/instant-brand-page,Instant Brand Page,Having a brand page gives your shoppers a quic...,A-Z Brand Index Page and Favourites Slider,Store design
2,d6e49a3c-2f9f-4bfa-8c26-5d024faf2241,https://apps.shopify.com/powr-faq,FAQ Accordion | Help Center,"Create, organize and display Frequently Asked ...","FAQ page, FAQ accordion menu for product Info ...","Store design,Customer support"
3,0ef0087f-3ae5-4dbc-84e0-193b576d82ed,https://apps.shopify.com/mps-promote-me,Promote Me | Many apps in one,Promote Me app is a bundle of apps that includ...,"Spin Wheel,Currency Converter,Quick ATC Button...","Store design,Sales and conversion optimization"
4,7aac2a1f-ff03-4f38-aeb7-7619403a6f05,https://apps.shopify.com/installify-boost-mobi...,Instalify,We are trusted by hundreds of Shopify and Shop...,Supercharge Your Mobile App Installs,Store design


In [13]:
# Persist the enriched app table as CSV, without writing the row index.
apps_df.to_csv(
    path_or_buf="../data/shopify/shopify_apps.csv",
    index=False,
)