In [91]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [92]:
import pandas as pd
import requests
import json

In [119]:
class eSamudaay:

  def __init__(self, filepath):

    self.filepath = filepath
    self.urldict = None
    with open(self.filepath) as json_file:
      self.urldict = json.load(json_file)
    self.business = ''
    self.url = ''
    self.data = None
    self.reasons = []
  
  def get_company(self, business):
    self.business = business
    
    self.url = self.urldict[self.business]
    self.fetch_data()
    self.process_data()

  def fetch_data(self):

    response = requests.request("GET", self.url)
    url_return = json.loads(response.text)

    self.data = pd.DataFrame(url_return)


  def process_reasons(self):
    possible_reasons = list(self.data['failure_reasons'])

    given_reasons = []
    for reason in possible_reasons:
      if type(reason) == list:
        given_reasons.extend(reason)
      elif type(reason) == str:
        given_reasons.append(reason)
    
    given_reasons = set(given_reasons)

    self.reasons = list(given_reasons)


  def expand(self, x):
    if x['failure_reasons'] != None:
      for reason in self.reasons:
        if reason in x['failure_reasons']:
          x[reason] = 1
    
    return x


  def process_data(self):
    self.process_reasons()
    self.data[self.reasons] = 0
    self.data = self.data.apply(self.expand, axis = 1)


  def get_inventory(self):
    return self.data['product_name'].value_counts()
  
  
  def get_business_names(self):
    return list(self.urldict.keys())
  
  def get_product_data(self):
    attributes = ['sku_id', 'product_name', 'failure_reasons']
    product_data = self.data[attributes]
    return product_data
  
  def issues(self):
    return self.data[self.reasons].sum().to_dict()

  def product_stats(self):
    inventory = self.get_inventory()
    products_sum = self.data.drop(['sku_id', 'failure_reasons'], axis = 1).groupby(by = 'product_name').sum()
    product_stats = pd.DataFrame()
    for reason in self.reasons:
      product_stats['%' + reason] = products_sum[reason]/inventory * 100
    product_stats = product_stats.reset_index()
    product_stats = product_stats.rename( columns = {'index': 'product_name'})
    product_stats.fillna(0)
    return product_stats
  
  def get_error_rate(self):
    total_inventory = self.get_inventory().sum()
    total_errors = self.data.drop(['sku_id', 'failure_reasons' ], axis = 1).groupby(by = 'product_name').sum().sum(axis = 1).sum()
    if total_inventory != 0:
      print(total_errors)
      error_rate = total_errors/total_inventory
      print(total_inventory)
      return error_rate
    else:
      return 0
  
  def get_classification(self):
    error_rate = self.get_error_rate()
    if error_rate <1:
      return 1
    elif error_rate == 1:
      return 2
    else:
      return 3

In [120]:
hack = eSamudaay('/content/drive/MyDrive/esamudaay/output.json')

In [105]:
hack.get_company('Ravada Stores')

In [106]:
hack.get_error_rate()

2.892156862745098

In [107]:
business_list = hack.get_business_names()
error_rates = []
for name in business_list:
  hack.get_company(name)
  try:
    error_rates.append(hack.get_error_rate())
  except:
    error_rates.append(0)

In [108]:
error = pd.Series(data = error_rates)

In [109]:
error.describe()

count    88.000000
mean      1.067997
std       0.538477
min       0.000000
25%       1.000000
50%       1.000000
75%       1.000000
max       3.544041
dtype: float64

We find that most registered merchants seem to have 1 or another issue reported per product. With the max being 3.54. we can thus create 3 classes based on the data where
  - having error rate < 1 is good 
  - having error rate = 1  requires a degree of correction
  - and having error rate > 1 will require a large degree of correction

In [121]:
hack.get_company('Ravada Stores')

In [122]:
hack.get_classification()

295
102


3