In [45]:
import requests
import json
import csv
import pandas as pd
import numpy as np

In [41]:
# Endpoint queried for product data; passed to get_products() below.
url_ = "https://fakestoreapi.com/products"

In [42]:
def get_leaves(item, key=None, key_prefix=""):
    """
    Flatten a nested dictionary into a single-level dictionary.

    Every non-dict value (a "leaf") is stored under the path of keys that
    leads to it, so the result never holds a dictionary as a value.

    Parameters
    ----------
    item : the value to flatten (a dict, or any leaf value).
    key : the key under which ``item`` was found; it is forwarded on
        recursion but not otherwise used.
    key_prefix : the key path accumulated so far ("" at the top level).

    Returns
    -------
    dict mapping each leaf's key path to the leaf value. A non-dict
    ``item`` yields ``{key_prefix: item}``; an empty dict yields ``{}``.
    """
    # Base case: anything that is not a dict is a leaf stored under its path.
    if not isinstance(item, dict):
        return {key_prefix: item}

    flat = {}
    for child_key, child in item.items():
        # A top-level key is used as is; deeper keys are appended to the
        # parent's path, separated by a single space.
        if key_prefix == "":
            path = child_key
        else:
            path = key_prefix + " " + str(child_key)
        flat.update(get_leaves(child, child_key, path))
    return flat

In [43]:
# This function retrieves whatever products the endpoint returns, optionally
# capped with a ``limit`` query parameter. To retrieve products by some other
# condition (e.g. by specifying a category in the URL) different input data is
# needed and the function itself has to be changed.
def get_products(url, number=None, timeout=30):
    """
    Download products from ``url`` and save them as a flat CSV file.

    Each entry of the JSON response is flattened with ``get_leaves`` so that
    nested objects become separate columns. The CSV header is the union of
    the keys of all entries, in order of first appearance; an entry lacking
    one of the columns gets an empty cell there.

    Parameters
    ----------
    url : address of the products endpoint.
    number : value sent as the ``limit`` query parameter. When ``None`` no
        limit is sent and the output file is named ``output_all_products.csv``.
    timeout : seconds to wait for the server before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.

    The result is written to ``output_{number}.csv`` in the current directory.
    """
    # Let requests build the query string; without a limit the bare URL is used.
    params = None if number is None else {"limit": number}
    label = "all_products" if number is None else number

    # Request data from the server and fail loudly on an HTTP error instead of
    # trying to parse an error page as product data.
    r = requests.get(url, params=params, timeout=timeout)
    r.raise_for_status()
    response = r.json()

    # Flatten every entry once; the rows are reused for the header and the body.
    rows = [get_leaves(entry) for entry in response]

    # Ordered, duplicate-free union of all column names. dict.fromkeys keeps
    # first-seen order, so the header follows the order used by the server.
    fieldnames = list(dict.fromkeys(field for row in rows for field in row))

    # The file is opened only after the data is ready, so a failed request
    # does not leave an empty CSV behind.
    with open(f"output_{label}.csv", "w", newline="", encoding='utf-8') as f_output:
        csv_output = csv.DictWriter(f_output, fieldnames=fieldnames)
        csv_output.writeheader()
        csv_output.writerows(rows)

In [44]:
# Request the products with limit=10; the rows are written to output_10.csv.
get_products(url_,10)

In [64]:
# Load the exported product table; the handle is only needed while pandas reads it.
with open("output_10.csv", "r", encoding='utf-8') as file:
    data = pd.read_csv(file)

price = data['price']

# Percentile thresholds of the price column used to segment the products.
# NOTE(review): "cheap" ends at the 30th percentile while "middle" starts at
# the 40th, so prices strictly between the two land in no segment, and a price
# equal to the 70th percentile lands in both "middle" and "expensive".
# Confirm whether this is intended.
cheap_perc = np.percentile(price, 30)
middle_perc = np.percentile(price, [40,70])
expensive_perc = np.percentile(price, 70)

# All segment bounds are inclusive.
cheap_df = data[price.le(cheap_perc)]
middle_df = data[price.between(middle_perc[0], middle_perc[1])]
expensive_df = data[price.ge(expensive_perc)]

# Write each segment to its own workbook.
for workbook, segment in (
    ("cheap.xlsx", cheap_df),
    ("middle.xlsx", middle_df),
    ("expensive.xlsx", expensive_df),
):
    segment.to_excel(workbook)