In [95]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
from os import walk
import regex as re
import plotly.graph_objects as go

In [28]:
def get_filecount(BRAND):
    """Return how many files sit directly inside ``fabricdotcom/{BRAND}/``.

    Only the top level of the directory is inspected; files in nested
    folders are not counted. When the directory cannot be walked (for
    example because it does not exist) the result is 0.
    """
    # The first tuple yielded by walk() describes the top-level directory;
    # fall back to an empty listing when walk() yields nothing at all.
    _, _, top_level_files = next(walk(f'fabricdotcom/{BRAND}/'), (None, [], []))
    return len(list(top_level_files))

In [49]:
# Scratch cell: load the first saved AH listing page and collect its
# product tiles. The context manager closes the file handle as soon as
# the contents have been read (the original left it open).
with open('fabricdotcom/AH/1.html', "r") as f:
    data = f.read()
soup = BeautifulSoup(data, 'html.parser')
items = soup.find_all("div", {"class": "product-item"})

In [73]:
# Grab the sixth product tile from the sample page (presumably for
# interactive inspection). Indexing fails if the page has fewer than six tiles.
item = items[5]

In [91]:
def _parse_product_item(item, page, number):
    """Turn one ``product-item`` element into a row dict.

    Args:
        item: BeautifulSoup element for a single product tile.
        page: Number of the listing page the tile was found on.
        number: Zero-based position of the tile within that page.

    Returns:
        dict with the keys 'page', 'number', 'name', 'price', 'url',
        'low_stock' and 'not_available'.

    Raises:
        AttributeError, TypeError, IndexError or KeyError when the tile has
        no ``brand-text`` link, no price span, or no ``href`` on the link.
        The caller treats any of these as a failure of the whole page.
    """
    desc = item.find("a", {"class": "brand-text"})
    price = item.select('span[id*="Price"]')[0].get_text()

    # The low-stock text lives in a <span> nested inside the label span;
    # either level may be absent, in which case there is no low-stock note.
    low_stock_label = item.find("span", {"class": "LowStockLabel"})
    low_stock_text = low_stock_label.find("span") if low_stock_label is not None else None

    return {
        'page': page,
        'number': number,
        'name': desc.string,
        # An empty price string is recorded as missing.
        'price': price if price else None,
        'url': desc['href'],
        'low_stock': low_stock_text.string if low_stock_text is not None else None,
        'not_available': item.find("span", {"class": "NotAvailableLabel"}) is not None,
    }


def get_data(BRAND):
    """Parse the saved listing pages of a brand into a DataFrame and save it.

    Pages are read from ``fabricdotcom/{BRAND}/1.html`` up to
    ``fabricdotcom/{BRAND}/{N}.html`` where ``N`` is the value returned by
    ``get_filecount(BRAND)``. A page that cannot be read or parsed is
    reported with ``print`` and skipped; rows collected from that page
    before the failure are kept.

    Args:
        BRAND: Name of the brand sub-directory (e.g. ``'AH'``).

    Returns:
        pandas.DataFrame with the columns 'bestseller_rank', 'page',
        'number', 'name', 'low_stock', 'not_available', 'price' and 'url'.
        'bestseller_rank' is the row's position in the original scrape
        order; the frame itself is sorted by 'not_available' and then
        'low_stock', both descending.

    Side effects:
        Writes the frame as CSV to ``data/fabricdotcom_{BRAND}`` (no file
        extension). The ``data/`` directory is not created here.
    """
    columns = ['page', 'number', 'name', 'low_stock', 'not_available', 'price', 'url']
    # Rows are gathered in a plain list and converted once at the end;
    # DataFrame.append was quadratic and no longer exists in pandas >= 2.0.
    records = []
    filecount = get_filecount(BRAND)
    for page in range(1, filecount + 1):
        path = f'fabricdotcom/{BRAND}/{page}.html'
        try:
            with open(path, "r") as f:
                soup = BeautifulSoup(f.read(), 'html.parser')
            items = soup.find_all("div", {"class": "product-item"})
            for number, item in enumerate(items):
                records.append(_parse_product_item(item, page, number))
        except Exception as exc:
            # Best effort: keep going with the next page, but say why this
            # one failed instead of hiding the cause.
            print(f'Error reading file {path}: {exc!r}')

    df = pd.DataFrame(records, columns=columns)
    # reset_index() turns the scrape-order index into a column, which is
    # then exposed as the bestseller rank.
    df = (
        df.sort_values(by=['not_available', 'low_stock'], ascending=False)
        .reset_index()
        .rename(columns={'index': 'bestseller_rank'})
    )
    df.to_csv(f'data/fabricdotcom_{BRAND}')
    return df

In [98]:
# Parse the saved pages under fabricdotcom/AH/ into a DataFrame;
# get_data also writes the result to data/fabricdotcom_AH.
AH = get_data('AH')

In [97]:
# Parse the saved pages under fabricdotcom/MM/ into a DataFrame;
# get_data also writes the result to data/fabricdotcom_MM.
# NOTE(review): the recorded run reported an error for MM/24.html, so MM
# may be missing some or all of that page's items — confirm the file exists.
MM = get_data('MM')

Error reading file fabricdotcom/MM/24.html


In [96]:
# Parse the saved pages under fabricdotcom/KF/ into a DataFrame;
# get_data also writes the result to data/fabricdotcom_KF.
KF = get_data('KF')

In [115]:
def _price_to_float(price):
    """Convert a price such as ``'$1,234.50'`` to a float; None if missing.

    Missing values (``None`` / NaN) yield ``None``. Anything else must be
    numeric once ``$`` and thousands separators are removed, otherwise
    ``ValueError`` is raised.
    """
    if pd.isna(price):
        return None
    return float(str(price).replace('$', '').replace(',', ''))


def viz_available(BRAND, DF):
    """Scatter-plot price against bestseller rank, split by availability.

    Two marker traces are drawn: rows where 'not_available' is False and
    rows where it is True. Rows without a price are left out of the plot
    because they have no y-value. The x-axis is reversed.

    Args:
        BRAND: Display name of the brand; used in the title and, with
            spaces replaced by hyphens, in the output file name.
        DF: DataFrame with the columns 'bestseller_rank', 'price', 'name'
            and 'not_available'.

    Side effects:
        Writes ``viz/fabricdotcom_{BRAND}_available.html`` (the ``viz/``
        directory is not created here) and displays the figure.

    Raises:
        ValueError: if a non-missing price is not numeric after removing
            ``$`` and ``,``.
    """
    groups = [
        ("Available", DF[DF['not_available'] == False]),
        ("Not Available", DF[DF['not_available'] == True]),
    ]
    fig = go.Figure()
    for label, subset in groups:
        ranks, prices, names = [], [], []
        for rank, price, name in zip(subset['bestseller_rank'], subset['price'], subset['name']):
            value = _price_to_float(price)
            if value is None:
                # get_data stores None for an empty price; such rows
                # cannot be placed on the price axis.
                continue
            ranks.append(int(rank))
            prices.append(value)
            names.append(str(name))
        fig.add_trace(
            # Naming the trace gives the legend a meaningful entry.
            go.Scatter(x=ranks, y=prices, mode='markers', text=names, name=label))
    fig.update_layout(
        xaxis_title="Rank (Best Sellers on Right)",
        yaxis_title="Price",
        title=f"{BRAND}: Available (blue) vs. Not Available (red)"
    )
    fig.update_xaxes(autorange="reversed")
    fig.write_html(f"viz/fabricdotcom_{BRAND.replace(' ','-')}_available.html")
    fig.show()

In [116]:
# Plot price vs. bestseller rank for the AH frame; also saves
# viz/fabricdotcom_Alexander-Henry_available.html.
viz_available("Alexander Henry", AH)

In [117]:
# Plot price vs. bestseller rank for the KF frame; also saves
# viz/fabricdotcom_Kaffe-Fassett_available.html.
viz_available("Kaffe Fassett", KF)

In [118]:
# Plot price vs. bestseller rank for the MM frame; also saves
# viz/fabricdotcom_Michael-Miller_available.html.
viz_available("Michael Miller", MM)