In [48]:
import pandas as pd
import requests
import time
import random

from pprint import pprint as pp
from bs4 import BeautifulSoup

In [49]:
def request_get_html(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole scrape.

    Returns:
        The decoded response body (``response.text``). The HTTP status code is
        not checked, so error pages are returned as text as well.

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            ``timeout`` elapses.
    """
    # NOTE(review): ('user', 'pass') looks like placeholder basic-auth
    # credentials; if real credentials are ever needed, load them from the
    # environment instead of hard-coding them here.
    response = requests.get(url, auth=('user', 'pass'), timeout=timeout)
    return response.text


def format_string_result(string):
    """Normalise scraped text: single-space the words and upper-case them.

    Leading/trailing whitespace is dropped and every internal run of
    whitespace (spaces, tabs, newlines) becomes one space.
    """
    words = string.split()
    return ' '.join(words).upper()


def calculate_wait_time(lower_limit, upper_limit):
    """Pick a random delay between ``lower_limit`` and ``upper_limit``.

    The value is drawn with ``random.uniform`` and is used by the scraping
    loop as the number of seconds to sleep between requests.
    """
    delay = random.uniform(lower_limit, upper_limit)
    return delay


def get_value_from_html(html, tag_type, class_name, tag_attrib=None, str_replace=None):
    """Read one value from the first matching tag and normalise it.

    Args:
        html: Object exposing ``find(tag_type, attrs)`` (e.g. a BeautifulSoup
            tag) that is searched for the element.
        tag_type: Tag name to look for, such as ``"a"`` or ``"span"``.
        class_name: Value of the ``class`` attribute the tag must have.
        tag_attrib: If given, return this attribute of the tag instead of its
            text.
        str_replace: Optional substring removed from the value before
            normalisation (for example a ``"$"`` prefix).

    Returns:
        The value passed through ``format_string_result`` (whitespace
        collapsed, upper-cased), or ``"N/A"`` when the tag or the requested
        attribute is missing.
    """
    try:
        tag = html.find(tag_type, {"class": class_name})
        value = tag[tag_attrib] if tag_attrib is not None else tag.text
    except (AttributeError, KeyError, TypeError):
        # Only "not found" conditions are treated as missing data:
        #   - find() returned None -> TypeError (subscript) / AttributeError (.text)
        #   - the tag lacks the requested attribute -> KeyError
        # Anything else (including KeyboardInterrupt) propagates instead of
        # being silently reported as "N/A".
        return "N/A"

    if str_replace:
        value = value.replace(str_replace, "")

    return format_string_result(value)

## Get the latest sold items 

#### Available Categories
- Jackets & Coats
- Jeans
- Pants & Jumpsuits
- Shorts
- Tops



### Available Genders / Groups
- Women
- Men
- Kids
- Home
- Pets


In [61]:
# Scrape configuration: the pieces below are concatenated into the brand
# listing URL requested by the scraping loop.

# How many listing pages to request.
page_limit = 5

# Site root; also prefixed to each item's relative link.
base_url = "https://poshmark.com"

# Path components, joined as /brand/<brand>-<gender>-<category>.
brand = "lululemon_athletica"
gender = "Women"
category = "Pants_&_Jumpsuits"

# Query string appended after the path (sold-out listings filter).
query_filter = "?availability=sold_out"

In [62]:
# Columns of the result table, in output order.
result_columns = ['gender', 'category', 'name', 'size', 'brand', 'sale_price', 'condition', 'link']

# One dict per listing. The frame is built once after the loop because
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every added row.
records = []

for page_index in range(page_limit):

    # Random pause before each request so the site is not hit in a burst.
    time.sleep(calculate_wait_time(3, 7))

    # query_filter already opens the query string with "?", so max_id must be
    # joined with "&" or it gets glued onto the availability value.
    # NOTE(review): pages are requested with max_id 0..page_limit-1 as before;
    # confirm whether the site numbers its pages from 0 or from 1.
    url = (
        base_url + "/brand/" + brand + "-" + gender + "-" + category
        + query_filter + "&max_id=" + str(page_index)
    )
    soup = BeautifulSoup(request_get_html(url), 'html.parser')

    for html_details in soup.find_all("div", {"class": "item__details"}):
        #   Get Item Name - taken from the first link in the tile, if any
        title_tag = html_details.a
        item_name = format_string_result(title_tag.text) if title_tag is not None else "N/A"

        #   Get Item Link - read the href directly so its case is preserved
        #   (get_value_from_html upper-cases its result, which would alter the URL)
        link_tag = html_details.find("a", {"class": "tile__title tc--b"})
        href = link_tag.get("href") if link_tag is not None else None
        link = base_url + href.strip() if href else "N/A"

        #   Get Sale Price
        sale_price = get_value_from_html(html_details, "span", "p--t--1 fw--bold", str_replace="$")

        #   Get Condition - Not always known
        condition = get_value_from_html(html_details, "span", "condition-tag all-caps tr--uppercase condition-tag--small")

        #   Get Size - Not always known
        size = get_value_from_html(html_details, "a", "tile__details__pipe__size ellipses", str_replace="Size: ")

        #   Get Brand - Not always known
        #   Stored under its own name: assigning to `brand` would overwrite the
        #   URL variable and break the request for every following page.
        item_brand = get_value_from_html(html_details, "a", "tile__details__pipe__brand ellipses")

        #   Queue the row for the dataframe
        records.append({
            "gender": gender,
            "category": category,
            "name": item_name,
            "size": size,
            "brand": item_brand,
            "sale_price": sale_price,
            "condition": condition,
            "link": link,
        })

# Passing the column list keeps the column order and still yields an empty,
# correctly-labelled frame when nothing was scraped.
df = pd.DataFrame(records, columns=result_columns)

In [64]:
# Persist the scraped rows as CSV in the working directory
# (the DataFrame index is written as the first column).
df.to_csv(path_or_buf="result_tuesday.csv")