In [329]:
import pandas as pd
import requests
import time
import random
import sqlite3
import re

from pprint import pprint as pp
from bs4 import BeautifulSoup

In [373]:
def request_get_html(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the response body as text.

    :param url: address of the page to download
    :param timeout: seconds to wait for the server before giving up; without
        a timeout a stalled connection would block the notebook indefinitely
    :return: the response body decoded to text (``Response.text``)
    :raises requests.exceptions.Timeout: if the server does not answer in time
    """
    # NOTE(review): ('user', 'pass') looks like placeholder basic-auth
    # credentials; kept so the request itself is unchanged -- confirm whether
    # the site needs any auth at all and load real secrets from the environment.
    response = requests.get(url, auth=('user', 'pass'), timeout=timeout)
    return response.text


def format_string_result(string):
    """Collapse every run of whitespace to a single space and upper-case the text."""
    words = string.split()
    collapsed = ' '.join(words)
    return collapsed.upper()


def calculate_wait_time(lower_limit, upper_limit):
    """Draw a random float between the two limits via ``random.uniform``."""
    wait_time = random.uniform(lower_limit, upper_limit)
    return wait_time


def get_value_from_html(html, tag_type, class_name, tag_attrib=None, str_replace=None):
    """Extract one value from the first ``tag_type`` tag carrying ``class_name``.

    :param html: object exposing ``find(tag_type, attrs)`` (the callers in this
        notebook pass BeautifulSoup elements)
    :param tag_type: tag name to look for, e.g. ``"a"`` or ``"span"``
    :param class_name: value of the ``class`` attribute to match
    :param tag_attrib: when given, return this attribute of the tag instead of
        its text
    :param str_replace: optional substring removed from the value before it is
        formatted
    :return: the value normalised by ``format_string_result``, or ``"N/A"``
        when the tag or the requested attribute is missing
    """
    try:
        tag = html.find(tag_type, {"class": class_name})
        value = tag[tag_attrib] if tag_attrib is not None else tag.text

    # Only the expected lookup failures are treated as "value missing":
    # find() returned None (TypeError / AttributeError) or the tag has no such
    # attribute (KeyError). A bare except would also hide KeyboardInterrupt.
    except (AttributeError, KeyError, TypeError):
        return "N/A"

    if str_replace:
        value = value.replace(str_replace, "")

    return format_string_result(value)

## Get the latest sold items 

### Available Categories
- Jackets & Coats
- Jeans
- Pants & Jumpsuits
- Shorts
- Tops



### Available Genders/ Groups
- Women
- Men
- Kids
- Home
- Pets


The idea here is to (ethically) retrieve data about sold items from the Poshmark reseller website.

If anything here breaches a user agreement, please get in contact with me!

In [363]:
# Scrape configuration: the pieces used to build the brand-listing URL
base_url = 'https://poshmark.com'
brand = 'lululemon_athletica'
gender = 'Women'
category = 'Tops'
query_filter = '?availability=sold_out'

# Number of result pages to request
page_limit = 1

In [383]:
# Column order of the resulting DataFrame
sold_item_columns = ['item_id', 'date_added', 'gender', 'category', 'name', 'size', 'brand', 'list_price', 'sale_price', 'condition', 'link', 'likes', 'comments']

# Matches the yyyy/mm/dd path segment inside the listing image URL
date_pattern = re.compile(r'\d{4}/\d{2}/\d{2}')

# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
sold_item_records = []

for i in range(page_limit):

    # `brand` here is the configured URL slug; the brand scraped from each card
    # is stored in `item_brand` so it cannot overwrite the slug between pages.
    url = base_url + "/brand/" + brand + "-" + gender + "-" + category + query_filter + "&max_id=" + str(i+1)
    soup = BeautifulSoup(request_get_html(url), 'html.parser')

    for html_details in soup.find_all("div", {"class": "card card--small"}):
        #   Get Item Name
        item_name = get_value_from_html(html_details, "img", "ovf--h", tag_attrib="alt")

        #   Get Item Link (left as "N/A" when the card has no link, instead of
        #   gluing "n/a" onto the base URL)
        link_path = get_value_from_html(html_details, "a", "tile__title tc--b", tag_attrib="href")
        link = base_url + link_path.lower() if link_path != "N/A" else "N/A"

        #   Get the property listing ID
        item_id = get_value_from_html(html_details, "a", "tile__title tc--b", tag_attrib="data-et-prop-listing_id")

        #   Get the date added (need to regex a cloudfront url); the image URL
        #   is read from `src`, falling back to `data-src`
        image_url = get_value_from_html(html_details, "img", "ovf--h", tag_attrib="src")

        if image_url == 'N/A':
            image_url = get_value_from_html(html_details, "img", "ovf--h", tag_attrib="data-src")

        # search() returns None when the URL carries no date; do not crash on it
        date_match = date_pattern.search(image_url)
        date_added = date_match.group(0) if date_match else "N/A"

        #   Get List Price
        list_price = get_value_from_html(html_details, "span", "p--l--1 tc--lg td--lt", str_replace="$")

        #   Get Sale Price
        sale_price = get_value_from_html(html_details, "span", "p--t--1 fw--bold", str_replace="$")

        #   Get Condition - Not always known
        condition = get_value_from_html(html_details, "span", "condition-tag all-caps tr--uppercase condition-tag--small")

        #   Get Size - Not always known
        size = get_value_from_html(html_details, "a", "tile__details__pipe__size ellipses", str_replace="Size: ")

        #   Get Brand - Not always known
        item_brand = get_value_from_html(html_details, "a", "tile__details__pipe__brand ellipses")

        #   Get the amount of likes the picture received; default to 0 and keep
        #   the row when the social bar or its counter is missing
        likes_div = html_details.find('div', {"class": 'social-action-bar tile__social-actions'})
        likes_span = likes_div.find('span') if likes_div is not None else None
        likes = likes_span.text if likes_span is not None else 0

        #   Comments are not scraped yet; the column is kept (empty) so the
        #   frame layout stays stable.
        comments = None

        #   Add the row to the collected records
        sold_item_records.append({
            "item_id": item_id,
            "date_added": date_added,
            "gender": gender,
            "category": category,
            'name': item_name,
            'size': size,
            "brand": item_brand,
            "list_price": list_price,
            'sale_price': sale_price,
            'condition': condition,
            'link': link,
            'likes': likes,
            'comments': comments
        })

#     Wait to pull the next page for a few seconds
    time.sleep(calculate_wait_time(1, 3))

df = pd.DataFrame(sold_item_records, columns=sold_item_columns)


In [384]:
# Preview the first ten rows of the scraped listings
df.head(10)

Unnamed: 0,item_id,date_added,gender,category,name,size,brand,list_price,sale_price,condition,link,likes,comments
0,616475D012D88085513A0D2C,2021/10/11,Women,Tops,LULULEMON SCOOP NECK HEATHERED TEE,4,LULULEMON ATHLETICA,0,25,,https://poshmark.com/listing/lululemon-scoop-n...,,
1,6127ACC82CA9ABC7605ABBDD,2021/08/26,Women,Tops,LULULEMON SCUBA ZIP UP HOODIE SIZE 10,10,LULULEMON ATHLETICA,138,13,,https://poshmark.com/listing/lululemon-scuba-z...,,
2,617856CFCE1E8771ADB41175,2021/10/26,Women,Tops,LULULEMON BLACK ‚ÄúSPIN ME‚Äù RACERBACK TANK TOP,8,LULULEMON ATHLETICA,58,30,,https://poshmark.com/listing/lululemon-black-s...,,
3,6178968EBCDB2F5490101840,2021/10/26,Women,Tops,LULULEMON SCUBA HOODIE SIZE 8!,8,LULULEMON ATHLETICA,100,45,,https://poshmark.com/listing/lululemon-scuba-h...,,
4,614FD4C21E75A847A40FBE97,2021/09/25,Women,Tops,SLEEVELESS MESH PANEL ATHLETIC TOP,S,LULULEMON ATHLETICA,48,20,,https://poshmark.com/listing/sleeveless-mesh-p...,,
5,612EC4D0941F175F9F37EBA5,2021/08/31,Women,Tops,LULULEMON ALIGN TANK IN YELLOW PEAR SIZE 12,12,LULULEMON ATHLETICA,0,35,,https://poshmark.com/listing/lululemon-align-t...,,
6,61608F482E8E4481E18C00CA,2021/10/08,Women,Tops,LULULEMON SWIFTLY TOP SHIRT,6,LULULEMON ATHLETICA,58,36,,https://poshmark.com/listing/lululemon-swiftly...,,
7,604D5ADE81A36C0135BB151D,2021/03/13,Women,Tops,LULULEMON LONG SLEEVE RUNNING SHIRT,6,LULULEMON ATHLETICA,98,23,,https://poshmark.com/listing/lululemon-long-sl...,,
8,61788B72446E590A07B9F554,2021/10/26,Women,Tops,LULULEMON HOODIE,12,LULULEMON ATHLETICA,118,32,,https://poshmark.com/listing/lululemon-hoodie-...,,
9,61494546FF83041673D0D3E7,2021/09/20,Women,Tops,LULULEMON SWIFTLY TECH SHORT SLEEVE CREW WINE RED,6,LULULEMON ATHLETICA,68,50,,https://poshmark.com/listing/lululemon-swiftly...,,


## Create the Database

In [344]:
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by the db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    # The exception class lives in the sqlite3 module; a bare `Error` is not
    # defined in this notebook and would turn a failed connect into a NameError.
    except sqlite3.Error as e:
        print(e)

    return conn


def select_all_tasks(conn, table_name):
    """
    Query all rows in the given table, print them and return them
    :param conn: the Connection object
    :param table_name: name of the table to read; it is interpolated into the
        SQL text (identifiers cannot be bound as parameters), so only pass
        trusted names
    :return: list with one tuple per row
    """
    cur = conn.cursor()
    cur.execute(f"SELECT * FROM {table_name}")

    # Fetch once so the rows can be both printed and handed back to the caller
    rows = cur.fetchall()

    for row in rows:
        print(row)

    return rows
    
def print_query_result(cursor):
    """Fetch every remaining row from ``cursor`` and print one row per line."""
    for record in cursor.fetchall():
        print(record)
        

## 1. Create the Table

In [335]:
# Create the table if it does not exist
conn = create_connection('db/sold_items.db')
try:
    # category holds text labels such as 'Tops', so it is declared TEXT
    conn.execute('''CREATE TABLE IF NOT EXISTS sold_items_women
         (ID TEXT PRIMARY KEY     NOT NULL,
         gender            TEXT    NOT NULL,
         category          TEXT    NOT NULL,
         name              CHAR(100),
         list_price        CHAR(10),
         sale_price        CHAR(10),
         condition         CHAR(10),
         link              CHAR(100),
         likes             CHAR(4),
         comments          CHAR(4),
         date_added        CHAR(10))
         ;''')
finally:
    # Release the database file even when the statement fails
    conn.close()

## 2. Insert the data into the database

In [345]:
conn = create_connection('db/sold_items.db')

# Values are bound through `?` placeholders rather than formatted into the SQL
# text, so quotes in scraped item names can neither break nor alter the query.
insertion_query = '''
    INSERT INTO sold_items_women (ID,gender,category,name,list_price,sale_price,condition,link, date_added)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''

# DataFrame columns in the same order as the column list of the INSERT
insert_columns = ["item_id", "gender", "category", "name", "list_price", "sale_price", "condition", "link", "date_added"]

try:
    for index, row in df.iterrows():

        # str() mirrors the text the values had when they were formatted into
        # the statement, and keeps non-string cell types bindable
        values = tuple(str(row[column]) for column in insert_columns)

        try:
            conn.execute(insertion_query, values)
        except sqlite3.Error as e:
            # e.g. a duplicate primary key: report it and carry on with the rest
            print(e)
            continue

    # Commit the changes
    conn.commit()

finally:
    # Close the connection
    conn.close()

In [327]:
# Open the database and print every row stored in sold_items_women
conn = create_connection('db/sold_items.db')
data = select_all_tasks(conn, 'sold_items_women')


"""
Do something with data here 

"""


conn.close()

('615BB4305462C35EDBB705DF', 'Women', 'Pants_&_Jumpsuits', 'LULULEMON ALIGN‚Ñ¢ PANT 25 LEOPARD PRINT', '86', 'NWT', 'https://poshmark.com/listing/lululemon-align-pant-25-leopard-print-615bb4305462c35edbb705df', '2021/10/02')
('615A4DCE946D3734384DEAF7', 'Women', 'Pants_&_Jumpsuits', 'LULULEMON ALIGN‚Ñ¢ PANT 25 LEOPARD PRINT', '86', 'NWT', 'https://poshmark.com/listing/lululemon-align-pant-25-leopard-print-615a4dce946d3734384deaf7', '2021/10/02')
('6159D220CE5F1C6C90F85CC7', 'Women', 'Pants_&_Jumpsuits', 'LULULEMON ALIGN‚Ñ¢ PANT 25 LEOPARD PRINT', '86', 'NWT', 'https://poshmark.com/listing/lululemon-align-pant-25-leopard-print-6159d220ce5f1c6c90f85cc7', '2021/10/02')
('6162365702C187D8FE247650', 'Women', 'Pants_&_Jumpsuits', 'LULULEMON ALIGN‚Ñ¢ PANT 25 FOREST GREEN', '86', 'NWT', 'https://poshmark.com/listing/lululemon-align-pant-25-forest-green-6162365702c187d8fe247650', '2021/10/02')
('6162F7E20C0C96924A81A52F', 'Women', 'Pants_&_Jumpsuits', 'LULULEMON ALIGN‚Ñ¢ PANT 25 GREEN', '86', '

In [346]:
conn = create_connection('db/sold_items.db')
try:
    cur = conn.cursor()
    # The WHERE clause keeps rows whose sale_price casts to a non-zero integer
    cur.execute('''Select count(1) from sold_items_women where cast(sale_price as int)''')
    print_query_result(cur)
finally:
    # The connection was previously left open after the query
    conn.close()

(480,)


- User inputs
    - Brand
    - Size 
    - Type

 
- This is going to be used to estimate the resale price

What we need to do is take this input, retrieve the matching data from the database, and calculate the average sale value for:

- This particular size
- Other sizes
- What they normally list for versus what they sell for
    