In [1]:
# Crochet Market Analysis
# Author: Ngan Phan
# Date: October 7, 2025
# -------

# Overview
# This project analyzes crochet pattern listings on Ravelry to
# understand what factors influence popularity and rating
# -------

## 1. Introduction

**Goal:**
Analyze crochet pattern listings to see what factors may influence their popularity & engagement.

**Research Questions:**
- What crochet types (amigurumi, clothing items, decorations, etc.) are most popular?
- How does price relate to popularity metrics (favourites & reviews)?
- What keywords or tags are common among high-performing listings?
- Are there any seasonal trends in listings (e.g., fall–winter compared with summer)?

**Data Source:**
Ravelry API (I tried Etsy scraping, but they have anti-scraping measures, unfortunately)

**Libraries Used:**
`requests`, `pandas`, `numpy`, `matplotlib`, `seaborn`, `wordcloud`

## 2. Data Collection

In [1]:
import os
import time

import requests
from requests.auth import HTTPBasicAuth

# Ravelry API basic-auth credentials are read from the environment so that no
# secret is stored in the notebook. Set RAVELRY_USERNAME and RAVELRY_PASSWORD
# before starting the kernel.
# NOTE(review): a key pair used to be hard-coded in this cell; treat it as
# exposed and revoke/regenerate it.
try:
    username = os.environ["RAVELRY_USERNAME"]
    password = os.environ["RAVELRY_PASSWORD"]
except KeyError as missing:
    raise RuntimeError(
        f"Missing environment variable {missing}; set RAVELRY_USERNAME and "
        "RAVELRY_PASSWORD to your Ravelry API credentials."
    ) from None

## queries: craft=crochet , category=amigurumi , free=true or free=false , sort=popularity , sort=price

# Example: print the names of the first two pages (5 results each) of crochet
# patterns sorted by popularity.
print("Popularity Sort:")
for page in range(1, 3):  # more pages can be fetched by widening the range
    url = f"https://api.ravelry.com/patterns/search.json?craft=crochet&sort=popularity&page={page}&page_size=5"
    # timeout: without one, a stalled connection would hang the cell indefinitely
    response = requests.get(url, auth=HTTPBasicAuth(username, password), timeout=30)
    if response.status_code == 200:
        data = response.json()
        for pattern in data["patterns"]:
            print(pattern["name"])
    else:
        print("Error:", response.status_code)

Popularity Sort:
6 Day Star Blanket
Rainbow Ripple Baby Blanket
Scrap Yarn Basket
Mother Bear (Seamless Crochet)
Jethro
Granny Square Chicken
The Granny Hexagon Cardigan
Balloon Dog
Impkin
Emotional Support Chicken™ (Crochet)


In [None]:
# 1. Collect Data: the 1000 most popular crochet patterns

# Tunable collection settings. N_PAGES * PAGE_SIZE = 1000 patterns, matching
# the stated target (the loop previously fetched only 5 pages = 250 patterns).
N_PAGES = 20
PAGE_SIZE = 50
REQUEST_DELAY_S = 1   # pause between requests to stay polite to the API
REQUEST_TIMEOUT_S = 30

ravelry_auth = HTTPBasicAuth(username, password)

## getting the first N_PAGES pages of the search query and saving pattern ids
pattern_ids = []

for page in range(1, N_PAGES + 1):
    url = f"https://api.ravelry.com/patterns/search.json?craft=crochet&sort=popularity&page={page}&page_size={PAGE_SIZE}"
    response = requests.get(url, auth=ravelry_auth, timeout=REQUEST_TIMEOUT_S)
    if response.status_code == 200:
        data = response.json()
        pattern_ids.extend(p["id"] for p in data["patterns"])
    else:
        # report failures instead of silently ending up with fewer ids
        print(f"Search page {page} failed:", response.status_code)
    time.sleep(REQUEST_DELAY_S)

## using pattern ids to collect detailed information of each pattern
crochet_patterns = []

for pid in pattern_ids:
    url = f"https://api.ravelry.com/patterns/{pid}.json"
    response = requests.get(url, auth=ravelry_auth, timeout=REQUEST_TIMEOUT_S)
    if response.status_code == 200:
        data = response.json()
        # Store one record (dict) per pattern. list.extend() on the response
        # dict would add only its *keys* (strings), not the pattern data.
        # NOTE(review): the detail payload is presumably wrapped as
        # {"pattern": {...}} — confirm against the API docs; the whole payload
        # is used as a fallback when that wrapper is absent.
        crochet_patterns.append(data.get("pattern", data))
    else:
        print(f"Pattern {pid} failed:", response.status_code)
    time.sleep(REQUEST_DELAY_S)

print("Total Patterns Collected:", len(crochet_patterns))

In [None]:
# 2. Import Libraries & Load Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

# Notebook-wide display defaults: pastel whitegrid plots, and never truncate
# the column list when a DataFrame is shown.
sns.set(style='whitegrid', palette='pastel')
pd.set_option('display.max_columns', None)

# Persist the collected pattern records to disk ...
csv_path = "crochet_patterns.csv"
pd.DataFrame(crochet_patterns).to_csv(csv_path, index=False)

# ... and work from the saved CSV from here on.
df = pd.read_csv(csv_path)

df.head()

# 3. Data Cleaning & Preparation

In [None]:
# checking missing values
df.info()
# printed explicitly: a bare expression in the middle of a cell is not displayed
print(df.isnull().sum())

# removing duplicate listings (if any)
df = df.drop_duplicates()

# cleaning up columns: coerce to numeric, turning unparseable values into NaN
# NOTE(review): assumes the CSV has "list_price", "favorites" and "reviews"
# columns — confirm against the fields actually returned by the API.
df["price"] = pd.to_numeric(df["list_price"], errors="coerce")
df["favorites"] = pd.to_numeric(df["favorites"], errors="coerce")
df["reviews"] = pd.to_numeric(df["reviews"], errors="coerce")

# dropping rows with missing/invalid prices (this also drops rows priced at 0);
# .copy() so the column assignment below does not write into a view of the
# unfiltered frame (SettingWithCopyWarning)
df = df[df["price"].notnull() & (df["price"] > 0)].copy()

# extracting pattern type from title of listing
# - the alternation is wrapped in a capture group, which str.extract requires
#   (the previous pattern had an unbalanced ")" and raised on compile)
# - (?i): titles are usually capitalised ("Blanket"), so match case-insensitively
# - \b ... \b: whole words only, so "hat" does not match inside "What";
#   an optional plural suffix ("hats", "dresses") is allowed
pattern_type_regex = (
    r'(?i)\b(amigurumi|hat|bag|plush|doll|top|sweater|dress|scarf|blanket|toy|shawl)(?:e?s)?\b'
)
df["pattern_type"] = (
    df["name"].str.extract(pattern_type_regex, expand=False).str.lower()
)

# sample() raises if asked for more rows than exist, so cap at the frame size
df.sample(min(5, len(df)))