# Steg 1: Generera Realistiskt Dataset

**Target: CTR (Click-Through Rate)**  
CTR = klick / visningar. CTR bestäms av bilden – inte av pris eller kassa.

**Vad vi bygger här:**  
En formel som omvandlar produktegenskaper (product features) och bildinställningar (image settings) till ett trovärdigt CTR-värde med realistiska mönster och lite brus.

In [1]:
import json
import random
import pandas as pd
from pathlib import Path

# Seed the module-level RNG with a fixed value before any sampling happens.
random.seed(42)

# Destination of the generated dataset, relative to the current working directory.
OUTPUT_PATH = Path('../..') / 'data' / 'ctr_db.json'
print(f"Output: {OUTPUT_PATH.resolve()}")

Output: /Users/edvinrunhellen/Documents/MAI24HA/Exjobb/skejl/data/ctr_db.json


## Taxonomier

In [2]:
# --- Product attributes -------------------------------------------------
# The order of every list is significant: random.choice() indexes into it,
# so reordering would change the dataset generated from a given seed.
GARMENT_TYPES = [
    'hoodie',
    't-shirt',
    'jacket',
    'jeans',
    'zip-up hoodie',
]
COLORS = ['dark', 'light', 'colorful', 'black', 'white']
FITS = ['loose', 'regular', 'tight', 'oversized']
GENDERS = ['male', 'female', 'unisex']

# --- Image settings -----------------------------------------------------
STYLES = [
    'urban_outdoor',
    'studio_minimal',
    'lifestyle_indoor',
    'casual_lifestyle',
    'streetwear',
    'lifestyle_outdoor',
]
LIGHTINGS = ['golden_hour', 'studio', 'natural', 'overcast', 'dramatic']
BACKGROUNDS = [
    'studio_white',
    'studio_grey',
    'neutral_wall',
    'urban_street',
    'graffiti_wall',
    'nature_outdoor',
    'park',
    'busy_pattern',
]
POSES = ['walking', 'standing', 'action', 'sitting', 'dynamic', 'casual']
EXPRESSIONS = ['confident', 'serious', 'smiling', 'neutral', 'focused']
ANGLES = ['front', 'side', '3/4', 'back']

## CTR-formel

Tre faktorer styr CTR för en produktbild:
1. **Visual impact** – syns produkten tydligt?
2. **Context match** – matchar stilen produkten?
3. **Emotional appeal** – skapar bilden aspiration?

In [3]:
# Garment family that shares the same rules in several steps of the formula.
_HOODIES = ('hoodie', 'zip-up hoodie')

# Step 1: base CTR per garment type.
_BASE_CTR = {
    'hoodie': 0.028,
    't-shirt': 0.032,
    'jacket': 0.030,
    'jeans': 0.022,
    'zip-up hoodie': 0.027,
}

# Step 2: (color group, {lighting: CTR delta}) -- visual impact.
_LIGHTING_DELTAS = (
    (('dark', 'black'),
     {'golden_hour': 0.010, 'dramatic': 0.008, 'studio': 0.004, 'overcast': -0.005}),
    (('light', 'white'),
     {'studio': 0.009, 'natural': 0.007, 'golden_hour': 0.005, 'overcast': 0.003}),
    (('colorful',),
     {'natural': 0.011, 'studio': 0.008, 'overcast': -0.003}),
)

# Step 3: (garment group, {style: CTR delta}) -- context match.
_STYLE_DELTAS = (
    (_HOODIES,
     {'urban_outdoor': 0.012, 'streetwear': 0.011,
      'lifestyle_indoor': 0.006, 'studio_minimal': 0.004}),
    (('t-shirt',),
     {'studio_minimal': 0.010, 'casual_lifestyle': 0.009, 'streetwear': 0.007}),
    (('jacket',),
     {'lifestyle_outdoor': 0.011, 'urban_outdoor': 0.009, 'studio_minimal': 0.005}),
    (('jeans',),
     {'casual_lifestyle': 0.008, 'urban_outdoor': 0.006}),
)

# Step 5: expression bonus per gender -- emotional appeal.
_EXPRESSION_DELTAS = {
    'male': {'confident': 0.007, 'serious': 0.005, 'focused': 0.004,
             'smiling': 0.003, 'neutral': 0.002},
    'female': {'smiling': 0.008, 'confident': 0.007, 'focused': 0.005,
               'serious': 0.004, 'neutral': 0.002},
}
# Used for every gender other than 'male' / 'female' (i.e. 'unisex').
_UNISEX_EXPRESSION_DELTAS = {'confident': 0.004, 'neutral': 0.004}


def _grouped_delta(table, group_value, key):
    """Return the delta for *key* from the first group containing *group_value*, else 0.0."""
    for members, deltas in table:
        if group_value in members:
            return deltas.get(key, 0.0)
    return 0.0


def calculate_ctr(garment_type, color, fit, gender,
                  style, lighting, background,
                  pose, expression, angle,
                  noise_level=0.006):
    """Return a synthetic click-through rate for one product image.

    The value starts at a per-garment base rate, receives fixed bonuses and
    penalties for specific attribute combinations, gets Gaussian noise added,
    and is finally clamped to [0.005, 0.08] and rounded to 4 decimals.

    Args:
        garment_type: Garment name; unknown values use a base rate of 0.028.
        color: Product color, combined with ``lighting`` in step 2.
        fit: Accepted for a uniform signature; not used by the formula.
        gender: 'male' or 'female' select their own expression table; any
            other value uses the unisex table.
        style: Image style, combined with ``garment_type`` in step 3.
        lighting: Lighting setup, combined with ``color`` in step 2.
        background: Image background (step 4).
        pose: Model pose (step 6).
        expression: Model expression (step 5).
        angle: Camera angle (step 7).
        noise_level: Standard deviation passed to ``random.gauss``.

    Returns:
        The CTR as a float in [0.005, 0.08], rounded to 4 decimals.
    """
    # 1. Base CTR per garment type
    ctr = _BASE_CTR.get(garment_type, 0.028)

    # 2. Lighting + color synergy (visual impact)
    ctr += _grouped_delta(_LIGHTING_DELTAS, color, lighting)

    # 3. Garment + style (context match)
    ctr += _grouped_delta(_STYLE_DELTAS, garment_type, style)

    # 4. Background
    if background in ('studio_white', 'studio_grey'):
        ctr += 0.006
    elif background == 'neutral_wall':
        ctr += 0.004
    elif background in ('urban_street', 'graffiti_wall'):
        if garment_type in _HOODIES or garment_type == 'jacket':
            ctr += 0.007
        elif garment_type == 't-shirt':
            ctr += 0.004
    elif background in ('nature_outdoor', 'park'):
        if garment_type == 'jacket':
            ctr += 0.006
    elif background == 'busy_pattern':
        ctr -= 0.007

    # 5. Expression (emotional appeal); unlisted expressions earn 0.002
    expression_deltas = _EXPRESSION_DELTAS.get(gender, _UNISEX_EXPRESSION_DELTAS)
    ctr += expression_deltas.get(expression, 0.002)

    # 6. Pose: 'action' and 'sitting' only pay off in a matching context
    if pose == 'action':
        if garment_type in _HOODIES or garment_type == 'jacket':
            ctr += 0.006
    elif pose == 'sitting':
        if style == 'lifestyle_indoor':
            ctr += 0.004
    else:
        ctr += {'dynamic': 0.006, 'walking': 0.005,
                'casual': 0.003, 'standing': 0.002}.get(pose, 0.0)

    # 7. Camera angle
    if angle == 'back':
        # NOTE(review): 'zip-up hoodie' is not rewarded here although the
        # 'action' pose rule above includes it -- confirm this is intended.
        if garment_type in ('hoodie', 'jacket'):
            ctr += 0.003
    else:
        ctr += {'3/4': 0.004, 'front': 0.002, 'side': -0.001}.get(angle, 0.0)

    # 8. Normally distributed noise; the draw happens even when noise_level
    #    is 0, so the RNG state advances on every call.
    ctr += random.gauss(0, noise_level)

    bounded = max(0.005, min(0.08, ctr))
    return round(bounded, 4)


print('Formel definierad.')

Formel definierad.


## Snabbtest – extremfall utan brus

In [4]:
# Hand-picked best/worst scenarios, evaluated with noise_level=0 so the
# printed values show the bonuses of the formula without random jitter.
cases = [
    ('OPTIMAL dark hoodie', {
        'garment_type': 'hoodie', 'color': 'dark', 'fit': 'loose', 'gender': 'male',
        'style': 'urban_outdoor', 'lighting': 'golden_hour', 'background': 'graffiti_wall',
        'pose': 'dynamic', 'expression': 'confident', 'angle': '3/4',
        'noise_level': 0,
    }),
    ('OPTIMAL light t-shirt', {
        'garment_type': 't-shirt', 'color': 'light', 'fit': 'regular', 'gender': 'female',
        'style': 'studio_minimal', 'lighting': 'studio', 'background': 'studio_white',
        'pose': 'walking', 'expression': 'smiling', 'angle': '3/4',
        'noise_level': 0,
    }),
    ('SUBOPTIMAL dark jeans', {
        'garment_type': 'jeans', 'color': 'dark', 'fit': 'tight', 'gender': 'male',
        'style': 'studio_minimal', 'lighting': 'overcast', 'background': 'busy_pattern',
        'pose': 'standing', 'expression': 'neutral', 'angle': 'side',
        'noise_level': 0,
    }),
]

# Print each scenario's CTR as a percentage, label left-aligned to 30 chars.
for name, params in cases:
    ctr = calculate_ctr(**params)
    print(f'{name:<30}: {ctr*100:.2f}%')

OPTIMAL dark hoodie           : 7.40%
OPTIMAL light t-shirt         : 7.40%
SUBOPTIMAL dark jeans         : 1.30%


## Generera 5000 produkter

In [5]:
def make_product():
    """Return one randomly sampled product record with its synthetic CTR.

    Every attribute is drawn uniformly from its taxonomy list, the CTR is
    computed from those attributes with the default noise level, and an
    impression count between 1000 and 5000 (inclusive) is attached.
    """
    # Order matters twice: it fixes the sequence of RNG draws and the key
    # order of the resulting record.
    attribute_pools = (
        ('garment_type', GARMENT_TYPES),
        ('color', COLORS),
        ('fit', FITS),
        ('gender', GENDERS),
        ('style', STYLES),
        ('lighting', LIGHTINGS),
        ('background', BACKGROUNDS),
        ('pose', POSES),
        ('expression', EXPRESSIONS),
        ('angle', ANGLES),
    )
    product = {key: random.choice(pool) for key, pool in attribute_pools}

    # The record keys match calculate_ctr's parameter names one-to-one.
    product['ctr'] = calculate_ctr(**product)
    product['impressions'] = random.randint(1000, 5000)
    return product

# Sample the full synthetic catalogue and load it into a DataFrame.
N_PRODUCTS = 5000
products = [make_product() for _ in range(N_PRODUCTS)]
df = pd.DataFrame(products)

# Sanity check: row count plus summary statistics of the CTR column.
print(f'Genererade {len(df)} produkter')
ctr_summary = df['ctr'].describe()
print(ctr_summary)

Genererade 5000 produkter
count    5000.000000
mean        0.047622
std         0.011730
min         0.009000
25%         0.039700
50%         0.048100
75%         0.055800
max         0.080000
Name: ctr, dtype: float64


## Spara

In [6]:
# Make sure the target directory exists before writing.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Write one JSON object per product. The encoding is pinned to UTF-8 so the
# file does not depend on the platform's default locale encoding.
with OUTPUT_PATH.open('w', encoding='utf-8') as f:
    json.dump(df.to_dict('records'), f, indent=2)

# Short report of what was written.
print(f'Sparad: {OUTPUT_PATH.resolve()}')
print(f'Antal: {len(df)}')
print(f'CTR range: {df["ctr"].min()*100:.1f}% – {df["ctr"].max()*100:.1f}%')
print(f'Medel CTR: {df["ctr"].mean()*100:.2f}%')

Sparad: /Users/edvinrunhellen/Documents/MAI24HA/Exjobb/skejl/data/ctr_db.json
Antal: 5000
CTR range: 0.9% – 8.0%
Medel CTR: 4.76%
