In [16]:
import pandas as pd
import random

# Path to the styles CSV (Colab-style absolute path; adjust when running elsewhere).
file_path = '/content/styles.csv'

# Load the CSV into a DataFrame; rows the parser cannot read are skipped.
df = pd.read_csv(file_path, on_bad_lines='skip')

# Build the "Positive" text by joining every descriptive column (everything
# except 'id' and 'year') with ' , '.
# Missing cells are left out of the join: converting the frame with astype(str)
# would turn each NaN into the literal token 'nan' inside the text.
positive_source = df.drop(columns=['id', 'year'])
df['Positive'] = positive_source.agg(
    lambda row: ' , '.join(str(value) for value in row if pd.notna(value)),
    axis=1,
)


# List all column names
print("Column names:", df.columns.tolist())

Column names: ['id', 'gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'year', 'usage', 'productDisplayName', 'Positive']


In [17]:
target_columns = ['gender', 'masterCategory', 'subCategory', 'articleType', 'baseColour', 'season', 'usage']

# Collect the distinct non-null values of each categorical column.
# The values are kept in a dict so later cells can inspect them (previously they
# were computed and discarded); only the counts are printed to keep output short.
unique_values_by_column = {}
for col in target_columns:
    unique_vals = df[col].dropna().unique().tolist()
    unique_values_by_column[col] = unique_vals
    print(f"{col}: {len(unique_vals)} unique values")


In [18]:


# --- Article-type vocabularies used when building negatives ---
# Element order is significant for reproducibility: random.choice indexes into
# these lists, so reordering them changes what a seeded run produces.

# Article types treated as topwear.
topwear = [
    'Shirts', 'Tshirts', 'Tops', 'Sweatshirts',
    'Jackets', 'Blazers', 'Kurtas', 'Waistcoat',
    'Tunics', 'Shrug', 'Sweaters', 'Nightdress',
    'Kurtis', 'Camisoles', 'Jumpsuit', 'Nehru Jackets',
    'Lounge Tshirts', 'Suits', 'Robe',
]

# Article types treated as bottomwear.
bottomwear = [
    'Jeans', 'Track Pants', 'Shorts', 'Trousers', 'Capris',
    'Boxers', 'Trunk', 'Lounge Pants', 'Leggings', 'Patiala',
    'Churidar', 'Tights', 'Jeggings', 'Lounge Shorts', 'Rain Trousers',
]

# Accessory article types, keyed by a coarse group name.
accessories_groups = {
    'footwear': [
        'Casual Shoes', 'Formal Shoes', 'Flip Flops', 'Sandals', 'Heels',
        'Flats', 'Sports Shoes', 'Socks', 'Shoe Accessories', 'Shoe Laces',
    ],
    # NOTE(review): 'Mask and Peel' reads like a skincare item — confirm it
    # belongs in this group rather than 'beauty'.
    'fashion': [
        'Ties', 'Cufflinks', 'Ties and Cufflinks', 'Wristbands', 'Belts',
        'Headband', 'Gloves', 'Stockings', 'Stoles', 'Mufflers',
        'Scarves', 'Hats', 'Caps', 'Mask and Peel', 'Shapewear',
        'Suspenders', 'Waist Pouch',
    ],
    'bags': [
        'Handbags', 'Clutches', 'Wallets', 'Backpacks',
        'Laptop Bag', 'Duffel Bag', 'Trolley Bag', 'Messenger Bag',
        'Rucksacks', 'Mobile Pouch', 'Tablet Sleeve', 'Travel Accessory',
    ],
    'beauty': [
        'Deodorant', 'Perfume and Body Mist', 'Fragrance Gift Set',
        'Lipstick', 'Lip Gloss', 'Lip Care', 'Lip Liner', 'Lip Plumper',
        'Foundation and Primer', 'Highlighter and Blush', 'Compact',
        'Concealer', 'Mascara', 'Kajal and Eyeliner', 'Eyeshadow',
        'Makeup Remover', 'Face Wash and Cleanser', 'Face Moisturisers',
        'Eye Cream', 'Face Scrub and Exfoliator', 'Face Serum and Gel',
        'Sunscreen', 'Body Lotion', 'Body Wash and Scrub', 'Hair Colour',
        'Beauty Accessory', 'Mens Grooming Kit',
    ],
    'jewelry': [
        'Earrings', 'Ring', 'Bracelet', 'Pendant',
        'Necklace and Chains', 'Jewellery Set', 'Bangle', 'Accessory Gift Set',
    ],
    'others': [
        'Water Bottle', 'Footballs', 'Basketballs', 'Umbrellas',
        'Cushion Covers', 'Key chain', 'Free Gifts', 'Ipad',
    ],
}

# Flat list of every accessory article type, in group order.
all_accessories = []
for _group_members in accessories_groups.values():
    all_accessories.extend(_group_members)

# --- Helper functions ---
# Maps each gender label to the label used in the negative text.
# 'Unisex' maps to itself.
gender_map = dict(
    Men='Women',
    Women='Men',
    Boys='Girls',
    Girls='Boys',
    Unisex='Unisex',
)

def get_accessory_group(article):
    """Return the name of the first accessory group that lists `article`.

    Groups are scanned in the iteration order of `accessories_groups`.
    Returns None when no group contains the article.
    """
    matching_groups = (
        name
        for name, members in accessories_groups.items()
        if article in members
    )
    return next(matching_groups, None)

def reverse_article_type(article):
    """Pick a contrasting article type for `article`.

    - topwear    -> a random bottomwear item
    - bottomwear -> a random topwear item
    - accessory  -> with probability 0.5 a different item from the same
                    accessory group (or the article itself if the group has no
                    other item), otherwise a random item from a randomly chosen
                    other group
    - anything else -> a random accessory
    """
    if article in topwear:
        return random.choice(bottomwear)
    if article in bottomwear:
        return random.choice(topwear)
    if article not in all_accessories:
        # Unclassified article type: fall back to any accessory.
        return random.choice(all_accessories)

    own_group = get_accessory_group(article)
    stay_in_group = random.random() < 0.5
    if stay_in_group:
        siblings = [item for item in accessories_groups[own_group] if item != article]
        if not siblings:
            return article
        return random.choice(siblings)

    # Draw the group first, then an item from it (two RNG calls, in that order).
    candidate_groups = [name for name in accessories_groups if name != own_group]
    chosen_group = random.choice(candidate_groups)
    return random.choice(accessories_groups[chosen_group])

def reverse_season(season):
    """Return a randomly chosen season other than `season`.

    If `season` is not one of the four known seasons (for example a missing
    value), any of the four may be returned.
    """
    candidates = ['Fall', 'Summer', 'Winter', 'Spring']
    if season in candidates:
        candidates.remove(season)
    return random.choice(candidates)

def reverse_usage(usage):
    """Return a usage label different from `usage`.

    'Casual' becomes 'Smart Casual' with probability 0.7; otherwise it becomes
    a random label that is neither 'Casual' nor 'Smart Casual'. Any other input
    is replaced by a random label that differs from it.
    """
    usages = ['Casual', 'Ethnic', 'Formal', 'Sports', 'Smart Casual', 'Travel', 'Party', 'Home']

    if usage != 'Casual':
        alternatives = [label for label in usages if label != usage]
        return random.choice(alternatives)

    if random.random() < 0.7:
        return 'Smart Casual'

    excluded = ('Casual', 'Smart Casual')
    return random.choice([label for label in usages if label not in excluded])

# --- Create negative column ---
def create_negative(row):
    """Build the negative text for one row.

    Reads the row's 'gender', 'articleType', 'season' and 'usage' values, maps
    each to a contrasting value, and joins the four results with ' , '.
    A gender missing from `gender_map` falls back to 'Unisex'.
    """
    flipped_gender = gender_map.get(row['gender'], 'Unisex')
    flipped_article = reverse_article_type(row['articleType'])
    flipped_season = reverse_season(row['season'])
    flipped_usage = reverse_usage(row['usage'])
    return '{} , {} , {} , {}'.format(
        flipped_gender, flipped_article, flipped_season, flipped_usage
    )

# Seed the RNG immediately before sampling so that re-running this cell (or the
# whole notebook from a fresh kernel) regenerates exactly the same NEGATIVE
# column. Without a seed every run produces different negatives.
NEGATIVE_SEED = 42
random.seed(NEGATIVE_SEED)

# One negative text per row, built from that row's gender/article/season/usage.
df['NEGATIVE'] = df.apply(create_negative, axis=1)

# --- Save to new CSV ---
# df.to_csv('final_negative_result.csv', index=False)

# --- Optional: preview ---
print(df.head())


      id gender masterCategory subCategory  articleType baseColour  season  \
0  15970    Men        Apparel     Topwear       Shirts  Navy Blue    Fall   
1  39386    Men        Apparel  Bottomwear        Jeans       Blue  Summer   
2  59263  Women    Accessories     Watches      Watches     Silver  Winter   
3  21379    Men        Apparel  Bottomwear  Track Pants      Black    Fall   
4  53759    Men        Apparel     Topwear      Tshirts       Grey  Summer   

     year   usage                             productDisplayName  \
0  2011.0  Casual               Turtle Check Men Navy Blue Shirt   
1  2012.0  Casual             Peter England Men Party Blue Jeans   
2  2016.0  Casual                       Titan Women Silver Watch   
3  2011.0  Casual  Manchester United Men Solid Black Track Pants   
4  2012.0  Casual                          Puma Men Grey T-shirt   

                                            Positive  \
0  Men , Apparel , Topwear , Shirts , Navy Blue ,...   
1  Men , A

In [19]:
# Show the first row's positive text followed by its generated negative.
for text_column in ('Positive', 'NEGATIVE'):
    print(df.loc[0, text_column])

Men , Apparel , Topwear , Shirts , Navy Blue , Fall , Casual , Turtle Check Men Navy Blue Shirt
Women , Trousers , Summer , Formal


In [21]:
# Persist the frame with its Positive/NEGATIVE columns.
# index=False keeps the positional RangeIndex out of the file; otherwise it is
# written as an extra unnamed first column that reappears as 'Unnamed: 0' when
# the CSV is read back.
df.to_csv("final_style.csv", index=False)