## Fall and Winter Analysis

In [4]:
import pandas as pd
import numpy as np

df = pd.read_csv('/Users/fuguan/Desktop/llm_winter_fall.csv')

In [11]:
# unique values in clothing type column
unique_clothing_types = df['clothing_type'].unique()
print("Unique Clothing Types:")
print(sorted(unique_clothing_types))

Unique Clothing Types:
['Accessory', 'Apron', 'Badge', 'Bag', 'Basket', 'Beanie', 'Belt', 'Bibs', 'Blanket', 'Blouse', 'Bodycon', 'Boots', 'Bra', 'Bracelet', 'Cap', 'Cape', 'Cardigan', 'Charms', 'Cheer Set', 'Coin', 'Costume', 'Crown', 'Dancewear', 'Dress', 'Eyeglasses Holder', 'Eyewear Strap', 'Face Cover', 'Face Mask', 'Gaiter', 'Glasses', 'Glove', 'Gloves', 'Goggles', 'Hat', 'Headband', 'Headwear Set', 'Hoodie', 'Jacket', 'Jersey', 'Jewelry', 'Jumpsuit', 'Key Cover', 'Keychain', 'Lanyard', 'Leggings', 'Leotard', 'Long-Sleeve', 'Long-Sleeve shirt', 'Lounge Set', 'Luggage Tags', 'Mask', 'Necklace', 'Necktie', 'Nightgown', 'Other', 'Outfit Set', 'Packing Cubes', 'Pajamas', 'Pants', 'Phone Case', 'Pin', 'Poncho', 'Pullover', 'Purse', 'Robe', 'Romper', 'Sandals', 'Sash', 'Scarf', 'Set', 'Shoes', 'Shorts', 'Skirt', 'Slippers', 'Socks', 'Sunglasses', 'Sweater', 'Sweatsuit', 'Swimsuit', 'T-Shirt', 'T-shirt', 'Tank Top', 'Tee Combo Set', 'Tie', 'Tights', 'Top', 'Tote', 'Tracksuit', 'Tunic', 

In [7]:
# top clothing products in fall and winter
winter_clothing = set(df[df['season'] == 'Winter']['clothing_type'].unique())
fall_clothing = set(df[df['season'] == 'Fall']['clothing_type'].unique())
winter_only = winter_clothing - fall_clothing
print("\nClothing Types ONLY Brought in Winter:")
print(sorted(winter_only))
fall_only = fall_clothing - winter_clothing
print("\nClothing Types ONLY Brought in Fall:")
print(sorted(fall_only))


Clothing Types ONLY Brought in Winter:
['Basket', 'Beanie', 'Bibs', 'Blanket', 'Bracelet', 'Cap', 'Cheer Set', 'Coin', 'Eyeglasses Holder', 'Eyewear Strap', 'Face Cover', 'Glove', 'Gloves', 'Goggles', 'Jacket', 'Jersey', 'Key Cover', 'Leotard', 'Luggage Tags', 'Necklace', 'Necktie', 'Nightgown', 'Packing Cubes', 'Phone Case', 'Pullover', 'Sandals', 'Scarf', 'Shoes', 'Sweatsuit', 'Tee Combo Set', 'Tie', 'Tights', 'Tote', 'Tunic', 'Vest', 'Waders', 'Watch Winder', 'coat', 'dress', 'hoodie', 'mittens', 'top']

Clothing Types ONLY Brought in Fall:
['Accessory', 'Badge', 'Bodycon', 'Cape', 'Charms', 'Dancewear', 'Headband', 'Headwear Set', 'Lanyard', 'Lounge Set', 'Mask', 'Outfit Set', 'Pin', 'Poncho', 'Romper', 'Sash', 'Set', 'Sweater', 'Swimsuit', 'Tracksuit', 'Umbrella', 'Undershirt', 'Workout Set', 'face mask', 'gaiter', 'headpiece', 'pants', 'sweatpants']


In [8]:
# variance in clothing types between fall and winter
winter_counts = df[df['season'] == 'Winter']['clothing_type'].value_counts()
fall_counts = df[df['season'] == 'Fall']['clothing_type'].value_counts()
clothing_types = set(winter_counts.index).union(set(fall_counts.index))
variance_data = []
for clothing in clothing_types:
    winter_count = winter_counts.get(clothing, 0)
    fall_count = fall_counts.get(clothing, 0)
    variance = abs(winter_count - fall_count)
    variance_data.append((clothing, winter_count, fall_count, variance))
variance_df = pd.DataFrame(variance_data, columns=['clothing_type', 'winter_count', 'fall_count', 'variance'])
variance_df = variance_df.sort_values(by='variance', ascending=False)
print("\nClothing Types with Highest Variance Between Fall and Winter:")
print(variance_df.head(10))


Clothing Types with Highest Variance Between Fall and Winter:
    clothing_type  winter_count  fall_count  variance
103        Gloves            12           0        12
42     Watch Band             8          15         7
55       Tank Top             2           9         7
19     Sunglasses             5          12         7
15    Long-Sleeve            12           5         7
5            Mask             0           6         6
63        Costume             1           6         5
62         Jacket             5           0         5
108        Shorts             1           6         5
50          Socks            13           9         4


In [12]:
# average rating per clothing item in fall vs winter
avg_ratings = df.groupby(['clothing_type', 'season'])['rating'].mean().unstack()
avg_ratings['rating_diff'] = abs(avg_ratings['Winter'] - avg_ratings['Fall'])
avg_ratings = avg_ratings.sort_values(by='rating_diff', ascending=False)
print("\nClothing Types with Highest Rating Difference Between Fall and Winter:")
print(avg_ratings.head(10))


Clothing Types with Highest Rating Difference Between Fall and Winter:
season                 Fall  Winter  rating_diff
clothing_type                                   
shirt              4.000000     1.0     3.000000
Apron              5.000000     3.0     2.000000
shoes              3.000000     5.0     2.000000
Top                5.000000     3.0     2.000000
Robe               3.000000     5.0     2.000000
Sunglasses         4.250000     2.4     1.850000
Boots              5.000000     3.2     1.800000
Shorts             3.333333     5.0     1.666667
Gaiter             3.500000     2.0     1.500000
Long-Sleeve shirt  3.500000     5.0     1.500000
