# Myntra Sustainable-Fashion Recommendation System Using NLP

## Adding some sample data in existing file

In [1]:
 import pandas as pd
import random

# Sample data for fashion items
fashion_items = {
    'item_id': [f'item_{i}' for i in range(1, 21)],
    'name': [f'Fashion Item {i}' for i in range(1, 21)],
    'category': random.choices(['T-Shirt', 'Jeans', 'Jacket', 'Dress', 'Shoes'], k=20),
    'price': [round(random.uniform(10, 100), 2) for _ in range(20)],
    'description': [
        "This eco-friendly t-shirt is made from organic cotton.",
        "A pair of stylish jeans made from recycled materials.",
        "A warm jacket with sustainable insulation.",
        "A beautiful dress crafted from biodegradable fabric.",
        "Comfortable shoes made with renewable resources."
    ] * 4
}

df_fashion_items = pd.DataFrame(fashion_items)
df_fashion_items.head()


Unnamed: 0,item_id,name,category,price,description
0,item_1,Fashion Item 1,Jeans,95.43,This eco-friendly t-shirt is made from organic...
1,item_2,Fashion Item 2,Jeans,26.31,A pair of stylish jeans made from recycled mat...
2,item_3,Fashion Item 3,T-Shirt,42.24,A warm jacket with sustainable insulation.
3,item_4,Fashion Item 4,Dress,39.3,A beautiful dress crafted from biodegradable f...
4,item_5,Fashion Item 5,Jeans,26.9,Comfortable shoes made with renewable resources.


## Sample data for user interactions

In [5]:
# Sample data for user interactions
user_ids = [f'user_{i}' for i in range(1, 11)]
item_ids = [f'item_{i}' for i in range(1, 21)]
interactions = ['view', 'like', 'purchase']

# Generate interactions
user_interactions = {
    'user_id': random.choices(user_ids, k=50),
    'item_id': random.choices(item_ids, k=50),
    'interaction': random.choices(interactions, k=50)
}

df_user_interactions = pd.DataFrame(user_interactions)
df_user_interactions.head()


Unnamed: 0,user_id,item_id,interaction
0,user_8,item_9,like
1,user_5,item_20,view
2,user_4,item_17,purchase
3,user_1,item_7,purchase
4,user_2,item_6,purchase


In [7]:
fashion_trends = {
    'date': pd.date_range(start='2023-01-01', periods=30, freq='D'),
    'trend_score': [round(random.uniform(0, 100), 2) for _ in range(30)]
}

df_fashion_trends = pd.DataFrame(fashion_trends)
df_fashion_trends.head()


Unnamed: 0,date,trend_score
0,2023-01-01,73.01
1,2023-01-02,33.66
2,2023-01-03,94.15
3,2023-01-04,6.6
4,2023-01-05,13.74


## If no data available generate some sample data with description

In [8]:
import random
import pandas as pd

# Generate 150 rows of fashion items with different descriptions
fashion_items = {
    'id': [f'item_{i}' for i in range(1, 151)],
    'name': [f'Fashion Item {i}' for i in range(1, 151)],
    'category': random.choices(['T-Shirt', 'Jeans', 'Jacket', 'Dress', 'Shoes'], k=150),
    'description': [
        "This eco-friendly t-shirt is made from organic cotton.",
        "A pair of stylish jeans made from recycled materials.",
        "A warm jacket with sustainable insulation.",
        "A beautiful dress crafted from biodegradable fabric.",
        "Comfortable shoes made with renewable resources."
    ] * 30
}

# Add brands
brands = ['RARE RABBIT', 'ADIDAS', 'ROADSTER', 'HRX by Hrithik Roshan', 'HERE & NOW', 'DressBerry']
fashion_items['brand'] = random.choices(brands, k=150)

# Define price ranges for each category
price_ranges = {
    'T-Shirt': (800, 2000),
    'Jeans': (800, 3000),
    'Jacket': (1200, 3000),
    'Dress': (800, 3000),
    'Shoes': (1000, 4000)
}

# Generate prices based on the category-specific ranges
fashion_items['price'] = [round(random.uniform(*price_ranges[cat]), 2) for cat in fashion_items['category']]

# Create a DataFrame from the fashion items
fashion_df = pd.DataFrame(fashion_items)

# Display the first 5 rows of the DataFrame
print(fashion_df.head())

# Convert the DataFrame to a CSV file
csv_file_path = 'fashion_items_with_brands.csv'
fashion_df.to_csv(csv_file_path, index=False)

print(f"CSV file saved to {csv_file_path}")


       id            name category  \
0  item_1  Fashion Item 1   Jacket   
1  item_2  Fashion Item 2    Jeans   
2  item_3  Fashion Item 3    Jeans   
3  item_4  Fashion Item 4    Dress   
4  item_5  Fashion Item 5   Jacket   

                                         description                  brand  \
0  This eco-friendly t-shirt is made from organic...  HRX by Hrithik Roshan   
1  A pair of stylish jeans made from recycled mat...            RARE RABBIT   
2         A warm jacket with sustainable insulation.             HERE & NOW   
3  A beautiful dress crafted from biodegradable f...  HRX by Hrithik Roshan   
4   Comfortable shoes made with renewable resources.             DressBerry   

     price  
0  1508.46  
1  2291.25  
2  1585.52  
3   979.81  
4  2386.04  
CSV file saved to fashion_items_with_brands.csv


## If data available, load the file with brands and descriptions

In [25]:
import pandas as pd

# Load the CSV file
df_fashion_items_load = pd.read_csv('fashion_items_with_brands.csv')

# Display the first few rows of the DataFrame
print(df_fashion_items_load.head())


       id            name category  \
0  item_1  Fashion Item 1   Jacket   
1  item_2  Fashion Item 2   Jacket   
2  item_3  Fashion Item 3    Jeans   
3  item_4  Fashion Item 4    Jeans   
4  item_5  Fashion Item 5    Jeans   

                                         description                  brand  \
0  Sustainable style that keeps you warm. Experie...             DressBerry   
1  Crafted for warmth, built to last. This jacket...             DressBerry   
2  Organic comfort, endless adventures. Explore t...            RARE RABBIT   
3  Responsibly sourced, remarkably comfortable. T...  HRX by Hrithik Roshan   
4  Look good, feel good, do good. These jeans are...             HERE & NOW   

     price  
0  1995.70  
1  1605.66  
2   883.30  
3  2340.43  
4  2102.34  


In [26]:
csv_file_path = 'user_interactions.csv'
df_user_interactions.to_csv(csv_file_path, index=False)
df_users_load = pd.read_csv('user_interactions.csv')

# Display the first few rows of the DataFrame
print(df_users_load.head())


  user_id  item_id interaction
0  user_8   item_9        like
1  user_5  item_20        view
2  user_4  item_17    purchase
3  user_1   item_7    purchase
4  user_2   item_6    purchase


## Natural Language Processing

In [27]:
import spacy
from spacy.tokens import DocBin
import re

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Define the list of keywords
keywords = ["eco-friendly", "sustainable", "sustainability", "renewable", "organic", "reused", "recycled"]



### CHECK FOR SUSTAINABLE FASHION KEYWORDS

In [29]:
# Function to check if any keyword is in the description
def contains_keywords(description):
    description = description.lower()
    for keyword in keywords:
        if keyword in description:
            return True
    return False

# Apply the function to the description column
df_fashion_items_load['contains_keywords'] = df_fashion_items_load['description'].apply(contains_keywords)

# Display the DataFrame with the new column
print(df_fashion_items_load.head())


       id            name category  \
0  item_1  Fashion Item 1   Jacket   
1  item_2  Fashion Item 2   Jacket   
2  item_3  Fashion Item 3    Jeans   
3  item_4  Fashion Item 4    Jeans   
4  item_5  Fashion Item 5    Jeans   

                                         description                  brand  \
0  Sustainable style that keeps you warm. Experie...             DressBerry   
1  Crafted for warmth, built to last. This jacket...             DressBerry   
2  Organic comfort, endless adventures. Explore t...            RARE RABBIT   
3  Responsibly sourced, remarkably comfortable. T...  HRX by Hrithik Roshan   
4  Look good, feel good, do good. These jeans are...             HERE & NOW   

     price  contains_keywords  
0  1995.70               True  
1  1605.66               True  
2   883.30               True  
3  2340.43               True  
4  2102.34               True  


In [30]:
# Filter products containing the keywords
recommended_products = df_fashion_items_load[df_fashion_items_load['contains_keywords']]

# Display the recommended products
print(recommended_products)


           id              name category  \
0      item_1    Fashion Item 1   Jacket   
1      item_2    Fashion Item 2   Jacket   
2      item_3    Fashion Item 3    Jeans   
3      item_4    Fashion Item 4    Jeans   
4      item_5    Fashion Item 5    Jeans   
..        ...               ...      ...   
145  item_146  Fashion Item 146    Dress   
146  item_147  Fashion Item 147    Dress   
147  item_148  Fashion Item 148   Jacket   
148  item_149  Fashion Item 149   Jacket   
149  item_150  Fashion Item 150    Shoes   

                                           description                  brand  \
0    Sustainable style that keeps you warm. Experie...             DressBerry   
1    Crafted for warmth, built to last. This jacket...             DressBerry   
2    Organic comfort, endless adventures. Explore t...            RARE RABBIT   
3    Responsibly sourced, remarkably comfortable. T...  HRX by Hrithik Roshan   
4    Look good, feel good, do good. These jeans are...            

### Compare with Orignal number of Products

In [31]:
import pandas as pd

# Load the CSV file
df_original = pd.read_csv('fashion_items_with_brands.csv')

# Display the first few rows of the DataFrame
print("Actual products:")
print(df_original.head())

# Show the actual number of products
print(f"Total number of products: {len(df_original)}")


Actual products:
       id            name category  \
0  item_1  Fashion Item 1   Jacket   
1  item_2  Fashion Item 2   Jacket   
2  item_3  Fashion Item 3    Jeans   
3  item_4  Fashion Item 4    Jeans   
4  item_5  Fashion Item 5    Jeans   

                                         description                  brand  \
0  Sustainable style that keeps you warm. Experie...             DressBerry   
1  Crafted for warmth, built to last. This jacket...             DressBerry   
2  Organic comfort, endless adventures. Explore t...            RARE RABBIT   
3  Responsibly sourced, remarkably comfortable. T...  HRX by Hrithik Roshan   
4  Look good, feel good, do good. These jeans are...             HERE & NOW   

     price  
0  1995.70  
1  1605.66  
2   883.30  
3  2340.43  
4  2102.34  
Total number of products: 150


In [32]:


# Show the size of the recommended products
print("Selected items:")
print(recommended_products)




Selected items:
           id              name category  \
0      item_1    Fashion Item 1   Jacket   
1      item_2    Fashion Item 2   Jacket   
2      item_3    Fashion Item 3    Jeans   
3      item_4    Fashion Item 4    Jeans   
4      item_5    Fashion Item 5    Jeans   
..        ...               ...      ...   
145  item_146  Fashion Item 146    Dress   
146  item_147  Fashion Item 147    Dress   
147  item_148  Fashion Item 148   Jacket   
148  item_149  Fashion Item 149   Jacket   
149  item_150  Fashion Item 150    Shoes   

                                           description                  brand  \
0    Sustainable style that keeps you warm. Experie...             DressBerry   
1    Crafted for warmth, built to last. This jacket...             DressBerry   
2    Organic comfort, endless adventures. Explore t...            RARE RABBIT   
3    Responsibly sourced, remarkably comfortable. T...  HRX by Hrithik Roshan   
4    Look good, feel good, do good. These jeans ar

In [33]:
# Show the number of selected items
print(f"Total number of selected items used for Sustainable Fashion Recommendation System are: {len(recommended_products)}")

Total number of selected items used for Sustainable Fashion Recommendation System are: 106
