In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
%%writefile app.py
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import streamlit as st
from PIL import Image

# Dataset locations. File names are case-sensitive on Linux, and the CSV that
# the other notebook cells load successfully is spelled "Fashion Dataset.csv".
DATA_DIR = '/kaggle/input/myntra-fashion-product-dataset'
CSV_PATH = os.path.join(DATA_DIR, 'Fashion Dataset.csv')
# NOTE(review): assumes local images live in <DATA_DIR>/images/<p_id>.jpg --
# confirm against the actual dataset layout.
IMAGE_DIR = os.path.join(DATA_DIR, 'images')

# Text columns (as reported by `df.columns` for this CSV) that describe a product.
TEXT_COLUMNS = ['name', 'brand', 'colour', 'description']
# Number of recommendations shown per query.
TOP_N = 5


@st.cache_resource
def load_model():
    """Load the catalogue and build the pairwise similarity matrix once.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the CSV parse, TF-IDF fit and cosine-similarity computation from
    being repeated on each rerun.

    Returns:
        tuple: ``(catalogue, similarity)`` where ``catalogue`` is the product
        DataFrame (with an added ``combined_features`` column) and
        ``similarity`` is the square cosine-similarity matrix whose rows and
        columns follow the DataFrame's row order.
    """
    catalogue = pd.read_csv(CSV_PATH)

    # A single missing value would turn the concatenated document into NaN,
    # which TfidfVectorizer rejects, so blank out missing text first.
    for column in TEXT_COLUMNS:
        catalogue[column] = catalogue[column].fillna('').astype(str)

    # Combine relevant features into a single string for TF-IDF
    catalogue['combined_features'] = catalogue[TEXT_COLUMNS[0]].str.cat(
        [catalogue[column] for column in TEXT_COLUMNS[1:]], sep=' '
    )

    # TF-IDF Vectorizer
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(catalogue['combined_features'])

    # Cosine Similarity
    similarity = cosine_similarity(tfidf_matrix, tfidf_matrix)
    return catalogue, similarity


df, cosine_sim = load_model()


# Function to get recommendations
def get_recommendations(product_name, cosine_sim=cosine_sim):
    """Return the catalogue rows most similar to ``product_name``.

    Args:
        product_name: Exact value from the ``name`` column. If several rows
            share the name, the first one is used as the query.
        cosine_sim: Square similarity matrix aligned with ``df``'s row order.

    Returns:
        pandas.DataFrame: Up to ``TOP_N`` rows of ``df``, most similar first.
        Empty when ``product_name`` is not in the catalogue.
    """
    # Work with row positions: the similarity matrix is positional, and index
    # labels only coincide with positions for a default RangeIndex.
    positions = (df['name'] == product_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return df.iloc[0:0]
    idx = positions[0]

    # Drop the query row explicitly instead of assuming it sorts first; rows
    # with identical text tie at the top score and may precede it.
    scores = pd.Series(cosine_sim[idx]).drop(index=idx)
    top_positions = scores.nlargest(TOP_N).index
    return df.iloc[top_positions]


# Streamlit App
st.title("Myntra Fashion Recommendation System")

# Select a product
product_name = st.selectbox("Select a product", df['name'].unique())

# Show recommendations when the button is clicked
if st.button("Get Recommendations"):
    recommendations = get_recommendations(product_name)
    st.write("Recommended products:")

    if recommendations.empty:
        st.write("No similar products found.")

    for _, row in recommendations.iterrows():
        image_path = os.path.join(IMAGE_DIR, f"{row['p_id']}.jpg")
        image_ref = row['img']
        if os.path.exists(image_path):
            # Context manager closes the file handle once Streamlit has read it.
            with Image.open(image_path) as image:
                st.image(image, caption=row['name'])
        elif isinstance(image_ref, str) and image_ref.startswith(('http://', 'https://')):
            # NOTE(review): `img` presumably holds a remote image URL -- confirm.
            st.image(image_ref, caption=row['name'])
        else:
            st.write(f"Image not found for {row['name']}")


Writing app.py


In [5]:
from pyngrok import ngrok

# Start the Streamlit app
!streamlit run app.py &

# Expose the Streamlit app to the web using ngrok
public_url = ngrok.connect(port='8501')
print(f"Streamlit app is running on: {public_url}")


ModuleNotFoundError: No module named 'pyngrok'

In [10]:
import pandas as pd

# Read the product catalogue into a DataFrame
df = pd.read_csv(
    '/kaggle/input/myntra-fashion-product-dataset/Fashion Dataset.csv'
)

# Show which columns the dataset provides
print(df.columns)


Index(['Unnamed: 0', 'p_id', 'name', 'price', 'colour', 'brand', 'img',
       'ratingCount', 'avg_rating', 'description', 'p_attributes'],
      dtype='object')


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Read the product catalogue
df = pd.read_csv('/kaggle/input/myntra-fashion-product-dataset/Fashion Dataset.csv')

# Replace missing text with empty strings so the concatenation below never
# produces NaN documents
for text_column in ('name', 'brand', 'colour', 'description'):
    df[text_column] = df[text_column].fillna('')

# Build one space-separated document per product: name, brand, colour, description
combined = df['name']
for extra_column in ('brand', 'colour', 'description'):
    combined = combined + ' ' + df[extra_column]
df['combined_features'] = combined

# Fit TF-IDF on the combined documents (English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Pairwise cosine similarity between every pair of products
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Persist the catalogue and its similarity matrix together in one pickle
model_components = {
    'df': df,
    'cosine_sim': cosine_sim
}
with open('/kaggle/working/recommendation_model.pkl', 'wb') as model_file:
    pickle.dump(model_components, model_file)


In [3]:
from IPython.display import FileLink

import os

# Generate a download link for the file
file_path = '/kaggle/working/recommendation_model.pkl'

# FileLink uses the given path verbatim as the link target. An absolute
# filesystem path therefore yields a link the browser cannot resolve, so pass
# the path relative to the current working directory instead.
display(FileLink(os.path.relpath(file_path)))


In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Read the product catalogue from the Kaggle input directory
df = pd.read_csv('/kaggle/input/myntra-fashion-product-dataset/Fashion Dataset.csv')

# Blank out missing values in the text columns used as features
text_columns = ['name', 'brand', 'colour', 'description']
df[text_columns] = df[text_columns].fillna('')

# Join the text columns into a single space-separated document per product
df['combined_features'] = (
    df['name']
    .add(' ').add(df['brand'])
    .add(' ').add(df['colour'])
    .add(' ').add(df['description'])
)

# Vectorise the documents with TF-IDF, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_features'])

# Product-by-product cosine similarity matrix
cosine_sim = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)

# Write the catalogue and similarity matrix to a single pickle file
model_path = '/kaggle/working/content_based_model.pkl'
payload = {'df': df, 'cosine_sim': cosine_sim}
with open(model_path, 'wb') as model_file:
    pickle.dump(payload, model_file)

print("Content-based filtering model saved.")


Content-based filtering model saved.
