In [7]:
from flask import Flask, request, jsonify
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, mean_squared_error
from pyngrok import ngrok
from flask_ngrok import run_with_ngrok
import numpy as np


In [5]:
# Create a Flask application instance
app = Flask(__name__)
run_with_ngrok(app)  # Start ngrok when the app is run
# loading the data from the csv file
df=pd.read_csv('fashion.csv')
df.head()

Unnamed: 0,ProductId,Gender,Category,SubCategory,ProductType,Colour,Usage,ProductTitle,Image,ImageURL
0,42419,Girls,Apparel,Topwear,Tops,White,Casual,Gini and Jony Girls Knit White Top,42419.jpg,http://assets.myntassets.com/v1/images/style/p...
1,34009,Girls,Apparel,Topwear,Tops,Black,Casual,Gini and Jony Girls Black Top,34009.jpg,http://assets.myntassets.com/v1/images/style/p...
2,40143,Girls,Apparel,Topwear,Tops,Blue,Casual,Gini and Jony Girls Pretty Blossom Blue Top,40143.jpg,http://assets.myntassets.com/v1/images/style/p...
3,23623,Girls,Apparel,Topwear,Tops,Pink,Casual,Doodle Kids Girls Pink I love Shopping Top,23623.jpg,http://assets.myntassets.com/v1/images/style/p...
4,47154,Girls,Apparel,Bottomwear,Capris,Black,Casual,Gini and Jony Girls Black Capris,47154.jpg,http://assets.myntassets.com/v1/images/style/p...


In [None]:
def normalize_title(title):
    words = title.lower().split()
    sorted_words = ' '.join(sorted(words))
    return sorted_words

# Convert titles to normalized form for comparison
df['NormalizedTitle'] = df['ProductTitle'].apply(normalize_title)

# Remove duplicates from 'df' in place
df.drop_duplicates(subset='NormalizedTitle', keep='first', inplace=True)

# Remove the 'NormalizedTitle' column
df.drop(columns=['NormalizedTitle'], inplace=True)

# Print or use the modified DataFrame 'df' with duplicates removed
print(df)

      ProductId Gender  Category SubCategory   ProductType Colour   Usage  \
0         42419  Girls   Apparel     Topwear          Tops  White  Casual   
1         34009  Girls   Apparel     Topwear          Tops  Black  Casual   
2         40143  Girls   Apparel     Topwear          Tops   Blue  Casual   
3         23623  Girls   Apparel     Topwear          Tops   Pink  Casual   
4         47154  Girls   Apparel  Bottomwear        Capris  Black  Casual   
...         ...    ...       ...         ...           ...    ...     ...   
2901      51755  Women  Footwear       Shoes  Casual Shoes  Black  Casual   
2902      47630  Women  Footwear       Shoes         Flats   Blue  Casual   
2903      32836  Women  Footwear       Shoes         Flats   Pink  Casual   
2904      35821  Women  Footwear       Shoes         Heels  Black  Casual   
2905      18553  Women  Footwear       Shoes         Heels   Blue  Casual   

                                     ProductTitle      Image  \
0          

In [None]:
# Assuming you have a DataFrame called df
df_with_index = df.reset_index()

# Assuming you have a DataFrame called df
df.reset_index(inplace=True)

# Display the DataFrame with the new column index
df.head()

Unnamed: 0,index,ProductId,Gender,Category,SubCategory,ProductType,Colour,Usage,ProductTitle,Image,ImageURL
0,0,42419,Girls,Apparel,Topwear,Tops,White,Casual,Gini and Jony Girls Knit White Top,42419.jpg,http://assets.myntassets.com/v1/images/style/p...
1,1,34009,Girls,Apparel,Topwear,Tops,Black,Casual,Gini and Jony Girls Black Top,34009.jpg,http://assets.myntassets.com/v1/images/style/p...
2,2,40143,Girls,Apparel,Topwear,Tops,Blue,Casual,Gini and Jony Girls Pretty Blossom Blue Top,40143.jpg,http://assets.myntassets.com/v1/images/style/p...
3,3,23623,Girls,Apparel,Topwear,Tops,Pink,Casual,Doodle Kids Girls Pink I love Shopping Top,23623.jpg,http://assets.myntassets.com/v1/images/style/p...
4,4,47154,Girls,Apparel,Bottomwear,Capris,Black,Casual,Gini and Jony Girls Black Capris,47154.jpg,http://assets.myntassets.com/v1/images/style/p...


In [None]:
#total number of rows and column
df.shape

(2174, 11)

In [None]:
#selecting the relevant features
selected_features=['Gender','Category','SubCategory','ProductType','Colour','Usage','ProductTitle']
selected_features

['Gender',
 'Category',
 'SubCategory',
 'ProductType',
 'Colour',
 'Usage',
 'ProductTitle']

In [None]:
# Verify the column names in your dataset
print(df.columns)

# Combine the selected features
combined_features = df['Gender'] + ' ' +df['ProductTitle'] + ' ' + df['Category'] + ' ' + df['SubCategory'] + ' ' + df['ProductType'] + ' ' + df['Colour'] + ' ' + df['Usage']

# Display the combined features
print(combined_features)
combined_features.shape

Index(['index', 'ProductId', 'Gender', 'Category', 'SubCategory',
       'ProductType', 'Colour', 'Usage', 'ProductTitle', 'Image', 'ImageURL'],
      dtype='object')
0              Girls Apparel Topwear Tops White Casual
1              Girls Apparel Topwear Tops Black Casual
2               Girls Apparel Topwear Tops Blue Casual
3               Girls Apparel Topwear Tops Pink Casual
4         Girls Apparel Bottomwear Capris Black Casual
                             ...                      
2169    Women Footwear Shoes Casual Shoes Black Casual
2170            Women Footwear Shoes Flats Blue Casual
2171            Women Footwear Shoes Flats Pink Casual
2172           Women Footwear Shoes Heels Black Casual
2173            Women Footwear Shoes Heels Blue Casual
Length: 2174, dtype: object


In [None]:
#convert textual data into numerical data
vectorizer=TfidfVectorizer()


In [None]:
feature_vectors=vectorizer.fit_transform(combined_features)

In [None]:
feature_vectors

<2174x89 sparse matrix of type '<class 'numpy.float64'>'
	with 12428 stored elements in Compressed Sparse Row format>

In [None]:
#cosine similarity
#getting similarity score using cosine similarity
similarity=cosine_similarity(feature_vectors)

In [None]:
# Round the similarity values to two decimal places
similarity= np.round(similarity, decimals=2)

In [None]:
similarity

array([[1.        , 0.82086334, 0.80648582, ..., 0.03602434, 0.04366241,
        0.04263056],
       [0.82086334, 1.        , 0.8090005 , ..., 0.03613667, 0.24987308,
        0.04276348],
       [0.80648582, 0.8090005 , 1.        , ..., 0.03550373, 0.04303142,
        0.27996654],
       ...,
       [0.03602434, 0.03613667, 0.03550373, ..., 1.        , 0.33281061,
        0.32494547],
       [0.04366241, 0.24987308, 0.04303142, ..., 0.33281061, 1.        ,
        0.74155164],
       [0.04263056, 0.04276348, 0.27996654, ..., 0.32494547, 0.74155164,
        1.        ]])

In [None]:
similarity.shape
#we have to check the similiarity with each movie

(2174, 2174)

In [None]:
#
# Function to get recommendations for a specific product
#def get_recommendations(product_title, similarity_scores, df, num_recommendations=10):
 #   find_close_match = difflib.get_close_matches(product_title, df['ProductTitle'].tolist())
  #  close_match = find_close_match[0]
   # index_of_product = df[df.ProductTitle == close_match].index[0]
    #similarity_scores = list(enumerate(similarity_scores[index_of_product]))
    #sorted_similar_products = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    #return [df.loc[p[0], 'ProductTitle'] for p in sorted_similar_products[:num_recommendations]]


def get_recommendations(product_title, similarity_scores, df, num_recommendations=20):
    find_close_match = difflib.get_close_matches(product_title, df['ProductTitle'].tolist())
    close_match = find_close_match[0]
    index_of_product = df[df.ProductTitle == close_match].index[0]
    similarity_scores = list(enumerate(similarity_scores[index_of_product]))
    sorted_similar_products = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    recommended_products = []
    for p in sorted_similar_products[:num_recommendations]:
        recommended_product = {
            'image_url': df.loc[p[0], 'ImageURL'],
            'similarity_score': p[1],
        }
        recommended_products.append(recommended_product)

    return recommended_products

   # recommended_image_urls = []
   # for p in sorted_similar_products[:num_recommendations]:
    #    image_url = df.loc[p[0], 'ImageURL']
     #   recommended_image_urls.append(image_url)

  #  return recommended_image_urls


In [None]:
@app.route('/get_recommendations', methods=['POST'])
def get_recommendations_endpoint():
    data = request.get_json()
    product_title = data.get('product_title')

    # Get recommendations using the provided product_title
   # recommended_image_urls = get_recommendations(product_title, similarity, df, num_recommendations=4)

    # Return the recommendations as JSON
    #return jsonify({'image_urls': recommended_image_urls})

     # Get recommendations using the provided product_title
    recommended_products = get_recommendations(product_title, similarity, df, num_recommendations=20)

    # Return the recommendations as JSON
    return jsonify({'recommended_products': recommended_products})

#@app.route('/get_recommendations', methods=['POST'])
#def get_recommendations_endpoint():
 #   data = request.get_json()
  #  product_title = data.get('product_title')

    # Assuming you have implemented the 'get_recommendations' function correctly
   # recommended_products = get_recommendations(product_title, similarity, df, num_recommendations=20)

    # Return the recommendations as JSON
    #return jsonify({'recommendations': recommended_products})

if __name__ == '__main__':
   app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * Running on http://8f56-35-245-62-155.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [04/Aug/2023 10:23:02] "POST /get_recommendations HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2023 10:24:00] "POST /get_recommendations HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2023 10:24:54] "POST /get_recommendations HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Aug/2023 10:25:41] "POST /get_recommendations HTTP/1.1" 200 -
