In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

In [24]:
data = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')

In [25]:
# Preprocess data
data = data[['Clothing ID', 'Title', 'Review Text','Rating']]
data = data.dropna(subset=['Review Text'])

# Split data into training and testing sets
X = data[['Title', 'Review Text']]
y = data['Rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
# Tokenize text data
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train['Review Text'])
X_train_sequences = tokenizer.texts_to_sequences(X_train['Review Text'])
X_test_sequences = tokenizer.texts_to_sequences(X_test['Review Text'])

In [27]:
# Pad sequences
max_sequence_length = 100
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

In [28]:
# Build CNN model
embedding_dim = 100
num_filters = 128
filter_sizes = [3, 4, 5]

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=max_sequence_length))
model.add(Conv1D(num_filters, 3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

In [29]:
# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_train_padded, y_train, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f9bb2d5c9d0>

In [30]:
# Generate recommendations
def recommend_items(item_id, top_n=5):
    item_sequence = tokenizer.texts_to_sequences([str(item_id)])  # Convert item_id to a string
    item_padded = pad_sequences(item_sequence, maxlen=max_sequence_length)
    rating_predictions = model.predict(item_padded)
    top_indices = np.argsort(rating_predictions, axis=1)[:, -top_n:]  # Get indices of top N predicted ratings
    recommended_items = [data['Clothing ID'].iloc[idx[0]] for idx in top_indices]  # Get the corresponding item IDs
    return recommended_items

# Example recommendation
item_id = 2  # Example item ID
recommended_items = recommend_items(item_id, top_n=5)
print("Recommended Items:", recommended_items)

Recommended Items: [767]


In [15]:
# Evaluate the model
loss = model.evaluate(X_test_padded, y_test)
print("Test Loss:", loss)

Test Loss: 0.571751058101654


In [11]:
item_id = 22  # Example item ID
recommended_items = recommend_items(item_id)
print(recommended_items)

[[2.8542988]]


In [31]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Preprocess data
data = data[['Clothing ID', 'Title', 'Review Text', 'Rating']]
data = data.dropna(subset=['Review Text'])

# Split data into training and testing sets
X = data[['Title', 'Review Text']]
y = data['Rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize text data
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train['Review Text'])
X_train_sequences = tokenizer.texts_to_sequences(X_train['Review Text'])
X_test_sequences = tokenizer.texts_to_sequences(X_test['Review Text'])

# Pad sequences
max_sequence_length = 100
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

# Build CNN model
embedding_dim = 100
num_filters = 128
filter_sizes = [3, 4, 5]

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=max_sequence_length))
model.add(Conv1D(num_filters, 3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_train_padded, y_train, epochs=10, batch_size=32)

# Evaluate the model
loss = model.evaluate(X_test_padded, y_test)
print("Test Loss:", loss)

# Generate recommendations
def recommend_items(item_text):
    item_sequence = tokenizer.texts_to_sequences([item_text])
    item_padded = pad_sequences(item_sequence, maxlen=max_sequence_length)
    rating_prediction = model.predict(item_padded)[0][0]
    return rating_prediction

# Example recommendation
item_text = "This jacket is so adorable on! I have no idea why it has such a low rating. The quality is great, and it fits perfectly."
rating_prediction = recommend_items(item_text)
print("Rating Prediction:", rating_prediction)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.6146034002304077
Rating Prediction: 4.252463


In [35]:
data = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')

In [43]:
data.isnull().sum()

Clothing ID    0
Title          0
Review Text    0
Rating         0
dtype: int64

In [33]:
# Example recommendation
item_id = 'Amazing soft top'  # Example item ID
recommended_item = recommend_items(item_id)
print("Recommended Item:", recommended_item)

Recommended Item: 861


In [47]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Preprocess data
data = data[['Clothing ID', 'Title', 'Review Text', 'Rating']]
data = data.dropna(subset=['Review Text', 'Title'])

# Split data into training and testing sets
X = data[['Review Text', 'Rating']]
y = data['Clothing ID']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize text data
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train['Review Text'])
X_train_sequences = tokenizer.texts_to_sequences(X_train['Review Text'])
X_test_sequences = tokenizer.texts_to_sequences(X_test['Review Text'])

# Pad sequences
max_sequence_length = 100
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

# Build CNN model
embedding_dim = 100
num_filters = 128
filter_sizes = [3, 4, 5]

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=max_sequence_length))
model.add(Conv1D(num_filters, 3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_train_padded, y_train, epochs=1, batch_size=32)

# Evaluate the model
loss = model.evaluate(X_test_padded, y_test)
print("Test Loss:", loss)

# Generate recommendations
def recommend_items(item_id):
    item_sequence = tokenizer.texts_to_sequences([item_id])
    item_padded = pad_sequences(item_sequence, maxlen=max_sequence_length)
    rating_prediction = model.predict(item_padded)
    return rating_prediction

# Example recommendation
item_id = 'Amazing soft jacket'  # Example item ID
rating_prediction = recommend_items(item_id)
print("Rating Prediction:", rating_prediction)


Test Loss: 31465.75
Rating Prediction: [[598.0823]]


In [48]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Preprocess data
data = data[['Clothing ID', 'Title', 'Review Text', 'Rating']]
data = data.dropna(subset=['Review Text', 'Title'])

# Split data into training and testing sets
X = data[['Title', 'Review Text']]
y = data['Rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize and pad the text data
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train['Review Text'])
X_train_sequences = tokenizer.texts_to_sequences(X_train['Review Text'])
X_test_sequences = tokenizer.texts_to_sequences(X_test['Review Text'])
max_sequence_length = 100
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

# Build the CNN model
embedding_dim = 100
num_filters = 128

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=max_sequence_length))
model.add(Conv1D(num_filters, 3, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dense(1))

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit(X_train_padded, y_train, epochs=1, batch_size=32)

# Generate recommendations for a selected clothing ID
def recommend_items(clothing_id):
    clothing_title = data[data['Clothing ID'] == clothing_id]['Title'].values[0]
    clothing_review = data[data['Clothing ID'] == clothing_id]['Review Text'].values[0]
    input_sequence = tokenizer.texts_to_sequences([clothing_review])
    input_padded = pad_sequences(input_sequence, maxlen=max_sequence_length)
    rating_prediction = model.predict(input_padded)
    return clothing_title, rating_prediction

# Example recommendation
selected_clothing_id = 22
clothing_title, rating_prediction = recommend_items(selected_clothing_id)
print("Recommended Clothing Title:", clothing_title)
print("Rating Prediction:", rating_prediction)


Recommended Clothing Title: Please bring this shirt back!!!
Rating Prediction: [[4.735408]]
