In [None]:
# Cell 1: Data Collection
import pandas as pd

# Load the book catalogue. Later steps read the 'title' and 'description'
# columns, so 'books.csv' must provide both.
df = pd.read_csv('books.csv')

# Display the first few rows of the dataset. An explicit print is required:
# a bare `df.head()` is only rendered when it is the last expression of a
# cell, and more code follows it here.
print(df.head())

# Cell 2: Data Preprocessing
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np

# Replace missing descriptions with empty strings so the tokenizer only ever
# receives text. The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the df['description'] selection: that chained
# in-place form acts on an intermediate object, triggers a FutureWarning in
# pandas 2.2 and stops modifying the DataFrame in pandas 3.0.
df['description'] = df['description'].fillna('')

# Hold out 20% of the books; the fixed seed keeps the split reproducible
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Build the vocabulary from the training descriptions only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['description'])

# Convert descriptions into sequences of integer token ids
train_sequences = tokenizer.texts_to_sequences(train_df['description'])
test_sequences = tokenizer.texts_to_sequences(test_df['description'])

# Pad every sequence to the longest training description.
# total_words is the vocabulary size plus one for the padding id 0.
max_sequence_length = max(len(seq) for seq in train_sequences)
total_words = len(tokenizer.word_index) + 1

X_train = pad_sequences(train_sequences, maxlen=max_sequence_length, padding='post')
X_test = pad_sequences(test_sequences, maxlen=max_sequence_length, padding='post')

# Display the tokenized sequences
print(X_train[:5])

# Training matrix used by the model-fitting step. Note that it contains the
# training rows only, in the shuffled order produced by train_test_split.
X_array = np.array(X_train)

# Cell 3: Model Architecture
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, Bidirectional
from keras.callbacks import EarlyStopping

# Sequence reconstruction model: padded token ids in, one probability
# distribution over the vocabulary out for every position.
input_layer = Input(shape=(max_sequence_length,))
# 100-dimensional token embeddings. The sequence length is already fixed by
# the Input layer, so the deprecated `input_length` argument is not passed.
embedding_layer = Embedding(total_words, 100)(input_layer)
# Bidirectional LSTM; return_sequences=True keeps one output per time step
lstm_layer = Bidirectional(LSTM(100, return_sequences=True))(embedding_layer)
# Softmax over the vocabulary at each time step
output_layer = Dense(total_words, activation='softmax')(lstm_layer)

# Build the model. The targets are integer token ids, hence the sparse
# variant of categorical cross-entropy.
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Display the model summary
model.summary()

# Cell 4: Model Training
# Stop when the validation loss has not improved for 2 consecutive epochs and
# roll back to the best weights. The patience has to be smaller than the epoch
# budget: with patience equal to the 5 training epochs the callback could
# never fire, because the first epoch always counts as an improvement and at
# most four non-improving epochs can follow it.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Autoencoder-style fit: the padded sequences are both input and target
model.fit(X_array, X_array, epochs=5, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Cell 5: User Input and Recommendation
# Read the free-text query and echo it back
user_query = input("Enter a book description: ")
print(user_query)

# Encode the query with the fitted tokenizer and the same padding as the
# training data
user_sequence = tokenizer.texts_to_sequences([user_query])
user_padded = pad_sequences(user_sequence, maxlen=max_sequence_length, padding='post')

# Model output for the query
reconstructed_sequence = model.predict(user_padded)

# Encode every book in df, in df's own row order. X_array cannot be used
# here: it holds only the shuffled training subset, so its predictions have
# fewer rows than df (the column assignment raises) and would not line up
# with the titles even if the lengths happened to match.
all_sequences = tokenizer.texts_to_sequences(df['description'].fillna(''))
X_all = pad_sequences(all_sequences, maxlen=max_sequence_length, padding='post')

# Store the model output of each book next to its row.
# NOTE(review): with a per-position softmax over the vocabulary each stored
# entry is a (sequence length x vocabulary size) nested list; this may not
# fit in memory for a large catalogue - confirm before running on full data.
df['reconstructed_sequence'] = model.predict(X_all).tolist()

# Euclidean distance between each book's output and the query's output.
# The column is named 'similarity', but smaller values mean a closer match.
df['similarity'] = df['reconstructed_sequence'].apply(
    lambda x: np.linalg.norm(np.array(x) - np.array(reconstructed_sequence[0]))
)

# Ascending sort puts the closest books first; keep the top 12
recommended_books = df.sort_values(by='similarity').head(12)[['title' , 'similarity']]

print("Recommended Books:")
print(recommended_books)


In [None]:
# Read the free-text query
user_query = input("Enter a book description: ")

# Encode the query with the fitted tokenizer and the same padding as the
# training data
user_sequence = tokenizer.texts_to_sequences([user_query])
user_padded = pad_sequences(user_sequence, maxlen=max_sequence_length, padding='post')

# Model output for the query
reconstructed_sequence = model.predict(user_padded)

# Encode every book in df, in df's own row order, instead of relying on
# X_array. X_array is whatever the last preprocessing cell left behind; in the
# first cell of this notebook it is built from the shuffled training split
# only, which has fewer rows than df and a different order, so assigning its
# predictions to a df column fails or misaligns titles.
all_sequences = tokenizer.texts_to_sequences(df['description'].fillna(''))
X_all = pad_sequences(all_sequences, maxlen=max_sequence_length, padding='post')

# Store the model output of each book next to its row
df['reconstructed_sequence'] = model.predict(X_all).tolist()

# Euclidean distance between each book's output and the query's output.
# The column is named 'similarity', but smaller values mean a closer match.
df['similarity'] = df['reconstructed_sequence'].apply(
    lambda x: np.linalg.norm(np.array(x) - np.array(reconstructed_sequence[0]))
)

# Ascending sort puts the closest books first; keep the top 12
recommended_books = df.sort_values(by='similarity').head(12)[['title' , 'similarity']]

print("Recommended Books:")
print(recommended_books)

In [None]:
# Cell 1: Data Collection
import pandas as pd

# Load the book catalogue. Later steps read the 'title' and 'description'
# columns, so 'books.csv' must provide both.
df = pd.read_csv('books.csv')

# Display the first few rows of the dataset. An explicit print is required:
# a bare `df.head()` is only rendered when it is the last expression of a
# cell, and more code follows it here.
print(df.head())

# Cell 2: Data Preprocessing
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Replace missing descriptions with empty strings so the tokenizer only ever
# receives text. The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the df['description'] selection: that chained
# in-place form acts on an intermediate object, triggers a FutureWarning in
# pandas 2.2 and stops modifying the DataFrame in pandas 3.0.
df['description'] = df['description'].fillna('')

# Build the vocabulary from every description
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['description'])

# Convert descriptions into sequences of integer token ids and pad them to
# the longest description. total_words is the vocabulary size plus one for
# the padding id 0.
sequences = tokenizer.texts_to_sequences(df['description'])
max_sequence_length = max(len(seq) for seq in sequences)
total_words = len(tokenizer.word_index) + 1

X = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Display the tokenized sequences
print(X[:5])

import numpy as np

# Matrix fed to the model; one row per book, in df's row order
X_array = np.array(X)

# Cell 3: Model Architecture
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense

# Reconstruction model: padded token ids in, one real value per position out.
input_layer = Input(shape=(max_sequence_length,))
# 100-dimensional token embeddings. The sequence length is already fixed by
# the Input layer, so the deprecated `input_length` argument is not passed.
embedding_layer = Embedding(total_words, 100)(input_layer)
# Single LSTM that summarises the whole sequence in a 50-unit vector
lstm_layer = LSTM(50)(embedding_layer)
# Linear layer that regresses the padded id sequence from that vector
output_layer = Dense(max_sequence_length, activation='linear')(lstm_layer)

# Build the model; mean squared error compares predicted and actual ids
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mean_squared_error')

# Display the model summary
model.summary()
# ...
# Cell 4: Model Training
# Autoencoder-style fit: the padded sequences are both input and target
model.fit(X_array, X_array, epochs=1, batch_size=32, validation_split=0.2)

# Cell 5: Recommendation
# Ask for a free-text description on the console
user_query = input("Enter a book description: ")

# Turn the query into a padded id sequence, the same way the corpus was encoded
user_sequence = tokenizer.texts_to_sequences([user_query])
user_padded = pad_sequences(
    user_sequence,
    maxlen=max_sequence_length,
    padding='post',
)

# Model output for the query
reconstructed_sequence = model.predict(user_padded)

# Model output for every book, kept as a list per row in the DataFrame
df['reconstructed_sequence'] = model.predict(X_array).tolist()


def _distance_to_query(book_output):
    """Return the Euclidean distance between one book's output and the query's."""
    return np.linalg.norm(np.array(book_output) - np.array(reconstructed_sequence[0]))


# Smaller values mean the book's output is closer to the query's output
df['similarity'] = df['reconstructed_sequence'].apply(_distance_to_query)

# Closest books first; keep the top 12 with their scores
ranked_books = df.sort_values(by='similarity')
recommended_books = ranked_books.head(12)[['title', 'similarity']]

print("Recommended Books:")
print(recommended_books)



In [6]:
# Cell 1: Data Collection
import pandas as pd

# Load the book catalogue. Later steps read the 'title' and 'description'
# columns, so 'books.csv' must provide both.
df = pd.read_csv('books.csv')

# Display the first few rows of the dataset. An explicit print is required:
# a bare `df.head()` is only rendered when it is the last expression of a
# cell, and more code follows it here.
print(df.head())

# Cell 2: Data Preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Replace missing descriptions with empty strings so the tokenizer only ever
# receives text. The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the df['description'] selection: that chained
# in-place form acts on an intermediate object, triggers a FutureWarning in
# pandas 2.2 and stops modifying the DataFrame in pandas 3.0.
df['description'] = df['description'].fillna('')

# Build the vocabulary from every description
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['description'])

# Convert descriptions into sequences of integer token ids and pad them to
# the longest description. total_words is the vocabulary size plus one for
# the padding id 0.
sequences = tokenizer.texts_to_sequences(df['description'])
max_sequence_length = max(len(seq) for seq in sequences)
total_words = len(tokenizer.word_index) + 1

X = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Display the tokenized sequences
print(X[:5])

import numpy as np

# Matrix fed to the model; one row per book, in df's row order
X_array = np.array(X)

# Cell 3: Model Architecture
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Bidirectional

# Reconstruction model: padded token ids in, one real value per position out.
input_layer = Input(shape=(max_sequence_length,))
# 100-dimensional token embeddings. The sequence length is already fixed by
# the Input layer, so the deprecated `input_length` argument is not passed.
embedding_layer = Embedding(total_words, 100)(input_layer)
# First LSTM keeps one 50-unit output per time step for the next layer
lstm_layer = LSTM(50, return_sequences=True)(embedding_layer)
# Bidirectional LSTM collapses the sequence into a single vector
bidirectional_lstm = Bidirectional(LSTM(50))(lstm_layer)
# Linear layer that regresses the padded id sequence from that vector
output_layer = Dense(max_sequence_length, activation='linear')(bidirectional_lstm)

# Build the model; mean squared error compares predicted and actual ids
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mean_squared_error')

# Display the model summary
model.summary()

# Cell 4: Model Training
# Autoencoder-style fit: the padded sequences are both input and target
model.fit(X_array, X_array, epochs=5, batch_size=32, validation_split=0.2)



[[    4    51    11 ...     0     0     0]
 [    4    21 17955 ...     0     0     0]
 [  121    59     2 ...     0     0     0]
 [    4  1923  4849 ...     0     0     0]
 [12965    62    13 ...     0     0     0]]
Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 933)]             0         
                                                                 
 embedding_4 (Embedding)     (None, 933, 100)          3373500   
                                                                 
 lstm_6 (LSTM)               (None, 933, 50)           30200     
                                                                 
 bidirectional_3 (Bidirecti  (None, 100)               40400     
 onal)                                                           
                                                                 
 dense_4 (Dense)             (None, 933) 

<keras.src.callbacks.History at 0x25020e18150>

In [11]:
# Cell 5: Recommendation
# Take user input from console
user_query = input("Enter a book description: ")

# Encode the query with the fitted tokenizer and the same padding as the
# corpus
user_sequence = tokenizer.texts_to_sequences([user_query])
user_padded = pad_sequences(user_sequence, maxlen=max_sequence_length, padding='post')

# Model output for the query
reconstructed_sequence = model.predict(user_padded)

# Model output for every book, stored next to its row
df['reconstructed_sequence'] = model.predict(X_array).tolist()

# Euclidean distance between each book's output and the query's output.
# Despite the column name this is a distance: smaller means more similar.
df['similarity'] = df['reconstructed_sequence'].apply(
    lambda x: np.linalg.norm(np.array(x) - np.array(reconstructed_sequence[0]))
)


# Sort by distance in ascending order so the closest books come first.
# Sorting in descending order would return the 12 books that are furthest
# from the query.
recommended_books = df.sort_values(by='similarity', ascending=True).head(12)[['title', 'similarity']]

print("Recommended Books:")
print(recommended_books)
