In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Replace 'file_path' with the actual path to your dataset
file_path = '/kaggle/input/dataset-decthlon/Decathlon Apparel Data.csv'
data = pd.read_csv(file_path)

In [3]:
print(data.columns)  # This will display all the column names in your dataset

Index(['product_name', 'brand', 'star_rating', 'number_of_reviews', 'MRP',
       'sale_price', 'colour'],
      dtype='object')


In [4]:
print(data['product_name'])

0                       100 Base Layer Ski Pants Women's
1               Thermal Underwear Base Layer Top Women's
2                                   100 Ski Jacket Men's
3          Forclaz Trek 500 Merino Wool Hiking Hat Adult
4                Wedze FR100 Freeride Ski Jacket Women's
                             ...                        
634         MH K100, Category 3 Hiking Sunglasses, Kids'
635        Women's Cardio Training Fitness Shoes Mid 120
636     Keepdry 100 Long-Sleeved Soccer Base Layer Top '
637             Adult Football Bottoms Essential - Black
638    Boys' Gym Tracksuit Warm Breathable Synthetic ...
Name: product_name, Length: 639, dtype: object


In [5]:
# Display the first few rows of the dataset to understand its structure
print(data.head())

# Get information about the columns, data types, and missing values
print(data.info())

# Summary statistics of numerical columns
print(data.describe())

# Unique values in categorical columns
print(data['product_name'].unique())

                                    product_name    brand  star_rating  \
0               100 Base Layer Ski Pants Women's    Wedze          4.4   
1       Thermal Underwear Base Layer Top Women's    Wedze          4.5   
2                           100 Ski Jacket Men's    Wedze          4.6   
3  Forclaz Trek 500 Merino Wool Hiking Hat Adult  Forclaz          4.6   
4        Wedze FR100 Freeride Ski Jacket Women's    Wedze          4.5   

   number_of_reviews     MRP  sale_price      colour  
0               3101    9.99        9.99       Black  
1               7367    9.99        8.00       Black  
2               3798   39.99       30.00       Black  
3               1803   24.99       24.99  Whale Gray  
4                239  119.00       70.00  Light Gray  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 639 entries, 0 to 638
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   product_name       6

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load your dataset
# Assuming 'data' is your DataFrame with columns 'MRP', 'star_rating', and 'sale_price'

# Select features and target
features = ['MRP', 'star_rating']
target = 'sale_price'

X = data[features]
y = data[target]

# Scale features and target
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

# Create sequences for RNN
sequence_length = 5  # Define sequence length
X_seq = []
y_seq = []

for i in range(len(X_scaled) - sequence_length):
    X_seq.append(X_scaled[i : i + sequence_length])
    y_seq.append(y_scaled[i + sequence_length])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

# Build the RNN model
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f'Loss: {loss}')

# Make predictions
predictions = model.predict(X_test)

# Inverse transform predictions to original scale
predicted_prices = scaler_y.inverse_transform(predictions)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.016375601291656494


In [7]:
predicted_prices = scaler_y.inverse_transform(predictions)
actual_prices = scaler_y.inverse_transform(y_test)

# Get the product names for respective test samples
product_names = data['product_name'].iloc[len(X_train):].values

# Print actual price, predicted price, and product name
for i in range(len(predicted_prices)):
    print(f"Product: {product_names[i]}, Actual Price: {actual_prices[i]}, Predicted Price: {predicted_prices[i][0]}")

Product: Men's Long-sleeve Undyed Merino Wool T-shirt - MT500, Actual Price: [29.99], Predicted Price: 21.306753158569336
Product: Quechua Trek 500, Waterproof Hiking Jacket, Men's, Actual Price: [10.], Predicted Price: 23.766355514526367
Product: Breathable Fitness Leggings Men's, Actual Price: [20.], Predicted Price: 19.697187423706055
Product: Domyos 520 Thick Yoga Exercise Mat 67" x 24" x 8mm, Actual Price: [79.99], Predicted Price: 25.68659782409668
Product: Breathable Curved Hem Workout T-Shirt Women's, Actual Price: [30.], Predicted Price: 20.188026428222656
Product: Forclaz Trek 900, Long Sleeved Merino Wool Hiking T-Shirt, Men's, Actual Price: [25.], Predicted Price: 20.999467849731445
Product: SE100 Easy Basketball Shoes ', Actual Price: [10.], Predicted Price: 21.237810134887695
Product: Warm Gym Hoodie ', Actual Price: [59.99], Predicted Price: 22.534818649291992
Product: Women's Long-Sleeved Zip Shirt Run Dry, Actual Price: [10.], Predicted Price: 19.8901424407959
Product:

In [8]:
predictions = model.predict(X_test)
predicted_prices = scaler_y.inverse_transform(predictions)
max_price_index = np.argmax(predicted_prices)
best_product = data.iloc[max_price_index]['product_name']
best_company = data.iloc[max_price_index]['brand']
print(f"Best Product: {best_product}, Best Company: {best_company}")

Best Product: Wedze 500 Piste Ski Jacket Women's, Best Company: Wedze
