In [3]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
import joblib
import matplotlib.pyplot as plt

# Read the raw inventory records from the Excel workbook.
file_path = 'productsdataset.xlsx'
data = pd.read_excel(file_path)

# Print the first rows so the loaded columns can be checked by eye.
print("Dataset Preview:", data.head(), sep="\n")

# Reshape to a wide time-item matrix: one row per 'Date', one column per
# 'Item Name', each cell holding the summed 'Stock Level' for that pair.
time_item_matrix = pd.pivot_table(
    data,
    values='Stock Level',
    index='Date',
    columns='Item Name',
    aggfunc='sum',
)
# Date/item pairs with no record come out of the pivot as NaN; store 0 instead.
time_item_matrix = time_item_matrix.fillna(0)

print("\nTime-Item Matrix Preview:", time_item_matrix.head(), sep="\n")

# Centre each row: subtract that date's mean across all items from the row.
time_item_matrix_norm = time_item_matrix.sub(time_item_matrix.mean(axis=1), axis='index')

# Fit a truncated SVD on the centred matrix (fixed seed for repeatable runs).
n_components = 4  # Number of latent features to extract
model = TruncatedSVD(n_components=n_components, random_state=42).fit(time_item_matrix_norm)

# Report the share of variance explained by each fitted component.
explained_variance = model.explained_variance_ratio_
print("\nExplained Variance Ratio by Components:")
print("\n".join(
    f"Component {rank}: {ratio:.4f}"
    for rank, ratio in enumerate(explained_variance, start=1)
))

# Plot the running total of explained variance against the component count
# and write the figure to disk.
cumulative_variance = explained_variance.cumsum()
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(
    range(1, n_components + 1),
    cumulative_variance,
    marker='o',
    linestyle='-',
    color='b',
)
ax.set_title("Cumulative Explained Variance by Truncated SVD Components", fontsize=14)
ax.set_xlabel("Number of Components", fontsize=12)
ax.set_ylabel("Cumulative Explained Variance", fontsize=12)
ax.grid(True)
fig.tight_layout()
fig.savefig('cumulative_variance.png')
# Close the figure once it has been saved; it is only needed as a file.
plt.close(fig)

# Persist the fitted model and the time-item matrix with joblib.
# NOTE(review): the matrix written here is the un-centred `time_item_matrix`,
# while the model was fitted on `time_item_matrix_norm` - confirm that
# consumers of these files re-apply the centring before using the model.
for _artifact, _target in (
    (model, 'stock_recommendation_model.joblib'),
    (time_item_matrix, 'time_item_matrix.joblib'),
):
    joblib.dump(_artifact, _target)
print("\nStock recommendation model saved successfully!")

# Function to get stock recommendations
def get_stock_recommendations(model, time_item_matrix, top_n=5):
    """Rank items by how far they fall below the model's reconstruction.

    The matrix is projected with ``model.transform`` and mapped back with
    ``model.inverse_transform``. For every column, the smallest value of
    (actual - reconstructed) over all rows is taken, and the ``top_n``
    columns with the lowest such value are returned.

    Parameters
    ----------
    model : object
        Fitted estimator providing ``transform`` and ``inverse_transform``.
    time_item_matrix : pandas.DataFrame
        Matrix to score; its index and columns label the reconstruction.
    top_n : int, default 5
        Number of columns to return.

    Returns
    -------
    pandas.Series
        Per-column minimum residual, sorted ascending (most negative first)
        and truncated to ``top_n`` entries.
    """
    # Round-trip through the latent space to get the low-rank approximation.
    latent = model.transform(time_item_matrix)
    approximation = pd.DataFrame(
        model.inverse_transform(latent),
        index=time_item_matrix.index,
        columns=time_item_matrix.columns,
    )

    # Residual = actual - reconstructed; negative means the actual value is
    # below what the model reconstructs for that cell.
    residuals = time_item_matrix.sub(approximation)

    # Worst (lowest) residual per column, then the top_n lowest columns.
    lowest_per_item = residuals.min()
    return lowest_per_item.sort_values().head(top_n)

# Score the centred matrix (the one the model was fitted on) and print the
# items returned with the default top_n.
recommendations = get_stock_recommendations(
    model=model,
    time_item_matrix=time_item_matrix_norm,
)
print("\nTop 5 Products to Restock:", recommendations, sep="\n")



Dataset Preview:
         Date                                    Item Name  Stock Level
0  2024-07-15  Photo Dynamic Therapy(PDT) Led (Chest/Back)           86
1  2024-02-05              Sheet Masks - Gold-All-in-1Mask           25
2  2024-02-18              Sheet Masks - Gold-All-in-1Mask           76
3  2024-05-28                         Gel Masks - 24K Gold           24
4  2024-06-27                       Gel Masks - Hyaluronic           44

Time-Item Matrix Preview:
Item Name   24K Gold  Bamboo Charcoal  Bulgarian Rose  Collagen Mask  \
Date                                                                   
2017-01-01       0.0              0.0             0.0            0.0   
2017-01-02       0.0              0.0             0.0            0.0   
2017-01-03       0.0              0.0             0.0            0.0   
2017-01-04       0.0              0.0             0.0            0.0   
2017-01-05       0.0              0.0             0.0            0.0   

Item Name   Fresh M