# Demo using your uploaded dataset
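This notebook runs the full pipeline on your uploaded dataset `data/sample.csv`: it loads the data, trains the sentiment model, and fits the item recommender to produce sample product recommendations.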

In [1]:
import sys, os
sys.path.append(os.path.abspath('..'))

import pandas as pd
from scripts.preprocessing import ensure_nltk, clean_text
from scripts.sentiment_model import train_sentiment_model
from scripts.recommender import ItemRecommender

# Prepare NLTK before any text processing happens.
# NOTE(review): presumably this fetches missing NLTK resources; confirm in
# scripts/preprocessing.py.
ensure_nltk()

# Read the uploaded dataset. The path is relative to the current working
# directory; low_memory=False makes pandas process the file in one pass
# rather than in chunks.
print('Loading dataset...')
csv_path = '../data/sample.csv'
df = pd.read_csv(csv_path, low_memory=False)

# Summarise what was loaded: the column names and the number of rows.
column_names = df.columns.tolist()
row_count = len(df)
print('Columns:', column_names)
print('Rows:', row_count)


Loading dataset...
Columns: ['id', 'name', 'asins', 'brand', 'categories', 'keys', 'manufacturer', 'reviews.date', 'reviews.dateAdded', 'reviews.dateSeen', 'reviews.didPurchase', 'reviews.doRecommend', 'reviews.id', 'reviews.numHelpful', 'reviews.rating', 'reviews.sourceURLs', 'reviews.text', 'reviews.title', 'reviews.userCity', 'reviews.userProvince', 'reviews.username']
Rows: 34660


In [2]:
# Train the sentiment model on the loaded DataFrame. The call returns two
# objects, unpacked below as `tfidf` and `clf`.
# NOTE(review): the names suggest a TF-IDF vectorizer and a classifier;
# confirm in scripts/sentiment_model.py.
# NOTE(review): in the recorded run the report shows recall of 0.02 for
# "negative" and 0.01 for "neutral", so the 0.93 accuracy largely reflects
# the dominant "positive" class rather than balanced performance.
print('\nTraining sentiment model...')
sentiment_artifacts = train_sentiment_model(df)
tfidf, clf = sentiment_artifacts



Training sentiment model...
Accuracy: 0.9329197922677438
Classification Report:
               precision    recall  f1-score   support

    negative       1.00      0.02      0.05       169
     neutral       0.40      0.01      0.01       300
    positive       0.93      1.00      0.97      6463

    accuracy                           0.93      6932
   macro avg       0.78      0.34      0.34      6932
weighted avg       0.91      0.93      0.90      6932



In [3]:
# Fit the recommender on the loaded DataFrame.
print('\nFitting recommender...')
rec = ItemRecommender()
rec.fit(df)

# Locate the product-ID column by trying the known candidate names in order.
# Fail with an explicit message when none is present, instead of silently
# falling back to 'asins' and hitting an opaque KeyError on the lookup below.
candidate_cols = ['asins', 'asin', 'product_id', 'productId', 'sku']
asin_col = next((c for c in candidate_cols if c in df.columns), None)
if asin_col is None:
    raise KeyError(f'No product-ID column found; expected one of {candidate_cols}')

# Choose a sample product id: the first non-missing value, so that a NaN in
# the first row is never handed to the recommender.
example_asin = df[asin_col].dropna().iloc[0]

# Show the top 5 recommendations for the sample product. In the recorded run
# these are printed as (product id, score) pairs.
print('Top recommendations for', example_asin, ':', rec.recommend(example_asin, topk=5))



Fitting recommender...
Top recommendations for B01AHB9CN2 : [('B018Y229OU', 0.9862207205798784), ('B018Y225IA', 0.985373253457339), ('B018SZT3BK', 0.9750031150635035), ('B00TSUGXKE', 0.9709524482745058), ('B01AHB9CYG', 0.9474603583530589)]
