In [1]:
# Install CLIP library
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-thb4ty_p
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-thb4ty_p
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->clip==1.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->clip==1.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86

In [3]:
import json
import pandas as pd
import requests
import os
import io
from PIL import Image
from tqdm import tqdm
import pickle
from collections import defaultdict
import gc
import psutil
import shutil
import zipfile

# Paths
metadata_path = r"/kaggle/input/amazon-dataset/meta_Clothing_Shoes_and_Jewelry.json"
reviews_path = r"/kaggle/input/amazon-dataset/Clothing_Shoes_and_Jewelry.json"
output_pickle = r"/kaggle/working/preprocessed_fashion_data.pkl"
output_interactions = r"/kaggle/working/user_item_interactions.pkl"
image_folder = "/kaggle/working/images"
output_zip = "/kaggle/working/amazon_fashion_data.zip"

# Clean image folder to free disk space
if os.path.exists(image_folder):
    shutil.rmtree(image_folder)
os.makedirs(image_folder, exist_ok=True)

# Helper: Print memory usage
def print_memory_usage(step=""):
    process = psutil.Process()
    mem_info = process.memory_info()
    print(f"Memory usage after {step}: {mem_info.rss / 1024 ** 2:.2f} MB")

# Helper: Download and save one image per item
def download_first_image(urls, asin):
    for i, url in enumerate(urls):
        try:
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                img = Image.open(io.BytesIO(response.content))
                img.verify()  # Verify image integrity
                img_path = os.path.join(image_folder, f"{asin}.jpg")
                with open(img_path, 'wb') as f:
                    f.write(response.content)
                return img_path, url
        except Exception:
            continue
    return None, None

# Step 1: Load and filter metadata
print("Loading metadata...")
meta_asin_to_data = {}
valid_asins = set()
with open(metadata_path, 'r', encoding='utf-8') as f:
    for line in tqdm(f, desc="Reading metadata"):
        data = json.loads(line)
        asin = data.get('asin')
        if not asin:
            continue
        title = data.get('title', '')
        description = data.get('description', '')
        if isinstance(description, list):
            description = ' '.join([str(item) for item in description if item])
        elif not isinstance(description, str):
            description = ''
        brand = data.get('brand', '')
        categories = data.get('categories', [])
        if isinstance(categories, list):
            categories = ' '.join([str(cat) for sublist in categories for cat in sublist if cat])
        else:
            categories = ''
        image_urls = data.get('imageURLHighRes', [])
        textual_features = ' '.join([title, description, brand, categories]).strip()
        if textual_features and image_urls:
            meta_asin_to_data[asin] = {
                'textual_features': textual_features,
                'imageURLs': image_urls,
            }
            valid_asins.add(asin)
print(f"Found {len(valid_asins)} items with textual features and image URLs")
print_memory_usage("metadata loading")
del data, line
gc.collect()

# Step 2: Load reviews and include overall rating
print("Loading reviews...")
chunk_size = 50000
all_reviews = []
with open(reviews_path, 'r', encoding='utf-8') as f:
    reviews_chunk = []
    for line in tqdm(f, desc="Reading reviews"):
        data = json.loads(line)
        asin = data.get('asin')
        if asin in valid_asins:
            reviews_chunk.append({
                'reviewerID': data.get('reviewerID'),
                'asin': asin,
                'overall': data.get('overall', 0.0),
            })
        if len(reviews_chunk) >= chunk_size:
            df_chunk = pd.DataFrame(reviews_chunk)
            reviews_chunk = []  # Clear chunk
            # Step 3: Filter users and items with >=5 interactions
            user_counts = df_chunk['reviewerID'].value_counts()
            item_counts = df_chunk['asin'].value_counts()
            df_chunk = df_chunk[
                (df_chunk['reviewerID'].isin(user_counts[user_counts >= 5].index)) &
                (df_chunk['asin'].isin(item_counts[item_counts >= 5].index))
            ]
            if not df_chunk.empty:
                # Step 4: Merge metadata
                df_chunk['textual_features'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['textual_features'])
                df_chunk['imageURLs'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['imageURLs'])
                all_reviews.extend(df_chunk.to_dict('records'))
            del df_chunk, user_counts, item_counts
            print_memory_usage("processing review chunk")
            gc.collect()
    if reviews_chunk:
        df_chunk = pd.DataFrame(reviews_chunk)
        user_counts = df_chunk['reviewerID'].value_counts()
        item_counts = df_chunk['asin'].value_counts()
        df_chunk = df_chunk[
            (df_chunk['reviewerID'].isin(user_counts[user_counts >= 5].index)) &
            (df_chunk['asin'].isin(item_counts[item_counts >= 5].index))
        ]
        if not df_chunk.empty:
            df_chunk['textual_features'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['textual_features'])
            df_chunk['imageURLs'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['imageURLs'])
            all_reviews.extend(df_chunk.to_dict('records'))
        del df_chunk, user_counts, item_counts, reviews_chunk
        print_memory_usage("processing final review chunk")
        gc.collect()

# Build DataFrame
print("Building DataFrame...")
df = pd.DataFrame(all_reviews)
print_memory_usage("building DataFrame")
del all_reviews
gc.collect()

# Step 5: Download one image per item
print("Downloading one image per item...")
asin_to_image = {}
unique_asins = df['asin'].unique()
for asin in tqdm(unique_asins, desc="Downloading images"):
    urls = meta_asin_to_data[asin]['imageURLs']
    image_path, image_url = download_first_image(urls, asin)
    if image_path and image_url:
        asin_to_image[asin] = {'image_path': image_path, 'image_url': image_url}

# Filter DataFrame to keep only items with downloadable images
print("Filtering items without downloadable images...")
df = df[df['asin'].isin(asin_to_image.keys())].reset_index(drop=True)
df['image_path'] = df['asin'].map(lambda x: asin_to_image[x]['image_path'])
df['image_url'] = df['asin'].map(lambda x: asin_to_image[x]['image_url'])
print(f"Dataset size after image filtering: {len(df)} samples")
print_memory_usage("image downloading and filtering")
del asin_to_image
gc.collect()

# Verify image storage
print("Verifying image storage...")
image_count = len([f for f in os.listdir(image_folder) if f.endswith('.jpg')])
print(f"Total images stored: {image_count}")
if image_count == 0:
    print("Warning: No images were stored. Check image URLs or network connectivity.")

# Step 6: Build user-item interaction dictionary
print("Building user-item interaction dictionary...")
user_item_dict = defaultdict(list)
for idx, row in df.iterrows():
    user_item_dict[row['reviewerID']].append(
        (row['asin'], row['overall'], row['textual_features'], row['image_path'])
    )
print_memory_usage("building interactions")
gc.collect()

# Save processed DataFrame
print(f"Saving processed DataFrame to {output_pickle}...")
df.to_pickle(output_pickle)
print_memory_usage("saving DataFrame")
gc.collect()

# Save user-item interactions
print(f"Saving user-item interactions to {output_interactions}...")
with open(output_interactions, 'wb') as f:
    pickle.dump(user_item_dict, f)
print_memory_usage("saving interactions")
gc.collect()

# Create ZIP file of all outputs
print(f"Creating ZIP file: {output_zip}...")
with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(image_folder):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, "/kaggle/working")
            zipf.write(file_path, arcname)
            print(f"Added {arcname} to ZIP")
    
    for file in ["preprocessed_fashion_data.pkl", "user_item_interactions.pkl"]:
        file_path = os.path.join("/kaggle/working", file)
        arcname = os.path.relpath(file_path, "/kaggle/working")
        zipf.write(file_path, arcname)
        print(f"Added {arcname} to ZIP")

# Verify ZIP file size
zip_size = os.path.getsize(output_zip) / (1024 ** 3)  # Size in GiB
print(f"ZIP file created: {output_zip} (Size: {zip_size:.2f} GiB)")
print("You can now download the ZIP file from the Output tab in Kaggle.")

print(f"✅ Done! Final dataset size: {len(df)} samples, {len(user_item_dict)} users")

Loading metadata...


Reading metadata: 2685059it [02:24, 18547.69it/s]


Found 1998477 items with textual features and image URLs
Memory usage after metadata loading: 4200.05 MB
Loading reviews...


Reading reviews: 71806it [00:02, 22307.27it/s] 

Memory usage after processing review chunk: 4203.12 MB


Reading reviews: 118768it [00:04, 20734.07it/s]

Memory usage after processing review chunk: 4203.16 MB


Reading reviews: 170560it [00:06, 21448.18it/s]

Memory usage after processing review chunk: 4204.16 MB


Reading reviews: 229350it [00:09, 21075.65it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 276732it [00:11, 20282.06it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 326640it [00:13, 20941.87it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 381301it [00:15, 20562.83it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 430670it [00:17, 21032.47it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 489515it [00:20, 21521.41it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 544493it [00:22, 25498.33it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 585375it [00:24, 18711.39it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 647155it [00:26, 19392.05it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 701248it [00:29, 19366.59it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 745396it [00:31, 18835.91it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 801206it [00:33, 20310.27it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 858752it [00:35, 21511.76it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 915288it [00:38, 21218.44it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 965304it [00:40, 21096.83it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1017374it [00:42, 19235.34it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1069800it [00:44, 19242.02it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1137807it [00:47, 27357.46it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1185722it [00:49, 21544.08it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1245091it [00:51, 27108.94it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1291124it [00:54, 21073.08it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1350841it [00:56, 27230.45it/s]

Memory usage after processing review chunk: 4204.41 MB


Reading reviews: 1396304it [00:58, 20554.01it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1452525it [01:00, 19658.49it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1512176it [01:03, 18510.56it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1563633it [01:05, 18591.49it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1617174it [01:07, 19381.90it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1676232it [01:10, 18077.61it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1728860it [01:12, 19287.85it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1784730it [01:14, 20357.21it/s]

Memory usage after processing review chunk: 4204.54 MB


Reading reviews: 1837991it [01:17, 19504.67it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 1891605it [01:19, 18799.96it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 1941613it [01:21, 18625.83it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2004383it [01:24, 25572.17it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2050907it [01:26, 20617.28it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2108211it [01:28, 20362.74it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2163784it [01:30, 20333.47it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2220941it [01:33, 20424.23it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2274522it [01:35, 20011.13it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2325375it [01:37, 18049.70it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2376491it [01:40, 18730.57it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2429247it [01:42, 19550.08it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2487314it [01:44, 20267.86it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2540743it [01:46, 20015.15it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2597831it [01:49, 20764.88it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2650522it [01:51, 19237.11it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2704543it [01:53, 19473.53it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2752038it [01:56, 19771.80it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2803934it [01:58, 18838.18it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2854527it [02:00, 18643.39it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2911112it [02:02, 20347.37it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 2970379it [02:05, 20806.17it/s]

Memory usage after processing review chunk: 4204.66 MB


Reading reviews: 3029629it [02:07, 26457.01it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3077324it [02:09, 21320.21it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3139313it [02:11, 22322.23it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3184759it [02:14, 19301.91it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3237010it [02:16, 19242.68it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3297849it [02:18, 21972.38it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3347059it [02:20, 20522.31it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3392006it [02:23, 14217.12it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3445305it [02:25, 16268.61it/s]

Memory usage after processing review chunk: 4204.79 MB


Reading reviews: 3499977it [02:27, 15920.53it/s]

Memory usage after processing review chunk: 4205.16 MB


Reading reviews: 3545387it [02:30, 9127.73it/s] 

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3602929it [02:33, 9273.55it/s] 

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3658666it [02:36, 10711.88it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3714411it [02:39, 6769.48it/s] 

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3763361it [02:40, 55137.25it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3822706it [02:45, 12268.97it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3889394it [02:47, 20050.27it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 3938641it [02:50, 11371.16it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4000237it [02:53, 14667.16it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4042162it [02:53, 49355.21it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4123514it [02:59, 23088.17it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4167430it [03:01, 19132.99it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4219897it [03:03, 18860.74it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4281840it [03:06, 20184.13it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4337074it [03:08, 19466.88it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4385802it [03:10, 18250.86it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4440852it [03:12, 19094.30it/s]

Memory usage after processing review chunk: 4205.29 MB


Reading reviews: 4495642it [03:15, 19286.55it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4554887it [03:17, 21176.53it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4615283it [03:19, 21712.58it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4664094it [03:22, 20854.90it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4713208it [03:24, 17494.52it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4771524it [03:27, 18886.07it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4825776it [03:29, 19715.22it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4872001it [03:31, 12624.28it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4933723it [03:34, 18231.37it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 4987380it [03:36, 18761.17it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 5054200it [03:39, 24893.83it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 5100104it [03:42, 19084.47it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 5152099it [03:44, 16730.89it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 5199536it [03:47, 13143.46it/s]

Memory usage after processing review chunk: 4205.41 MB


Reading reviews: 5266315it [03:49, 18339.75it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5322399it [03:52, 19493.83it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5380249it [03:54, 22912.70it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5427617it [03:57, 14978.28it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5476488it [03:59, 15841.32it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5522703it [04:02, 10534.76it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5584886it [04:05, 11918.18it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5635098it [04:08, 14180.75it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5699940it [04:10, 19129.21it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5749136it [04:13, 16906.30it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5817372it [04:15, 19462.22it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5860589it [04:18, 13614.44it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5917032it [04:21, 9201.11it/s] 

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 5969021it [04:24, 7970.57it/s] 

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6026565it [04:27, 9655.06it/s] 

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6079840it [04:30, 9771.02it/s] 

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6148366it [04:33, 19195.43it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6203974it [04:35, 19867.25it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6246375it [04:37, 17983.55it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6303258it [04:40, 19988.59it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6357924it [04:42, 20433.32it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6400970it [04:44, 18005.78it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6463271it [04:47, 19385.67it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6519802it [04:49, 19930.38it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6574770it [04:51, 24547.12it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6625162it [04:54, 18946.96it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6679503it [04:56, 19300.89it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6744169it [04:58, 20052.70it/s]

Memory usage after processing review chunk: 4205.54 MB


Reading reviews: 6802857it [05:01, 20477.87it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 6847406it [05:03, 18724.05it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 6903667it [05:05, 19855.59it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 6965750it [05:08, 19889.59it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7023480it [05:10, 20806.17it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7078128it [05:12, 19863.88it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7131268it [05:14, 18275.83it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7189520it [05:17, 19719.46it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7246969it [05:19, 20703.79it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7300665it [05:21, 24364.45it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7348494it [05:24, 20645.22it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7407922it [05:26, 21171.55it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7458630it [05:28, 18236.79it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7514966it [05:31, 20156.71it/s]

Memory usage after processing review chunk: 4205.66 MB


Reading reviews: 7575396it [05:33, 21241.83it/s]

Memory usage after processing review chunk: 4205.79 MB


Reading reviews: 7622413it [05:35, 19625.75it/s]

Memory usage after processing review chunk: 4205.79 MB


Reading reviews: 7679964it [05:37, 20310.97it/s]

Memory usage after processing review chunk: 4205.79 MB


Reading reviews: 7735095it [05:40, 20018.15it/s]

Memory usage after processing review chunk: 4205.79 MB


Reading reviews: 7792585it [05:42, 20498.91it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 7848732it [05:44, 19809.12it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 7905034it [05:47, 19287.44it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 7965273it [05:49, 26166.29it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8010354it [05:51, 20074.09it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8067097it [05:54, 20240.11it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8123907it [05:56, 20538.65it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8175037it [05:58, 18922.69it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8228576it [06:01, 18942.98it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8285450it [06:03, 20138.58it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8339036it [06:05, 18923.24it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8399006it [06:08, 26161.65it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8444437it [06:10, 20804.79it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8502200it [06:12, 20542.63it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8559903it [06:15, 20857.10it/s]

Memory usage after processing review chunk: 4205.91 MB


Reading reviews: 8606202it [06:17, 19466.91it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8666991it [06:19, 21046.57it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8722818it [06:21, 20180.97it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8784271it [06:24, 21622.59it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8832342it [06:26, 20206.41it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8895140it [06:28, 21844.15it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 8944333it [06:31, 20523.94it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9005616it [06:33, 21444.42it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9060202it [06:35, 20222.12it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9114411it [06:38, 19323.26it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9167683it [06:40, 19755.24it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9229596it [06:42, 19679.17it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9282815it [06:45, 19172.32it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9339544it [06:47, 24802.05it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9385758it [06:49, 19958.16it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9443384it [06:52, 19922.09it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9498702it [06:54, 19404.02it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9555381it [06:56, 19907.27it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9601728it [06:58, 14774.00it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9661377it [07:01, 15430.20it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9717187it [07:04, 19513.94it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9774233it [07:06, 20644.95it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9821376it [07:08, 19675.96it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9882581it [07:10, 21384.02it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9942564it [07:13, 21279.84it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 9990498it [07:15, 13982.25it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10051984it [07:18, 18952.27it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10105424it [07:20, 18277.14it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10164589it [07:22, 20809.09it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10227438it [07:25, 19545.70it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10273664it [07:27, 17968.74it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10332895it [07:30, 20501.43it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10388020it [07:32, 20201.97it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10448015it [07:34, 26641.92it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10494619it [07:36, 20763.96it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10551059it [07:39, 20083.74it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10608013it [07:41, 20750.82it/s]

Memory usage after processing review chunk: 4206.04 MB


Reading reviews: 10652917it [07:43, 19182.68it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10718830it [07:46, 20848.87it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10765020it [07:48, 20047.07it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10824735it [07:50, 21027.36it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10872191it [07:52, 20058.37it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10928115it [07:55, 19210.69it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 10983139it [07:57, 19297.20it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11046147it [07:59, 22448.87it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11093911it [08:02, 20665.90it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11144961it [08:04, 20929.77it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11202612it [08:06, 20919.63it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11260608it [08:08, 19861.73it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11308845it [08:11, 19342.35it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11360593it [08:13, 17322.64it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11415230it [08:16, 17569.27it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11473246it [08:18, 19984.52it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11531418it [08:20, 20212.44it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11578231it [08:23, 19344.24it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11635295it [08:25, 19286.07it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11678312it [08:27, 16617.19it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11740841it [08:30, 18741.75it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11799483it [08:32, 20274.03it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11844459it [08:35, 18243.86it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11901168it [08:37, 18363.51it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 11956543it [08:39, 19228.65it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12011280it [08:42, 18744.22it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12066067it [08:44, 18570.65it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12122241it [08:47, 19265.05it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12175869it [08:49, 18905.46it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12230766it [08:51, 18971.36it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12281350it [08:54, 18017.19it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 12335368it [08:56, 18745.50it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12380088it [08:58, 17906.18it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12440290it [09:01, 17869.53it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12497134it [09:03, 19963.79it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12543152it [09:05, 18940.44it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12599620it [09:08, 19836.99it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12652238it [09:10, 18739.98it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12706546it [09:12, 19644.04it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12762325it [09:15, 19527.03it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12815605it [09:17, 18212.64it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12858070it [09:20, 11024.42it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12917554it [09:25, 6625.12it/s] 

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 12983454it [09:29, 13270.95it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13024483it [09:32, 10101.37it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13077351it [09:35, 10623.13it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13133678it [09:38, 11931.44it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13190594it [09:41, 10910.81it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13244500it [09:44, 9498.14it/s] 

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13302125it [09:47, 16945.07it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13356933it [09:49, 18973.01it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13412040it [09:51, 19719.63it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13468725it [09:54, 19210.26it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13523248it [09:56, 18578.39it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13576597it [09:59, 18189.70it/s]

Memory usage after processing review chunk: 4206.16 MB


Reading reviews: 13636727it [10:01, 18704.34it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13686697it [10:04, 17296.09it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13735943it [10:06, 17472.10it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13786637it [10:08, 17372.67it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13842352it [10:11, 19248.11it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13895950it [10:13, 18727.64it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 13954478it [10:15, 18427.26it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14007556it [10:18, 18648.83it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14057563it [10:20, 17324.83it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14109489it [10:22, 18453.75it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14165905it [10:25, 17338.63it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14209780it [10:27, 15154.81it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14262174it [10:30, 15385.23it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14312629it [10:33, 11634.13it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14375243it [10:35, 13720.70it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14422135it [10:38, 11926.07it/s]

Memory usage after processing review chunk: 4206.29 MB


Reading reviews: 14476063it [10:41, 12436.75it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14534404it [10:44, 13793.22it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14583984it [10:46, 12320.88it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14633864it [10:49, 12960.45it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14694402it [10:52, 13095.68it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14748182it [10:54, 17622.90it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14811723it [10:57, 24963.83it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14861491it [10:59, 18473.67it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14913222it [11:01, 17841.17it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 14966992it [11:04, 18625.54it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15020783it [11:06, 18547.27it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15072152it [11:08, 18113.28it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15127726it [11:11, 19402.63it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15182309it [11:13, 19014.68it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15236825it [11:15, 19183.81it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15295453it [11:18, 20846.07it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15353954it [11:20, 20665.44it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15407736it [11:22, 24413.68it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15461217it [11:25, 25139.53it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15503870it [11:27, 19118.91it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15557529it [11:29, 19491.29it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15609769it [11:32, 18634.23it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15664140it [11:34, 19395.36it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15718612it [11:36, 19497.50it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15771876it [11:39, 17727.50it/s]

Memory usage after processing review chunk: 4206.41 MB


Reading reviews: 15824390it [11:41, 18685.51it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 15875937it [11:43, 18228.93it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 15926085it [11:46, 17678.14it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 15979629it [11:48, 18660.83it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16032387it [11:50, 18234.05it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16082940it [11:53, 17944.48it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16136671it [11:55, 18884.05it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16187067it [11:57, 18555.77it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16244459it [12:00, 20193.97it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16301332it [12:02, 20829.30it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16347880it [12:04, 19469.57it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16409326it [12:07, 21810.21it/s]

Memory usage after processing review chunk: 4206.54 MB


Reading reviews: 16454138it [12:09, 18908.14it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16512332it [12:11, 19897.71it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16563847it [12:14, 22421.19it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16615334it [12:16, 18816.09it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16670077it [12:18, 19409.31it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16722528it [12:21, 18572.15it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16774942it [12:23, 19579.31it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16829947it [12:25, 19746.38it/s]

Memory usage after processing review chunk: 4206.79 MB


Reading reviews: 16888151it [12:27, 20615.18it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 16934017it [12:30, 19110.42it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 16984788it [12:32, 16937.94it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17041097it [12:34, 19268.59it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17092437it [12:37, 17711.24it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17153189it [12:39, 20435.83it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17197168it [12:41, 18105.64it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17261889it [12:44, 19416.14it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17315093it [12:46, 20123.64it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17359604it [12:48, 18468.16it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17418965it [12:51, 20997.94it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17474073it [12:53, 19977.94it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17531846it [12:55, 25714.35it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17575043it [12:57, 19435.13it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17626694it [13:00, 18958.85it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17682852it [13:02, 19936.63it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17741027it [13:04, 20953.59it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17799977it [13:07, 27154.32it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17847066it [13:09, 20654.99it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17891421it [13:11, 18874.94it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17944753it [13:14, 18155.06it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 17995887it [13:16, 19019.20it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18050671it [13:18, 19539.47it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18106896it [13:20, 20383.19it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18152186it [13:23, 19088.89it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18210687it [13:25, 26074.95it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18258053it [13:27, 20752.91it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18314144it [13:30, 25528.17it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18360110it [13:32, 20102.94it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18406223it [13:34, 19175.44it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18459945it [13:36, 18883.49it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18510590it [13:39, 17241.86it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18564117it [13:41, 18358.24it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18607768it [13:43, 13986.29it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18670999it [13:46, 18787.05it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18722067it [13:48, 17821.42it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18779252it [13:51, 24659.68it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18822744it [13:53, 19144.84it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18876276it [13:55, 19428.91it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18928977it [13:57, 18081.70it/s]

Memory usage after processing review chunk: 4207.16 MB


Reading reviews: 18983950it [14:00, 19692.25it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19048433it [14:02, 27296.99it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19085947it [14:04, 20275.07it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19142564it [14:07, 19952.71it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19201110it [14:09, 25525.95it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19244948it [14:11, 18822.78it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19306488it [14:14, 26768.71it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19350596it [14:16, 20019.47it/s]

Memory usage after processing review chunk: 4207.29 MB


Reading reviews: 19410556it [14:18, 26253.68it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19458173it [14:20, 20848.21it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19502236it [14:23, 18087.73it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19563566it [14:25, 21418.53it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19609643it [14:27, 19277.18it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19670880it [14:29, 26890.21it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19712492it [14:32, 18449.88it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19766874it [14:34, 20095.00it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19828034it [14:36, 27749.93it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19875153it [14:38, 21107.09it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19924083it [14:41, 20247.94it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 19971245it [14:43, 19027.24it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20030762it [14:45, 18608.01it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20085819it [14:48, 24915.63it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20120549it [14:50, 14166.54it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20178224it [14:53, 17210.40it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20232117it [14:55, 17433.36it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20287981it [14:57, 19600.92it/s]

Memory usage after processing review chunk: 4207.41 MB


Reading reviews: 20335799it [14:59, 20070.43it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20394932it [15:02, 20894.39it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20451146it [15:04, 26259.53it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20496138it [15:06, 20158.01it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20544276it [15:09, 20114.73it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20598922it [15:11, 19886.03it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20646374it [15:13, 19900.14it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20698290it [15:15, 19109.36it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20755095it [15:18, 20642.66it/s]

Memory usage after processing review chunk: 4207.54 MB


Reading reviews: 20800818it [15:20, 18888.16it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 20854361it [15:22, 19300.96it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 20915285it [15:24, 21553.85it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 20962068it [15:27, 19629.43it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21008636it [15:29, 19679.00it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21066261it [15:31, 20125.07it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21121722it [15:33, 19953.20it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21168599it [15:36, 19665.45it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21227137it [15:38, 20895.28it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21270170it [15:40, 17915.61it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21328875it [15:43, 20485.24it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21384624it [15:45, 25163.51it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21432296it [15:47, 20411.05it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21489175it [15:49, 25726.78it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21533356it [15:52, 19452.93it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21588598it [15:54, 19332.57it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21634813it [15:56, 19286.00it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21694221it [15:58, 25857.48it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21738097it [16:01, 18910.49it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21794008it [16:03, 20223.35it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21840763it [16:05, 19538.04it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21896177it [16:08, 19479.79it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 21943776it [16:10, 19664.79it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22000121it [16:12, 19780.13it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22044298it [16:14, 18475.52it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22102823it [16:17, 20197.04it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22145253it [16:19, 18561.91it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22201473it [16:21, 19661.74it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22246913it [16:23, 18698.38it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22301520it [16:26, 17000.47it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22349069it [16:28, 19356.43it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22411055it [16:30, 21562.25it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22458617it [16:33, 20321.98it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22505929it [16:35, 19372.97it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22565245it [16:37, 21123.32it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22612020it [16:39, 19462.62it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22658526it [16:42, 19459.04it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22707312it [16:44, 20015.49it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22767166it [16:46, 21350.18it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22826776it [16:49, 21040.27it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22903110it [16:51, 19908.34it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 22968129it [16:54, 20068.36it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 23030144it [16:56, 19139.43it/s]

Memory usage after processing review chunk: 4207.66 MB


Reading reviews: 23094600it [16:58, 20092.80it/s]

Memory usage after processing review chunk: 4207.79 MB


Reading reviews: 23174096it [17:01, 28368.43it/s]

Memory usage after processing review chunk: 4207.91 MB


Reading reviews: 23230907it [17:03, 21210.53it/s]

Memory usage after processing review chunk: 4207.91 MB


Reading reviews: 23307691it [17:06, 21594.07it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23374930it [17:08, 20961.10it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23433637it [17:11, 20519.77it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23504206it [17:13, 22102.66it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23559515it [17:15, 20471.13it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23631490it [17:18, 22139.84it/s]

Memory usage after processing review chunk: 4208.04 MB


Reading reviews: 23679286it [17:20, 15423.65it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 23754311it [17:23, 21127.01it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 23800536it [17:23, 56596.34it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 23885500it [17:28, 20578.87it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 23942463it [17:30, 16987.31it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24004499it [17:33, 13903.08it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24065743it [17:36, 19344.76it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24134216it [17:38, 21708.01it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24203270it [17:40, 20163.19it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24262462it [17:43, 20252.10it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24332196it [17:45, 21415.71it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24403451it [17:48, 21599.96it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24468747it [17:50, 25683.23it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24529544it [17:52, 22134.27it/s]

Memory usage after processing review chunk: 4208.16 MB


Reading reviews: 24597019it [17:55, 21786.52it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24648649it [17:57, 15940.03it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24714153it [18:00, 14991.15it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24786737it [18:02, 19667.74it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24839057it [18:05, 10493.51it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24905415it [18:08, 11393.65it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 24969845it [18:11, 11129.39it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25038491it [18:14, 15612.14it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25111400it [18:17, 21873.62it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25174194it [18:19, 22589.25it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25235761it [18:21, 21609.26it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25298760it [18:24, 22588.00it/s]

Memory usage after processing review chunk: 4208.29 MB


Reading reviews: 25364092it [18:26, 23274.67it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25428423it [18:28, 22955.87it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25491893it [18:31, 22422.76it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25555226it [18:33, 22530.21it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25616130it [18:35, 21777.20it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25689923it [18:38, 23005.75it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25753289it [18:40, 22083.34it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25826151it [18:42, 22867.38it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25886272it [18:45, 22163.57it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 25959923it [18:47, 29081.58it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26023941it [18:50, 23388.71it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26085250it [18:52, 21736.75it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26146502it [18:54, 22095.89it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26209395it [18:57, 22141.35it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26270304it [18:59, 21765.65it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26332892it [19:01, 21824.67it/s]

Memory usage after processing review chunk: 4208.41 MB


Reading reviews: 26392385it [19:04, 21041.83it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26466594it [19:06, 23773.98it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26530409it [19:08, 23041.68it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26595338it [19:11, 28481.64it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26659813it [19:13, 23478.05it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26719165it [19:15, 21871.12it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26781947it [19:17, 22037.05it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26842087it [19:20, 21845.09it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26912536it [19:22, 21712.91it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 26976496it [19:25, 22085.22it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27040306it [19:27, 22918.75it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27094403it [19:29, 17551.65it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27167950it [19:31, 22328.31it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27228817it [19:34, 21048.13it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27286621it [19:36, 19652.93it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27347800it [19:39, 16054.30it/s]

Memory usage after processing review chunk: 4208.54 MB


Reading reviews: 27407836it [19:44, 7626.67it/s] 

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27471458it [19:47, 10655.91it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27527164it [19:48, 54869.33it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27602222it [19:53, 12726.88it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27668956it [19:56, 13031.54it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27732069it [19:58, 11517.25it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27793415it [20:02, 10070.73it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27852971it [20:04, 11105.61it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27917069it [20:07, 12369.75it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 27983380it [20:10, 12776.63it/s]

Memory usage after processing review chunk: 4208.79 MB


Reading reviews: 28042057it [20:13, 10394.98it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28104564it [20:16, 10344.88it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28172253it [20:19, 12620.44it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28240792it [20:22, 17866.64it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28302033it [20:24, 21610.17it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28363794it [20:26, 21894.68it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28426354it [20:29, 22081.59it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28498456it [20:31, 21795.93it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28555020it [20:33, 20605.67it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28617073it [20:36, 21775.54it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28679542it [20:38, 21932.22it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28739307it [20:40, 20961.20it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28811050it [20:43, 22352.44it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28864925it [20:45, 19939.14it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28925713it [20:48, 21456.83it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 28988587it [20:50, 21992.34it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29047896it [20:52, 21951.63it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29109881it [20:55, 21699.64it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29167975it [20:57, 20234.60it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29238718it [20:59, 21030.13it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29294620it [21:02, 19635.36it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29357143it [21:04, 20280.72it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29417447it [21:06, 21080.28it/s]

Memory usage after processing review chunk: 4208.91 MB


Reading reviews: 29478389it [21:09, 21669.76it/s]

Memory usage after processing review chunk: 4209.04 MB


Reading reviews: 29541987it [21:11, 21571.59it/s]

Memory usage after processing review chunk: 4209.04 MB


Reading reviews: 29600673it [21:13, 20708.24it/s]

Memory usage after processing review chunk: 4209.04 MB


Reading reviews: 29661102it [21:16, 21442.37it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 29723663it [21:18, 21831.56it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 29786504it [21:20, 22454.59it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 29851533it [21:23, 22416.94it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 29912933it [21:25, 22054.39it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 29972712it [21:27, 21137.16it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 30033449it [21:30, 21258.83it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 30092807it [21:32, 21700.45it/s]

Memory usage after processing review chunk: 4209.16 MB


Reading reviews: 30152488it [21:34, 21573.96it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30201581it [21:37, 16011.61it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30274593it [21:39, 21779.19it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30336165it [21:42, 21238.48it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30393549it [21:44, 19822.51it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30456335it [21:46, 21553.52it/s]

Memory usage after processing review chunk: 4209.29 MB


Reading reviews: 30506773it [21:49, 20978.82it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30566295it [21:51, 21101.48it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30625073it [21:53, 20831.98it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30686972it [21:56, 21513.15it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30748255it [21:58, 21891.28it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30809317it [22:00, 21437.86it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30870405it [22:03, 21554.75it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30930863it [22:05, 21696.65it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 30992049it [22:07, 21697.82it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31048946it [22:09, 20630.83it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31106726it [22:12, 20081.67it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31166700it [22:14, 20917.26it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31218460it [22:17, 19178.06it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31276456it [22:19, 20229.51it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31335837it [22:21, 19698.27it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31398560it [22:24, 21979.91it/s]

Memory usage after processing review chunk: 4209.54 MB


Reading reviews: 31460781it [22:26, 21541.12it/s]

Memory usage after processing review chunk: 4209.66 MB


Reading reviews: 31525152it [22:28, 28622.62it/s]

Memory usage after processing review chunk: 4209.66 MB


Reading reviews: 31574001it [22:30, 21337.77it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31634678it [22:33, 21105.10it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31691398it [22:35, 20299.37it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31746080it [22:38, 19662.38it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31810278it [22:40, 19238.59it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31869433it [22:42, 20590.57it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31925619it [22:45, 20074.68it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 31981230it [22:47, 19047.20it/s]

Memory usage after processing review chunk: 4209.79 MB


Reading reviews: 32036372it [22:49, 20082.88it/s]

Memory usage after processing review chunk: 4209.91 MB


Reading reviews: 32094899it [22:52, 20814.56it/s]

Memory usage after processing review chunk: 4209.91 MB


Reading reviews: 32154311it [22:54, 20916.95it/s]

Memory usage after processing review chunk: 4209.91 MB


Reading reviews: 32211806it [22:56, 19675.33it/s]

Memory usage after processing review chunk: 4210.04 MB


Reading reviews: 32264588it [22:59, 18813.57it/s]

Memory usage after processing review chunk: 4210.04 MB


Reading reviews: 32292099it [22:59, 23408.27it/s]


Memory usage after processing final review chunk: 4210.04 MB
Building DataFrame...
Memory usage after building DataFrame: 4210.04 MB
Downloading one image per item...


Downloading images: 100%|██████████| 7523/7523 [29:05<00:00,  4.31it/s]  


Filtering items without downloadable images...
Dataset size after image filtering: 14486 samples
Memory usage after image downloading and filtering: 4210.41 MB
Verifying image storage...
Total images stored: 6306
Building user-item interaction dictionary...
Memory usage after building interactions: 4210.41 MB
Saving processed DataFrame to /kaggle/working/preprocessed_fashion_data.pkl...
Memory usage after saving DataFrame: 4210.41 MB
Saving user-item interactions to /kaggle/working/user_item_interactions.pkl...
Memory usage after saving interactions: 4210.41 MB
Creating ZIP file: /kaggle/working/amazon_fashion_data.zip...
Added images/B00SQ3J25W.jpg to ZIP
Added images/B002WQ5YNS.jpg to ZIP
Added images/B000UPTVKA.jpg to ZIP
Added images/B006JWRDJ4.jpg to ZIP
Added images/B00XHLNA5G.jpg to ZIP
Added images/B0085M5K64.jpg to ZIP
Added images/B0017NQWF6.jpg to ZIP
Added images/B00JGHLG38.jpg to ZIP
Added images/B0012DHHJ6.jpg to ZIP
Added images/B01DVZZS8Y.jpg to ZIP
Added images/B01568Z

In [None]:
import json
import pandas as pd
import requests
import os
import io
from PIL import Image
from tqdm import tqdm
import pickle
from collections import defaultdict
import gc
import psutil
import random
import shutil
import zipfile

# Paths
metadata_path = r"/kaggle/input/amazon-dataset/meta_Clothing_Shoes_and_Jewelry.json"
reviews_path = r"/kaggle/input/amazon-dataset/Clothing_Shoes_and_Jewelry.json"
output_pickle = r"/kaggle/working/preprocessed_fashion_data.pkl"
output_interactions = r"/kaggle/working/user_item_interactions.pkl"
image_folder = "/kaggle/working/images"
output_zip = "/kaggle/working/amazon_fashion_data.zip"

# Clean image folder to free disk space
if os.path.exists(image_folder):
    shutil.rmtree(image_folder)
os.makedirs(image_folder, exist_ok=True)

# Helper: Print memory usage
def print_memory_usage(step=""):
    process = psutil.Process()
    mem_info = process.memory_info()
    print(f"Memory usage after {step}: {mem_info.rss / 1024 ** 2:.2f} MB")

# Helper: Download and save image to disk
def download_and_save_images(urls, asin, idx):
    paths = []
    for i, url in enumerate(urls):
        try:
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                img = Image.open(io.BytesIO(response.content))
                img.verify()  # Verify image integrity
                img_path = os.path.join(image_folder, f"{asin}_{idx}_{i}.jpg")
                with open(img_path, 'wb') as f:
                    f.write(response.content)
                paths.append(img_path)
        except Exception:
            continue
    return paths

# Load metadata with image and description filtering
print("Loading metadata...")
meta_asin_to_data = {}
valid_asins = set()
with open(metadata_path, 'r', encoding='utf-8') as f:
    for line in tqdm(f, desc="Reading metadata"):
        data = json.loads(line)
        asin = data.get('asin')
        if not asin:
            continue
        description = data.get('description', '')
        if isinstance(description, list):
            description = ' '.join([str(item) for item in description if item])
        elif not isinstance(description, str):
            description = ''
        image_urls = data.get('imageURLHighRes', [])
        # Keep items with non-empty description and at least one image URL
        if description.strip() and image_urls:
            meta_asin_to_data[asin] = {
                'title': data.get('title', ''),
                'description': description,
                'feature': ' '.join(data.get('feature', [])),
                'imageURLs': image_urls,
            }
            valid_asins.add(asin)
print(f"Found {len(valid_asins)} items with description and image URLs")
print_memory_usage("metadata loading")
del data, line
gc.collect()

# Load reviews in chunks, keeping only valid ASINs
print("Loading reviews...")
chunk_size = 50000
all_reviews = []  # Renamed to avoid confusion
with open(reviews_path, 'r', encoding='utf-8') as f:
    reviews_chunk = []
    for line in tqdm(f, desc="Reading reviews"):
        data = json.loads(line)
        asin = data.get('asin')
        if asin in valid_asins:
            reviews_chunk.append({
                'reviewerID': data.get('reviewerID'),
                'asin': asin,
                'reviewText': data.get('reviewText', ''),
                'summary': data.get('summary', ''),
            })
        if len(reviews_chunk) >= chunk_size:
            df_chunk = pd.DataFrame(reviews_chunk)
            reviews_chunk = []  # Clear chunk
            # Filter users and items with >=5 interactions
            user_counts = df_chunk['reviewerID'].value_counts()
            item_counts = df_chunk['asin'].value_counts()
            df_chunk = df_chunk[
                (df_chunk['reviewerID'].isin(user_counts[user_counts >= 5].index)) &
                (df_chunk['asin'].isin(item_counts[item_counts >= 5].index))
            ]
            if not df_chunk.empty:
                # Merge metadata
                df_chunk['title'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['title'])
                df_chunk['description'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['description'])
                df_chunk['feature'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['feature'])
                df_chunk['imageURLs'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['imageURLs'])
                # Concatenate text
                df_chunk['concatenated_text'] = [
                    ' '.join([
                        row['title'] or '',
                        row['description'] or '',
                        row['feature'] or '',
                        row['summary'] or '',
                        row['reviewText'] or ''
                    ]) for _, row in df_chunk.iterrows()
                ]
                all_reviews.extend(df_chunk.to_dict('records'))
            del df_chunk, user_counts, item_counts
            print_memory_usage("processing review chunk")
            gc.collect()
    # Process remaining reviews
    if reviews_chunk:
        df_chunk = pd.DataFrame(reviews_chunk)
        user_counts = df_chunk['reviewerID'].value_counts()
        item_counts = df_chunk['asin'].value_counts()
        df_chunk = df_chunk[
            (df_chunk['reviewerID'].isin(user_counts[user_counts >= 5].index)) &
            (df_chunk['asin'].isin(item_counts[item_counts >= 5].index))
        ]
        if not df_chunk.empty:
            df_chunk['title'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['title'])
            df_chunk['description'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['description'])
            df_chunk['feature'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['feature'])
            df_chunk['imageURLs'] = df_chunk['asin'].map(lambda x: meta_asin_to_data[x]['imageURLs'])
            df_chunk['concatenated_text'] = [
                ' '.join([
                    row['title'] or '',
                    row['description'] or '',
                    row['feature'] or '',
                    row['summary'] or '',
                    row['reviewText'] or ''
                ]) for _, row in df_chunk.iterrows()
            ]
            all_reviews.extend(df_chunk.to_dict('records'))
        del df_chunk, user_counts, item_counts, reviews_chunk
        print_memory_usage("processing final review chunk")
        gc.collect()

# Build dataframe
print("Building dataframe...")
df = pd.DataFrame(all_reviews)
print_memory_usage("building dataframe")
del all_reviews
gc.collect()

# Filter to 5000 users and 4000 items
print("Selecting 5000 users and 4000 items...")
user_counts = df['reviewerID'].value_counts()
item_counts = df['asin'].value_counts()
valid_users = user_counts[user_counts >= 5].index
valid_items = item_counts[item_counts >= 5].index

# Randomly sample 5000 users and 4000 items
random.seed(42)  # For reproducibility
selected_users = random.sample(list(valid_users), min(5000, len(valid_users)))
selected_items = random.sample(list(valid_items), min(4000, len(valid_items)))

df = df[
    (df['reviewerID'].isin(selected_users)) &
    (df['asin'].isin(selected_items))
].reset_index(drop=True)
print_memory_usage("user/item filtering")
del user_counts, item_counts, valid_users, valid_items, selected_users, selected_items
gc.collect()

# Download and save images
print("Downloading and saving images...")
image_paths = []
for idx, (urls, asin) in tqdm(enumerate(zip(df['imageURLs'], df['asin'])), total=len(df)):
    paths = download_and_save_images(urls, asin, idx)
    image_paths.append(paths)
df['image_paths'] = image_paths
print_memory_usage("image downloading")
gc.collect()

# Verify image storage
print("Verifying image storage...")
image_count = len([f for f in os.listdir(image_folder) if f.endswith('.jpg')])
print(f"Total images stored: {image_count}")
if image_count == 0:
    print("Warning: No images were stored. Check image URLs or network connectivity.")

# Filter samples with at least one image
print("Filtering samples without images...")
df = df[df['image_paths'].apply(lambda x: len(x) > 0)].reset_index(drop=True)
print(f"Dataset size after image filtering: {len(df)} samples")
print_memory_usage("filtering images")
gc.collect()

# Build user-item interaction dictionary
print("Building user-item interaction dictionary...")
user_item_dict = defaultdict(list)
for idx, row in df.iterrows():
    user_item_dict[row['reviewerID']].append(
        (row['asin'], row['concatenated_text'], row['image_paths'])
    )
print_memory_usage("building interactions")
gc.collect()

# Save processed dataframe
print(f"Saving processed dataframe to {output_pickle}...")
df.to_pickle(output_pickle)
print_memory_usage("saving dataframe")
gc.collect()

# Save user-item interactions
print(f"Saving user-item interactions to {output_interactions}...")
with open(output_interactions, 'wb') as f:
    pickle.dump(user_item_dict, f)
print_memory_usage("saving interactions")
gc.collect()

# Create ZIP file of all outputs in /kaggle/working
print(f"Creating ZIP file: {output_zip}...")
with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Add the images folder
    for root, _, files in os.walk(image_folder):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, "/kaggle/working")
            zipf.write(file_path, arcname)
            print(f"Added {arcname} to ZIP")
    
    # Add the pickle files
    for file in ["preprocessed_fashion_data.pkl", "user_item_interactions.pkl"]:
        file_path = os.path.join("/kaggle/working", file)
        arcname = os.path.relpath(file_path, "/kaggle/working")
        zipf.write(file_path, arcname)
        print(f"Added {arcname} to ZIP")

# Verify ZIP file size
zip_size = os.path.getsize(output_zip) / (1024 ** 3)  # Size in GiB
print(f"ZIP file created: {output_zip} (Size: {zip_size:.2f} GiB)")
print("You can now download the ZIP file from the Output tab in Kaggle.")

# Optional: Free disk space by deleting images folder
# shutil.rmtree(image_folder)
# print("Deleted images folder to free disk space.")

print(f"✅ Done! Final dataset size: {len(df)} samples, {len(user_item_dict)} users")