In [78]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
import re
import json
import csv

In [79]:
# One shared HTTP session so requests to the same host can reuse a connection.
# requests.session() is a deprecated alias kept only for backwards
# compatibility; the Session class is the supported way to create one.
session = requests.Session()

In [80]:
# Example review endpoint: product 206123, first page, 10 reviews per page.
# NOTE(review): no cell shown in this notebook reads this variable; the review
# loops build their own per-product URLs.
review_url='https://api-gateway.juno.lenskart.com/v2/products/product/206123/review?count=10&page=1'

In [81]:
# Accumulator for every product fetched from the category listing.
combined_data = dict(category_name="Eyeglasses", product_list=[])

In [82]:
# Download the category listing (category 3363) one page at a time. Every
# successful page is written to page<N>.json and its products are collected
# into combined_data["product_list"].
pageSize = 1000  # loop-invariant, so assigned once rather than per iteration

# Start from an empty list so re-running this cell does not append the same
# products a second time.
combined_data["product_list"] = []

for page in range(1, 5):
    pageUrl = f"https://api-gateway.juno.lenskart.com/v2/products/category/3363?page-size={pageSize}&page={page}"
    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = session.get(pageUrl, timeout=30)
    except requests.RequestException as exc:
        # Transport failure: report it and keep going with the other pages.
        print(f"Error fetching data for page {page}: {exc}")
        continue

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        result = {
            "category_name": "Eyeglasses",
            # Tolerate a missing or null "result"/"product_list" instead of raising.
            "product_list": (data.get("result") or {}).get("product_list") or []
        }

        with open(f'page{page}.json', 'w') as file:
            json.dump(result, file, indent=2)

        combined_data["product_list"].extend(result["product_list"])
    else:
        print(f"Error fetching data for page {page}. Status code: {response.status_code}")
 

In [83]:
# Save the combined data to a new JSON file
with open('combined_data.json', 'w') as combined_file:
    json.dump(combined_data, combined_file, indent=2)
df = pd.DataFrame(combined_data["product_list"])

# Keep only the columns of interest and write them to CSV
if not df.empty:
    selected_columns = ["id","product_url", "color", "size", "width", "brand_name_en",
                         "model_name",  "classification", "purchaseCount", "avgRating", "qty"]

    # Defaults applied to missing values in these columns
    default_values = {
        "purchaseCount": 0,
        "avgRating": 0.0,
        "qty": 0
    }

    # reindex (unlike df[selected_columns]) creates any column that no product
    # supplied, filled with NaN, instead of raising KeyError; fillna then
    # applies the defaults above to those columns as well.
    df = df.reindex(columns=selected_columns).fillna(default_values)

    # Save the DataFrame to a CSV file
    df.to_csv('combined_data.csv', index=False)
else:
    print("No data to write to CSV.")

In [84]:
# Location of the product CSV to read IDs from
csv_file = "combined_data.csv"

# Function to read the CSV file and extract IDs
def extract_ids_from_csv(csv_file):
    """Return the ``id`` column of a CSV file as a list of strings.

    Parameters
    ----------
    csv_file : str or path-like
        Path to a CSV file whose header row contains an ``id`` column.

    Returns
    -------
    list of str
        One entry per data row, in file order. Values are returned exactly as
        read, so numeric IDs come back as strings. A file with a header but no
        data rows yields an empty list.

    Raises
    ------
    KeyError
        If the file has data rows but no ``id`` column.
    """
    # Decode explicitly as UTF-8 (the default encoding of pandas' to_csv)
    # rather than the platform default, which can fail or mis-decode rows
    # containing non-ASCII text.
    with open(csv_file, newline='', encoding='utf-8') as file:
        return [row['id'] for row in csv.DictReader(file)]

# Extracting IDs from the CSV file; `ids` is the list the review cells iterate over
ids = extract_ids_from_csv(csv_file)
# Prints the complete list of IDs (one long line of output)
print(ids)

['209332', '204384', '211450', '138584', '146626', '206123', '131241', '201411', '134946', '213642', '134864', '136181', '211277', '213093', '138231', '215388', '117020', '151270', '215396', '150158', '205972', '211457', '215922', '201400', '131415', '206461', '209285', '213095', '135214', '143314', '202643', '210150', '211928', '211941', '200749', '152838', '208183', '209690', '205968', '215372', '209333', '213217', '201540', '210501', '134861', '132041', '146596', '208180', '151646', '210008', '206136', '215364', '137794', '150160', '151323', '211186', '140634', '212056', '151372', '206463', '209030', '153142', '148342', '136348', '148353', '112988', '151324', '210623', '143910', '211950', '204142', '209684', '215420', '138581', '200678', '151033', '138242', '204144', '206464', '211389', '135940', '212502', '136191', '207042', '211931', '131651', '209331', '136401', '147495', '209222', '210831', '215451', '209301', '210153', '213665', '213661', '212404', '213183', '206962', '211273',

In [85]:
# Map every product id to the review count reported by the review endpoint.
reviews_dict = {}

for i in ids:
    # Default to None so each id gets an entry even when its request fails;
    # a later lookup by id then never raises KeyError.
    number_of_reviews = None
    try:
        # Use the shared session (connection reuse) and a timeout so a stalled
        # connection cannot hang the loop.
        reviewResponse = session.get(
            f'https://api-gateway.juno.lenskart.com/v2/products/product/{i}/review?count=10&page=1',
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"Error fetching review count for product {i}: {exc}")
    else:
        if reviewResponse.status_code == 200:
            reviewData = reviewResponse.json()
            # Tolerate a missing or null "result" instead of raising.
            number_of_reviews = (reviewData.get("result") or {}).get("numberOfReviews")
            print(number_of_reviews)
        else:
            print(f"Error fetching review count for product {i}. Status code: {reviewResponse.status_code}")

    # Store the number of reviews in the dictionary with the ID as the key
    reviews_dict[i] = number_of_reviews
# Print the dictionary containing IDs and corresponding number of reviews
print(reviews_dict)
    

None
2
None
18
180
1
108
38
24
7
31
132
None
None
None
None
156
9
None
7
None
None
None
42
90
1
None
None
42
76
33
74
None
None
8
16
3
None
1
None
None
None
45
265
101
193
15
2
2
None
1
None
31
7
9
None
19
None
8
2
None
4
12
72
42
18
9
43
133
None
11
None
None
23
1
14
17
None
2
None
42
221
96
None
1
48
None
13
16
1
None
None
None
1
None
None
None
None
2
None
None
None
None
2
None
None
13
None
None
54
None
None
None
190
1
None
None
None
19
9
None
None
48
None
None
None
None
199
2
99
None
9
62
None
None
None
None
9
12
None
16
9
None
None
None
1
None
None
None
1
1
None
None
20
4
None
132
None
None
9
144
None
8
2
125
None
None
11
None
None
272
7
3
272
2
1
54
None
None
None
None
17
None
None
272
211
2
None
None
30
None
272
None
None
88
None
None
None
14
113
None
51
None
108
4
2
159
22
None
None
None
None
2
None
33
7
6
None
3
None
None
None
3
None
None
None
4
10
None
None
4
None
146
None
170
6
62
1
1
None
None
None
23
None
None
122
None
53
3
None
None
None
16
5
None
None
None
10
None
None
No

In [86]:
# Fetch the reviews of every product and write them, one row per review, to a CSV.
csv_filename = "reviews_data.csv"
fieldnames = ['id', 'reviewId', 'reviewTitle', 'reviewDetail', 'reviewee', 'noOfStars', 'reviewDate', 'reviewerType']

# Number of reviews requested when no review count is known for a product.
default_review_count = 10

with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()

    for i in ids:
        # .get avoids a KeyError for ids with no recorded count, and the
        # fallback avoids sending the literal text "count=None" to the API.
        review_count = reviews_dict.get(i) or default_review_count
        try:
            # Shared session for connection reuse; timeout so one stalled
            # request cannot hang the whole export.
            reviewResponse = session.get(
                f'https://api-gateway.juno.lenskart.com/v2/products/product/{i}/review?count={review_count}',
                timeout=30,
            )
        except requests.RequestException as exc:
            print(f"Error fetching reviews for product {i}: {exc}")
            continue

        if reviewResponse.status_code != 200:
            print(f"Error fetching reviews for product {i}. Status code: {reviewResponse.status_code}")
            continue

        reviewData = reviewResponse.json()
        # Walk result -> review -> reviews defensively: any missing or null
        # level is treated as "no reviews" rather than raising.
        review_section = (reviewData.get('result') or {}).get('review') or {}
        reviews = review_section.get('reviews') or []

        for review in reviews:
            # Fields absent from a review are written as empty cells.
            row = {name: review.get(name) for name in fieldnames if name != 'id'}
            row['id'] = i
            writer.writerow(row)

print("CSV write operation completed successfully.")
        

CSV write operation completed successfully.
