In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=motorola+mobile&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&sort=recency_desc&p%5B%5D=facets.price_range.from%3D20000&p%5B%5D=facets.price_range.to%3DMax"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola.csv"
df.to_csv(Output_path, index=False)


In [8]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price
0,"MOTOROLA Razar 50 Flip (Beach Sand, 256 GB)",https://www.flipkart.com/motorola-razar-50-fli...,"‚Çπ69,990\n‚Çπ79,99012% off"
1,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peac...,"‚Çπ26,999\n‚Çπ32,99918% off"
2,"MOTOROLA Edge 50 (Koala Grey, 256 GB)",https://www.flipkart.com/motorola-edge-50-koal...,"‚Çπ26,999\n‚Çπ32,99918% off"
3,"MOTOROLA Edge 50 Neo (PANTONE Poinciana, 256 GB)",https://www.flipkart.com/motorola-edge-50-neo-...,"‚Çπ23,999\n‚Çπ29,99920% off"
4,"MOTOROLA Edge 50 Neo (PANTONE Nautical Blue, 2...",https://www.flipkart.com/motorola-edge-50-neo-...,"‚Çπ23,999\n‚Çπ29,99920% off"
...,...,...,...
88,"MOTOROLA Moto X4 (Super Black, 32 GB)",https://www.flipkart.com/motorola-moto-x4-supe...,"‚Çπ21,999"
89,MOTOROLA Moto X (2nd Generation) (Black Leathe...,https://www.flipkart.com/motorola-moto-x-2nd-g...,"‚Çπ23,999"
90,"(Refurbished) MOTOROLA Edge 40 (Viva Magenta, ...",https://www.flipkart.com/refurbished-motorola-...,"‚Çπ21,299\n‚Çπ34,99939% off"
91,(Refurbished) MOTOROLA Edge 50 Pro 5G (Luxe La...,https://www.flipkart.com/refurbished-motorola-...,"‚Çπ22,999\n‚Çπ36,99937% off"


In [9]:
df.info

<bound method DataFrame.info of                                          Product_Name  \
0         MOTOROLA Razar 50 Flip (Beach Sand, 256 GB)   
1               MOTOROLA Edge 50 (Peach Fuzz, 256 GB)   
2               MOTOROLA Edge 50 (Koala Grey, 256 GB)   
3    MOTOROLA Edge 50 Neo (PANTONE Poinciana, 256 GB)   
4   MOTOROLA Edge 50 Neo (PANTONE Nautical Blue, 2...   
..                                                ...   
88              MOTOROLA Moto X4 (Super Black, 32 GB)   
89  MOTOROLA Moto X (2nd Generation) (Black Leathe...   
90  (Refurbished) MOTOROLA Edge 40 (Viva Magenta, ...   
91  (Refurbished) MOTOROLA Edge 50 Pro 5G (Luxe La...   
92  (Refurbished) MOTOROLA Edge 30 Ultra (Interste...   

                                         Product_Link            Product_Price  
0   https://www.flipkart.com/motorola-razar-50-fli...  ‚Çπ69,990\n‚Çπ79,99012% off  
1   https://www.flipkart.com/motorola-edge-50-peac...  ‚Çπ26,999\n‚Çπ32,99918% off  
2   https://www.flipkart.com/mot

In [10]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'‚Çπ(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [11]:
products_to_remove = ['(Refurbished) MOTOROLA Edge 40 (Viva Magenta, 256 GB)', '(Refurbished) MOTOROLA Edge 50 Pro 5G (Luxe Lavender, 256 GB)', 
                      '(Refurbished) MOTOROLA Edge 30 Ultra (Interstellar Black, 256 GB)', '(Refurbished) MOTOROLA Edge 20 Pro 5G (Iridescent Cloud, 128 GB)']
# Drop rows where Product_Name is in the list of products to remove
df = df[~df['Product_Name'].isin(products_to_remove)]

In [12]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_cleaned.csv"
df.to_csv(Output_path, index=False)

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&as-pos=2&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=c16475bf-836e-4831-8ef8-1cbd781451ed&p%5B%5D=facets.brand%255B%255D%3DOnePlus"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus.csv"
df.to_csv(Output_path, index=False)
df.to_csv('flipkart_scrape_oneplus.csv', index=False)  # Save the scraped data to a CSV file

Error sending stats to Plausible: error sending request for url (https://plausible.io/api/event)


In [2]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price
0,"OnePlus N20 SE (CELESTIAL BLACK, 128 GB)",https://www.flipkart.com/oneplus-n20-se-celest...,"‚Çπ10,988\n‚Çπ19,99945% off"
1,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-cela...,"‚Çπ24,576\n‚Çπ24,9991% off"
2,"OnePlus Nord CE4 (Dark Chrome, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-dark...,"‚Çπ24,897\n‚Çπ24,999"
3,"OnePlus Nord 3 5G (Tempest Gray, 128 GB)",https://www.flipkart.com/oneplus-nord-3-5g-tem...,"‚Çπ28,980\n‚Çπ33,99914% off"
4,"OnePlus Nord CE4 (Dark Chrome, 256 GB)",https://www.flipkart.com/oneplus-nord-ce4-dark...,Price: Not Available
5,"OnePlus Nord (Gray Onyx, 256 GB)",https://www.flipkart.com/oneplus-nord-gray-ony...,"‚Çπ28,799\n‚Çπ29,9994% off"
6,"OnePlus Nord CE4 (Celadon Marble, 256 GB)",https://www.flipkart.com/oneplus-nord-ce4-cela...,"‚Çπ23,995\n‚Çπ26,99911% off"
7,"OnePlus Nord (Gray Onyx, 128 GB)",https://www.flipkart.com/oneplus-nord-gray-ony...,"‚Çπ24,600\n‚Çπ27,99912% off"
8,"OnePlus Nord (Blue Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-blue-mar...,"‚Çπ24,790\n‚Çπ27,99911% off"
9,"OnePlus Nord (Blue Marble, 256 GB)",https://www.flipkart.com/oneplus-nord-blue-mar...,"‚Çπ21,999\n‚Çπ29,99926% off"


In [6]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\oneplus.csv")

In [7]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'‚Çπ(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [9]:
df = df[(df['Product_Price'] > 20000) & (df['Product_Price'] <= 40000)]
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned.csv"
df.to_csv(Output_path, index=False)

In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DOPPO"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo.csv"
df.to_csv(Output_path, index=False)

In [13]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'‚Çπ(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [14]:
df = df[(df['Product_Price'] > 20000) & (df['Product_Price'] <= 40000)]
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_cleaned.csv"
df.to_csv(Output_path, index=False)

In [9]:
import pandas as pd

# Load the CSV file containing product links
file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned.csv"
df = pd.read_csv(file_path)

# List of product names to be dropped
products_to_drop = [
    'OnePlus Nord (Gray Onyx, 256 GB)',
    'OnePlus Nord (Gray Onyx, 128 GB)',
    'OnePlus Nord (Gray Onyx, 64 GB)'
]

# Drop rows with the specified product names
df_cleaned = df[~df['Product_Name'].isin(products_to_drop)]

# Save the cleaned DataFrame back to a CSV file
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned_dropped.csv"
df_cleaned.to_csv(cleaned_file_path, index=False)

print(f"Rows with specified product names dropped and cleaned data saved to {cleaned_file_path}")


Rows with specified product names dropped and cleaned data saved to C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned_dropped.csv


In [16]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# Load the CSV file containing product links
file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned_dropped.csv"
df = pd.read_csv(file_path)

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings from a product page
def extract_reviews_and_ratings(driver):
    reviews = []
    ratings = []

    try:
        # Wait for the reviews section to load
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))

        # Extract reviews
        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        for element in review_elements:
            try:
                # Click "Read More" if available
                read_more = element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)  # Wait for the full review to load
                
                reviews.append(element.text)
            except StaleElementReferenceException:
                continue

        # Extract star ratings
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH.Ga3i8K")
        for i in range(max(len(reviews), len(rating_elements))):
            # Append ratings or None if not available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)  # Placeholder for missing ratings

    except TimeoutException:
        print("Timed out waiting for reviews to load")

    return reviews, ratings

# Function to load the page with the correct page number in the URL
def load_page(driver, url):
    driver.get(url)
    try:
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))
    except TimeoutException:
        print(f"Timed out waiting for page {url} to load")

# Scrape reviews and ratings for all product links
all_data = []

num_pages_reviews = 20  # Number of review pages to scrape per product

# Loop through each product link in the DataFrame
for index, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or product_link.strip() == "":
        print(f"Skipping empty product link at index {index + 1}.")
        continue

    print(f"Scraping product {index + 1}/{len(df)}: {product_link}")
    
    # Scrape reviews for the current product
    driver.get(product_link)
    time.sleep(5)  # Wait for the product page to load

    # Click on the 'All Reviews' button if it exists
    try:
        wait = WebDriverWait(driver, 20)
        all_reviews_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, '_23J90q.RcXBOT')))
        all_reviews_button.click()
        time.sleep(5)  # Wait for the reviews page to load
    except TimeoutException:
        print(f"Timed out waiting for 'All Reviews' button to load for {product_link}")
    except Exception as e:
        print(f"Could not find 'All Reviews' button for {product_link}: {e}")

    # Scrape reviews and ratings across multiple review pages
    for page in range(1, num_pages_reviews + 1):
        if page > 1:
            # Update the URL to navigate to the next page of reviews
            page_url = f"{driver.current_url}&page={page}"
            load_page(driver, page_url)

        reviews, ratings = extract_reviews_and_ratings(driver)
        
        # Check if reviews are empty, and if so, stop scraping further pages and move to the next product
        if not reviews:
            print(f"No reviews found on page {page} for {product_link}. Moving to next product.")
            break

        # Append reviews and ratings to the all_data list along with the product link
        for review, rating in zip(reviews, ratings):
            all_data.append({
                'Product_Link': product_link,
                'Review': review,
                'Rating': rating
            })

        time.sleep(5)  # Wait before loading the next reviews page

# Close the driver after scraping is complete
driver.quit()

# Convert the collected data into a DataFrame and save it as a CSV file
result_df = pd.DataFrame(all_data)

# Save to CSV
output_file = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_reviews_ratings.csv"
result_df.to_csv(output_file, index=False)

print(f"Scraping complete. Data saved to {output_file}")


Scraping product 1/21: https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0
Scraping product 2/21: https://www.flipkart.com/oneplus-nord-ce4-dark-chrome-128-gb/p/itm5a09089114afb?pid=MOBGZNH6QUUVZGZN&lid=LSTMOBGZNH6QUUVZGZNZOU9B7&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_3&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZNH6QUUVZGZN.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0
Scraping product 3/21: https://www.flipkart.com/oneplus-nord-3-5g-tempest-gray-128-gb/p/itm5fc87a

In [17]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/oneplus-nord-ce4-cela...,Guys if you want big battery with super fast c...,5
1,https://www.flipkart.com/oneplus-nord-ce4-cela...,It's A Very Good Mobile Value For Money\n\nRat...,5
2,https://www.flipkart.com/oneplus-nord-ce4-cela...,Awesome üëç thanks OnePlus,5
3,https://www.flipkart.com/oneplus-nord-ce4-cela...,Best phone at this price range üíï,5
4,https://www.flipkart.com/oneplus-nord-ce4-cela...,Not bad,4
...,...,...,...
1470,https://www.flipkart.com/oneplus-7t-frosted-si...,This phone is great,5
1471,https://www.flipkart.com/oneplus-7t-frosted-si...,great device and who else wanted it for gaming...,5
1472,https://www.flipkart.com/oneplus-7t-frosted-si...,"Excellent device , best for movies on speakers...",5
1473,https://www.flipkart.com/oneplus-7t-frosted-si...,THIS PHONE IS A DEVIL.....IT'S SO SO ASWOOME ....,5


In [18]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# Load the CSV file containing product links
file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_cleaned.csv"
df = pd.read_csv(file_path)

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings from a product page
def extract_reviews_and_ratings(driver):
    reviews = []
    ratings = []

    try:
        # Wait for the reviews section to load
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))

        # Extract reviews
        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        for element in review_elements:
            try:
                # Click "Read More" if available
                read_more = element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)  # Wait for the full review to load
                
                reviews.append(element.text)
            except StaleElementReferenceException:
                continue

        # Extract star ratings
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH.Ga3i8K")
        for i in range(max(len(reviews), len(rating_elements))):
            # Append ratings or None if not available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)  # Placeholder for missing ratings

    except TimeoutException:
        print("Timed out waiting for reviews to load")

    return reviews, ratings

# Function to load the page with the correct page number in the URL
def load_page(driver, url):
    driver.get(url)
    try:
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))
    except TimeoutException:
        print(f"Timed out waiting for page {url} to load")

# Scrape reviews and ratings for all product links
all_data = []

num_pages_reviews = 20  # Number of review pages to scrape per product

# Loop through each product link in the DataFrame
for index, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or product_link.strip() == "":
        print(f"Skipping empty product link at index {index + 1}.")
        continue

    print(f"Scraping product {index + 1}/{len(df)}: {product_link}")
    
    # Scrape reviews for the current product
    driver.get(product_link)
    time.sleep(5)  # Wait for the product page to load

    # Click on the 'All Reviews' button if it exists
    try:
        wait = WebDriverWait(driver, 20)
        all_reviews_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, '_23J90q.RcXBOT')))
        all_reviews_button.click()
        time.sleep(5)  # Wait for the reviews page to load
    except TimeoutException:
        print(f"Timed out waiting for 'All Reviews' button to load for {product_link}")
    except Exception as e:
        print(f"Could not find 'All Reviews' button for {product_link}: {e}")

    # Scrape reviews and ratings across multiple review pages
    for page in range(1, num_pages_reviews + 1):
        if page > 1:
            # Update the URL to navigate to the next page of reviews
            page_url = f"{driver.current_url}&page={page}"
            load_page(driver, page_url)

        reviews, ratings = extract_reviews_and_ratings(driver)
        
        # Check if reviews are empty, and if so, stop scraping further pages and move to the next product
        if not reviews:
            print(f"No reviews found on page {page} for {product_link}. Moving to next product.")
            break

        # Append reviews and ratings to the all_data list along with the product link
        for review, rating in zip(reviews, ratings):
            all_data.append({
                'Product_Link': product_link,
                'Review': review,
                'Rating': rating
            })

        time.sleep(5)  # Wait before loading the next reviews page

# Close the driver after scraping is complete
driver.quit()

# Convert the collected data into a DataFrame and save it as a CSV file
result_df = pd.DataFrame(all_data)

# Save to CSV
output_file = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_reviews_ratings.csv"
result_df.to_csv(output_file, index=False)

print(f"Scraping complete. Data saved to {output_file}")


Scraping product 1/21: https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0
Scraping product 2/21: https://www.flipkart.com/oppo-f25-pro-5g-ocean-blue-256-gb/p/itm9451b9c477991?pid=MOBGXX3V666RMFVU&lid=LSTMOBGXX3V666RMFVUEEDWBP&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGXX3V666RMFVU.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0
Scraping product 3/21: https://www.flipkart.com/oppo-f25-pro-5g-ocean-blue-128-gb/p/itm9451b9c477991?pid=M

In [19]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Phone is nice but need improvement in camera p...,4
1,https://www.flipkart.com/oppo-f25-pro-5g-coral...,"Blue color is an amaziong shocker, shines attr...",5
2,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Very nice product...but battery issue,4
3,https://www.flipkart.com/oppo-f25-pro-5g-coral...,"Read some reviews, using this phone in dark mo...",5
4,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Disighn display camera battery performance super,5
...,...,...,...
2152,https://www.flipkart.com/oppo-reno11-pro-5g-ro...,good phone love it,5
2153,https://www.flipkart.com/oppo-reno11-pro-5g-ro...,Great looks,5
2154,https://www.flipkart.com/oppo-reno11-pro-5g-ro...,Front camera is not good lost of money,1
2155,https://www.flipkart.com/oppo-reno11-pro-5g-ro...,Super,5


In [1]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# Load the CSV file containing product links
file_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_cleaned.csv"
df = pd.read_csv(file_path)

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings from a product page
def extract_reviews_and_ratings(driver):
    reviews = []
    ratings = []

    try:
        # Wait for the reviews section to load
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))

        # Extract reviews
        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        for element in review_elements:
            try:
                # Click "Read More" if available
                read_more = element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)  # Wait for the full review to load
                
                reviews.append(element.text)
            except StaleElementReferenceException:
                continue

        # Extract star ratings
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH.Ga3i8K")
        for i in range(max(len(reviews), len(rating_elements))):
            # Append ratings or None if not available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)  # Placeholder for missing ratings

    except TimeoutException:
        print("Timed out waiting for reviews to load")

    return reviews, ratings

# Function to load the page with the correct page number in the URL
def load_page(driver, url):
    driver.get(url)
    try:
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))
    except TimeoutException:
        print(f"Timed out waiting for page {url} to load")

# Scrape reviews and ratings for all product links
all_data = []

num_pages_reviews = 5  # Number of review pages to scrape per product

# Loop through each product link in the DataFrame
for index, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or product_link.strip() == "":
        print(f"Skipping empty product link at index {index + 1}.")
        continue

    print(f"Scraping product {index + 1}/{len(df)}: {product_link}")
    
    # Scrape reviews for the current product
    driver.get(product_link)
    time.sleep(5)  # Wait for the product page to load

    # Click on the 'All Reviews' button if it exists
    try:
        wait = WebDriverWait(driver, 20)
        all_reviews_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, '_23J90q.RcXBOT')))
        all_reviews_button.click()
        time.sleep(5)  # Wait for the reviews page to load
    except TimeoutException:
        print(f"Timed out waiting for 'All Reviews' button to load for {product_link}")
    except Exception as e:
        print(f"Could not find 'All Reviews' button for {product_link}: {e}")

    # Scrape reviews and ratings across multiple review pages
    for page in range(1, num_pages_reviews + 1):
        if page > 1:
            # Update the URL to navigate to the next page of reviews
            page_url = f"{driver.current_url}&page={page}"
            load_page(driver, page_url)

        reviews, ratings = extract_reviews_and_ratings(driver)
        
        # Check if reviews are empty, and if so, stop scraping further pages and move to the next product
        if not reviews:
            print(f"No reviews found on page {page} for {product_link}. Moving to next product.")
            break

        # Append reviews and ratings to the all_data list along with the product link
        for review, rating in zip(reviews, ratings):
            all_data.append({
                'Product_Link': product_link,
                'Review': review,
                'Rating': rating
            })

        time.sleep(5)  # Wait before loading the next reviews page

# Close the driver after scraping is complete
driver.quit()

# Convert the collected data into a DataFrame and save it as a CSV file
result_df = pd.DataFrame(all_data)

# Save to CSV
output_file = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_reviews_ratings.csv"
result_df.to_csv(output_file, index=False)

print(f"Scraping complete. Data saved to {output_file}")


Scraping product 1/73: https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74
Scraping product 2/73: https://www.flipkart.com/motorola-edge-50-koala-grey-256-gb/p/itm31f5f025267eb?pid=MOBH2Q5YDBQAZEZ5&lid=LSTMOBH2Q5YDBQAZEZ5NJN25F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_3&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YDBQAZEZ5.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74
Scraping product 3/73: https://www.flipkart.com/motorola-edge-50-neo-pantone-poinciana-256-gb/p/itm1b6c43661a1fc?pid=MOBHFHDRZJP6FHPZ&lid=LSTMOBHFHDRZJP6FHPZ0HNLSP&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_4&otracker=search&otracker1=

In [2]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/motorola-edge-50-peac...,"Pros : Crisp display, Camera performed way bet...",4
1,https://www.flipkart.com/motorola-edge-50-peac...,This is one day review\n\nPros:\nAwesome desig...,4
2,https://www.flipkart.com/motorola-edge-50-peac...,I could recommend it as the best in its price ...,5
3,https://www.flipkart.com/motorola-edge-50-peac...,Camera is best but mobile phon heating much .,4
4,https://www.flipkart.com/motorola-edge-50-peac...,Your phone comes with RAM boost ON when you ge...,5
...,...,...,...
3551,https://www.flipkart.com/motorola-moto-x-2nd-g...,I have been big fan of Moto series and have us...,4
3552,https://www.flipkart.com/motorola-moto-x-2nd-g...,phone design is very eye catching !! love at f...,4
3553,https://www.flipkart.com/motorola-moto-x-2nd-g...,Well this has been the best phone that I have ...,4
3554,https://www.flipkart.com/motorola-moto-x-2nd-g...,The Good: The Motorola Moto X has a sharp 1080...,4


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DHonor"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\Honor.csv"
df.to_csv(Output_path, index=False)


In [7]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\Honor.csv")

In [8]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'‚Çπ(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [6]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\Honor_cleaned.csv"
df.to_csv(Output_path, index=False)

In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DGoogle"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\google.csv"
df.to_csv(Output_path, index=False)


In [12]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'‚Çπ(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [13]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\Elakkiya\Downloads\flipkart\google_cleaned.csv"
df.to_csv(Output_path, index=False)

In [14]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_reviews_ratings.csv"
output_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\motorola_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")


Merged file saved to C:\Users\Elakkiya\Downloads\flipkart\motorola_merged.csv


In [15]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\motorola_merged.csv")

In [18]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74,26999,"Pros : Crisp display, Camera performed way better than my expectations. Battery lasted for 1.5 days after decent usage. Face detection and fingerprint sensors works swiftly.\n\nCons : Heating issue when camera is kept on for 10 mins and beyond. Camera app takes 3-5 secs to start which feels like phone is hung. No inbuilt gallery app, you have to use google photos app",4.0
1,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74,26999,This is one day review\n\nPros:\nAwesome design\nAwesome camera\nAwesome UI\nFast Charging\n\nCons:\nPhone heating\nBattery falling down easier,4.0
2,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74,26999,"I could recommend it as the best in its price range.\n1. Very clean UI and great curvy display\n2. Very comfortable, premium and light design\n3. Fast charging (might heat up if you dbt turn n fast charging option)\n4. Dolby Atmos is top notch beating other phones\n5. The camera is color accurate and must try horizon lock (attached few photos for example)\n6. If you rarely game then it's the perfect phone for you. Yes even you can play games smoothly at quite high settings.\n7. Battery drain is less and good fast charging.\n8. Quiet sturdy phone indeed.\n\nA MUST buy phone.",5.0
3,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74,26999,Camera is best but mobile phon heating much .,4.0
4,"MOTOROLA Edge 50 (Peach Fuzz, 256 GB)",https://www.flipkart.com/motorola-edge-50-peach-fuzz-256-gb/p/itmbb9d98d3f286b?pid=MOBH2Q5YS97TYGFJ&lid=LSTMOBH2Q5YS97TYGFJQS8C5F&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&iid=ffcd72d0-9d0c-480d-b95d-75100bed861d.MOBH2Q5YS97TYGFJ.SEARCH&ssid=k4arziq2z40000001727590041372&qH=979dd593b8c05a74,26999,Your phone comes with RAM boost ON when you get it. Turning it off when you get it in your hands will make it run smoother and the heating issue will resolve.,5.0
...,...,...,...,...,...
3552,"MOTOROLA Moto X (2nd Generation) (Black Leather, 16 GB)",https://www.flipkart.com/motorola-moto-x-2nd-generation-black-leather-16-gb/p/itmevt7pswhqcw2y?pid=MOBDZ7WZFXPXWFE8&lid=LSTMOBDZ7WZFXPXWFE86AXC5L&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_4_90&otracker=search&otracker1=search&fm=Search&iid=e40aeca6-bdb4-45cb-9233-95d7621eb655.MOBDZ7WZFXPXWFE8.SEARCH&ppt=sp&ppn=sp&ssid=lt1jlomp4w0000001727590086370&qH=979dd593b8c05a74,23999,"I have been big fan of Moto series and have used all Moto E, Moto G and now bought Moto X (second Gen). i bought this beast on big billion day by exchanging with Moto E and received this by 11th October (my effective cost is Rs. 25K after considering Rs 2000 cash back, at this price its not a bad deal at all), so it has been around 2 weeks of use right now.\n\nNow following are pros per my experience:\n\n1. Display pretty good but i think LCD display is more accurate and comes with more natural colours.\n\n2. performance is excellent, no lag or any issues in terms of speed since this is kind of pure android experience with top notch hardware configuration.\n\n3. Build quality is excellent.\n\n4 Moto assist, Moto Voice, Moto display is good experience and most of the time gives the desired result.\n\n5. Very fast in getting the updates and high probablity of getting the Updates (Lollipop update should come, i guess in the second week in the Moto X)\n\n6. Its nice to have Stock android experience with no Bloatware and skins (can be customised as per our wishe)\n\nCons:\n\n1. Heating issues while charging or playing high graphics game.\n2. Battery is not sufficient (with average usage though you can manage 1 day use, but being the flagship phone this is not sufficient)\n3. Camera is average considering the price. (in the daylight the pictures taken are fine but in night its very noisy)\n4. Storage space ~10 GB is not sufficient at all, if you take some 4K video then u can understand the importance of this)\n\nSo, overall i can say that even after considering some issues i love Moto X and i am quite happy on my decision to buy this mobile. (Moto X price may decline in next 1-2 months as per my understanding)",4.0
3553,"MOTOROLA Moto X (2nd Generation) (Black Leather, 16 GB)",https://www.flipkart.com/motorola-moto-x-2nd-generation-black-leather-16-gb/p/itmevt7pswhqcw2y?pid=MOBDZ7WZFXPXWFE8&lid=LSTMOBDZ7WZFXPXWFE86AXC5L&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_4_90&otracker=search&otracker1=search&fm=Search&iid=e40aeca6-bdb4-45cb-9233-95d7621eb655.MOBDZ7WZFXPXWFE8.SEARCH&ppt=sp&ppn=sp&ssid=lt1jlomp4w0000001727590086370&qH=979dd593b8c05a74,23999,"phone design is very eye catching !! love at first sight :) finishing with aluminium is very good and it doesn't slip. fits perfectly in the pocket.\ni feel camera quality should have been of much good quality, it shows small dots when fully zoomed in the pic.\nMenu style should be more attractive.",4.0
3554,"MOTOROLA Moto X (2nd Generation) (Black Leather, 16 GB)",https://www.flipkart.com/motorola-moto-x-2nd-generation-black-leather-16-gb/p/itmevt7pswhqcw2y?pid=MOBDZ7WZFXPXWFE8&lid=LSTMOBDZ7WZFXPXWFE86AXC5L&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_4_90&otracker=search&otracker1=search&fm=Search&iid=e40aeca6-bdb4-45cb-9233-95d7621eb655.MOBDZ7WZFXPXWFE8.SEARCH&ppt=sp&ppn=sp&ssid=lt1jlomp4w0000001727590086370&qH=979dd593b8c05a74,23999,"Well this has been the best phone that I have ever used.\n\nBuild quality : 9/10.\nPerformance: 9/10.\nCamera: Rear - 8/10 , Front - 7/10 (In well lit areas)\nInterface : 10/10 (It cant be better)\nBattery : 8/10.(It is good but not the best. 2100mh does good).\nStorage : 7/10 (10 GB user space available is good for medium range usage).\n\nIf you have a need of a phone which has to be fast with an awesome display quality with medium storage for your needs, well this is the one.",4.0
3555,"MOTOROLA Moto X (2nd Generation) (Black Leather, 16 GB)",https://www.flipkart.com/motorola-moto-x-2nd-generation-black-leather-16-gb/p/itmevt7pswhqcw2y?pid=MOBDZ7WZFXPXWFE8&lid=LSTMOBDZ7WZFXPXWFE86AXC5L&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_4_90&otracker=search&otracker1=search&fm=Search&iid=e40aeca6-bdb4-45cb-9233-95d7621eb655.MOBDZ7WZFXPXWFE8.SEARCH&ppt=sp&ppn=sp&ssid=lt1jlomp4w0000001727590086370&qH=979dd593b8c05a74,23999,"The Good: The Motorola Moto X has a sharp 1080p screen, a swift quad-core processor, nifty software and gesture features, and customizable design options that'll fit anyone's style.\nThe Bad: A minimalist camera, combined with no expandable memory, are the handset's only drawbacks.\nThe Bottom Line The Motorola Moto X hits all the right notes, delivering stock Android inside a powerful high-end handset that you can customize yourself.",4.0


In [17]:
# Set options to display more characters
pd.set_option('display.max_colwidth', None)  

In [19]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_reviews_ratings.csv"
output_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")


Merged file saved to C:\Users\Elakkiya\Downloads\flipkart\oneplus_merged.csv


In [20]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\oneplus_merged.csv")

In [21]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0,24576.0,Guys if you want big battery with super fast charger it should be great üòÉ choice also snapdragon 7gen 3 chipset excellent gaming performance and also Camaro is good overall mind blowing purchase blindly you can go üî•,5.0
1,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0,24576.0,"It's A Very Good Mobile Value For Money\n\nRatingsüëáüèª\nProformance/Gaming: 10/10\nRear Camera Photo: 10/10\nRear Camera Video: 8/10\nFront Camera Photo: 9.5/10\nFront Camera Video: 9.5/10\nPortrait Photo: 10/10\nPortrait Video: 10/10\nCamera Stability 1080p 60fps Good, 4k 30fps Bad\nCamera Zoom: 5x Good, 10x/20x Bad\nBattery Backup: 10 hours+\nBattery Charging Time: 20 Minutes\nCharger: 10/10\nDesign: 10/10\nDisplay: 10/10\nOS: Good\nSoftware Update Policy: 2 Year Software Update 3 Year Security Update\n\nUnder 25k Best Mobile 2024 OnePlus Nord CE 4 And Motorola Edge 50 Fusion But Motorola Does Not Have A Service Center, It Is Available At Some Places And Motorola Does Not Provide Software And Security Updates, It Provides Updates After 4-5 Months, That's Why OnePlus No.1",5.0
2,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0,24576.0,Awesome üëç thanks OnePlus,5.0
3,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0,24576.0,Best phone at this price range üíï,5.0
4,"OnePlus Nord CE4 (Celadon Marble, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-celadon-marble-128-gb/p/itm5a09089114afb?pid=MOBGZN8YJ4KZ2KNH&lid=LSTMOBGZN8YJ4KZ2KNHH3PCNN&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&iid=7d930946-5029-430d-acb7-6cd08741df59.MOBGZN8YJ4KZ2KNH.SEARCH&ssid=jndt1q4pb40000001727628684372&qH=37695f7554f510f0,24576.0,Not bad,4.0
...,...,...,...,...,...
1473,"OnePlus 7T (Frosted Silver, 128 GB)",https://www.flipkart.com/oneplus-7t-frosted-silver-128-gb/p/itma74f3aece46b1?pid=MOBFKWSYFG45UYEB&lid=LSTMOBFKWSYFG45UYEBLUORUA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_30&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=2c3b2b72-1716-4666-bd34-b61ca6e23125.MOBFKWSYFG45UYEB.SEARCH&ppt=sp&ppn=sp&ssid=gz3yyslldc0000001727628698632&qH=37695f7554f510f0,30999.0,This phone is great,5.0
1474,"OnePlus 7T (Frosted Silver, 128 GB)",https://www.flipkart.com/oneplus-7t-frosted-silver-128-gb/p/itma74f3aece46b1?pid=MOBFKWSYFG45UYEB&lid=LSTMOBFKWSYFG45UYEBLUORUA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_30&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=2c3b2b72-1716-4666-bd34-b61ca6e23125.MOBFKWSYFG45UYEB.SEARCH&ppt=sp&ppn=sp&ssid=gz3yyslldc0000001727628698632&qH=37695f7554f510f0,30999.0,great device and who else wanted it for gaming trust me i play pubg on hdr extereme and even i also play competitive and it never laggedso who wanted it for gaming just go for it it is a great phone,5.0
1475,"OnePlus 7T (Frosted Silver, 128 GB)",https://www.flipkart.com/oneplus-7t-frosted-silver-128-gb/p/itma74f3aece46b1?pid=MOBFKWSYFG45UYEB&lid=LSTMOBFKWSYFG45UYEBLUORUA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_30&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=2c3b2b72-1716-4666-bd34-b61ca6e23125.MOBFKWSYFG45UYEB.SEARCH&ppt=sp&ppn=sp&ssid=gz3yyslldc0000001727628698632&qH=37695f7554f510f0,30999.0,"Excellent device , best for movies on speakers gives great dolby Atmos , gives much good graphics for games and many more .\n\nIt's a device which is assembled all outstanding parts .",5.0
1476,"OnePlus 7T (Frosted Silver, 128 GB)",https://www.flipkart.com/oneplus-7t-frosted-silver-128-gb/p/itma74f3aece46b1?pid=MOBFKWSYFG45UYEB&lid=LSTMOBFKWSYFG45UYEBLUORUA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_30&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=2c3b2b72-1716-4666-bd34-b61ca6e23125.MOBFKWSYFG45UYEB.SEARCH&ppt=sp&ppn=sp&ssid=gz3yyslldc0000001727628698632&qH=37695f7554f510f0,30999.0,THIS PHONE IS A DEVIL.....IT'S SO SO ASWOOME ......‚ù§Ô∏è,5.0


In [22]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_reviews_ratings.csv"
output_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\oppo_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")


Merged file saved to C:\Users\Elakkiya\Downloads\flipkart\oppo_merged.csv


In [23]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\oppo_merged.csv")

In [24]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0,23999,Phone is nice but need improvement in camera performance,4
1,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0,23999,"Blue color is an amaziong shocker, shines attractivly, wow looking phone with good selfie sony camera",5
2,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0,23999,Very nice product...but battery issue,4
3,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0,23999,"Read some reviews, using this phone in dark mode to save battery , which actually worked, now the power backup is almost more than a dayyyyyyyy",5
4,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&iid=a9c6207a-787f-4226-bb68-9f0c056e362a.MOBGZFDWBNU6AM4U.SEARCH&ssid=dafaamg05c0000001727725784470&qH=37695f7554f510f0,23999,Disighn display camera battery performance super,5
...,...,...,...,...,...
2152,"OPPO Reno11 Pro 5G (Rock Grey, 256 GB)",https://www.flipkart.com/oppo-reno11-pro-5g-rock-grey-256-gb/p/itm41ee989232c22?pid=MOBGWU4CKWXGZFWC&lid=LSTMOBGWU4CKWXGZFWCSD9X5W&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_38&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=429b2311-767e-4e00-a091-f5777e97a976.MOBGWU4CKWXGZFWC.SEARCH&ppt=sp&ppn=sp&ssid=ozomgf2j280000001727725801881&qH=37695f7554f510f0,29999,good phone love it,5
2153,"OPPO Reno11 Pro 5G (Rock Grey, 256 GB)",https://www.flipkart.com/oppo-reno11-pro-5g-rock-grey-256-gb/p/itm41ee989232c22?pid=MOBGWU4CKWXGZFWC&lid=LSTMOBGWU4CKWXGZFWCSD9X5W&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_38&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=429b2311-767e-4e00-a091-f5777e97a976.MOBGWU4CKWXGZFWC.SEARCH&ppt=sp&ppn=sp&ssid=ozomgf2j280000001727725801881&qH=37695f7554f510f0,29999,Great looks,5
2154,"OPPO Reno11 Pro 5G (Rock Grey, 256 GB)",https://www.flipkart.com/oppo-reno11-pro-5g-rock-grey-256-gb/p/itm41ee989232c22?pid=MOBGWU4CKWXGZFWC&lid=LSTMOBGWU4CKWXGZFWCSD9X5W&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_38&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=429b2311-767e-4e00-a091-f5777e97a976.MOBGWU4CKWXGZFWC.SEARCH&ppt=sp&ppn=sp&ssid=ozomgf2j280000001727725801881&qH=37695f7554f510f0,29999,Front camera is not good lost of money,1
2155,"OPPO Reno11 Pro 5G (Rock Grey, 256 GB)",https://www.flipkart.com/oppo-reno11-pro-5g-rock-grey-256-gb/p/itm41ee989232c22?pid=MOBGWU4CKWXGZFWC&lid=LSTMOBGWU4CKWXGZFWCSD9X5W&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_38&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=429b2311-767e-4e00-a091-f5777e97a976.MOBGWU4CKWXGZFWC.SEARCH&ppt=sp&ppn=sp&ssid=ozomgf2j280000001727725801881&qH=37695f7554f510f0,29999,Super,5


In [25]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\flipkart_cleaned_google.csv"
reviews_ratings_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\google_reviews_ratings_all_products.csv"
output_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\google_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")


Merged file saved to C:\Users\Elakkiya\Downloads\flipkart\google_merged.csv


In [26]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\google_merged.csv")

In [27]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/google-pixel-7a-charcoal-128-gb/p/itmb4d7b100b1a4d?pid=MOBGZCQMHGWDYZQ7&lid=LSTMOBGZCQMHGWDYZQ7XLJASQ&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_1_1&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&iid=09f11e69-32b0-4b6a-923d-af81bc54bd88.MOBGZCQMHGWDYZQ7.SEARCH&ssid=6woo4r91pc0000001727628620394&qH=7961bfd86f1fa98c,31999,"Its extraordinary, something new, everything thing is good about this phone except the battery life",4
1,"Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/google-pixel-7a-charcoal-128-gb/p/itmb4d7b100b1a4d?pid=MOBGZCQMHGWDYZQ7&lid=LSTMOBGZCQMHGWDYZQ7XLJASQ&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_1_1&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&iid=09f11e69-32b0-4b6a-923d-af81bc54bd88.MOBGZCQMHGWDYZQ7.SEARCH&ssid=6woo4r91pc0000001727628620394&qH=7961bfd86f1fa98c,31999,Google need to fix the battery issue and heating issue.\nOverall phone awesome,5
2,"Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/google-pixel-7a-charcoal-128-gb/p/itmb4d7b100b1a4d?pid=MOBGZCQMHGWDYZQ7&lid=LSTMOBGZCQMHGWDYZQ7XLJASQ&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_1_1&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&iid=09f11e69-32b0-4b6a-923d-af81bc54bd88.MOBGZCQMHGWDYZQ7.SEARCH&ssid=6woo4r91pc0000001727628620394&qH=7961bfd86f1fa98c,31999,Nice design and colour camer is at its peak as far as now,5
3,"Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/google-pixel-7a-charcoal-128-gb/p/itmb4d7b100b1a4d?pid=MOBGZCQMHGWDYZQ7&lid=LSTMOBGZCQMHGWDYZQ7XLJASQ&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_1_1&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&iid=09f11e69-32b0-4b6a-923d-af81bc54bd88.MOBGZCQMHGWDYZQ7.SEARCH&ssid=6woo4r91pc0000001727628620394&qH=7961bfd86f1fa98c,31999,Nice Phone,4
4,"Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/google-pixel-7a-charcoal-128-gb/p/itmb4d7b100b1a4d?pid=MOBGZCQMHGWDYZQ7&lid=LSTMOBGZCQMHGWDYZQ7XLJASQ&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_1_1&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&iid=09f11e69-32b0-4b6a-923d-af81bc54bd88.MOBGZCQMHGWDYZQ7.SEARCH&ssid=6woo4r91pc0000001727628620394&qH=7961bfd86f1fa98c,31999,"Very good phone photography, average mobile performance",4
...,...,...,...,...,...
1555,"Google Pixel 8 (Rose, 128 GB)",https://www.flipkart.com/google-pixel-8-rose-128-gb/p/itm67e2a2531aaac?pid=MOBGT5F22JFCABET&lid=LSTMOBGT5F22JFCABETVKHMHM&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_2_30&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&fm=search-autosuggest&iid=b1f2834a-1d8c-49d2-aa70-022a032e2904.MOBGT5F22JFCABET.SEARCH&ppt=sp&ppn=sp&ssid=ekzl9b96k00000001727628646583&qH=7961bfd86f1fa98c,37999,Good,5
1556,"Google Pixel 8 (Rose, 128 GB)",https://www.flipkart.com/google-pixel-8-rose-128-gb/p/itm67e2a2531aaac?pid=MOBGT5F22JFCABET&lid=LSTMOBGT5F22JFCABETVKHMHM&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_2_30&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&fm=search-autosuggest&iid=b1f2834a-1d8c-49d2-aa70-022a032e2904.MOBGT5F22JFCABET.SEARCH&ppt=sp&ppn=sp&ssid=ekzl9b96k00000001727628646583&qH=7961bfd86f1fa98c,37999,"on 120 hz , pixel barely works for 12 hrs ... and battery saver is not working ... even camera is not dat much satisfying, its same as pixel 4a ... when i m playing pubg , its lags so much ... so processor isnot good enough for pubg also ... performance is totally weak ...",1
1557,"Google Pixel 8 (Rose, 128 GB)",https://www.flipkart.com/google-pixel-8-rose-128-gb/p/itm67e2a2531aaac?pid=MOBGT5F22JFCABET&lid=LSTMOBGT5F22JFCABETVKHMHM&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_2_30&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&fm=search-autosuggest&iid=b1f2834a-1d8c-49d2-aa70-022a032e2904.MOBGT5F22JFCABET.SEARCH&ppt=sp&ppn=sp&ssid=ekzl9b96k00000001727628646583&qH=7961bfd86f1fa98c,37999,"I took this phone more to capture best shots keeping in mind google phone but camera disappoints me, photos gets blur even for nearer captured photos.",3
1558,"Google Pixel 8 (Rose, 128 GB)",https://www.flipkart.com/google-pixel-8-rose-128-gb/p/itm67e2a2531aaac?pid=MOBGT5F22JFCABET&lid=LSTMOBGT5F22JFCABETVKHMHM&marketplace=FLIPKART&q=google+mobiles&store=tyy%2F4io&srno=s_2_30&otracker=AS_Query_HistoryAutoSuggest_1_14_na_na_na&otracker1=AS_Query_HistoryAutoSuggest_1_14_na_na_na&fm=search-autosuggest&iid=b1f2834a-1d8c-49d2-aa70-022a032e2904.MOBGT5F22JFCABET.SEARCH&ppt=sp&ppn=sp&ssid=ekzl9b96k00000001727628646583&qH=7961bfd86f1fa98c,37999,Very smooth and very compact. Stock android with no bloatware.,5


In [28]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\flipkart_cleaned_honor.csv"
reviews_ratings_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\honor_reviews_ratings_all_products.csv"
output_file_path = r"C:\Users\Elakkiya\Downloads\flipkart\Honor_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")


Merged file saved to C:\Users\Elakkiya\Downloads\flipkart\Honor_merged.csv


In [29]:
df=pd.read_csv(r"C:\Users\Elakkiya\Downloads\flipkart\Honor_merged.csv")

In [30]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"Honor X9b (Midnight Black, 256 GB)",https://www.flipkart.com/honor-x9b-midnight-black-256-gb/p/itmab18898f92ae8?pid=MOBGY9EXSCVBJ9DR&lid=LSTMOBGY9EXSCVBJ9DRBRQWXM&marketplace=FLIPKART&store=tyy%2F4io&srno=b_1_1&otracker=CLP_filters&iid=f766ba76-453b-4dd7-b905-a3127fe12c55.MOBGY9EXSCVBJ9DR.SEARCH&ssid=zbq6j4bbvk0000001727615018338,22997,"Supeeeeer mobile,, good battery backup, bright and clour full display only problem is charger not in the box",4.0
1,"Honor X9b (Midnight Black, 256 GB)",https://www.flipkart.com/honor-x9b-midnight-black-256-gb/p/itmab18898f92ae8?pid=MOBGY9EXSCVBJ9DR&lid=LSTMOBGY9EXSCVBJ9DRBRQWXM&marketplace=FLIPKART&store=tyy%2F4io&srno=b_1_1&otracker=CLP_filters&iid=f766ba76-453b-4dd7-b905-a3127fe12c55.MOBGY9EXSCVBJ9DR.SEARCH&ssid=zbq6j4bbvk0000001727615018338,22997,Smooth usage. Better battery continually playing pubg 7 Hr above 25% charge remaining . Good gaming experience ‚ù§Ô∏è,5.0
2,"Honor X9b (Midnight Black, 256 GB)",https://www.flipkart.com/honor-x9b-midnight-black-256-gb/p/itmab18898f92ae8?pid=MOBGY9EXSCVBJ9DR&lid=LSTMOBGY9EXSCVBJ9DRBRQWXM&marketplace=FLIPKART&store=tyy%2F4io&srno=b_1_1&otracker=CLP_filters&iid=f766ba76-453b-4dd7-b905-a3127fe12c55.MOBGY9EXSCVBJ9DR.SEARCH&ssid=zbq6j4bbvk0000001727615018338,22997,Good,4.0
3,"Honor X9b (Midnight Black, 256 GB)",https://www.flipkart.com/honor-x9b-midnight-black-256-gb/p/itmab18898f92ae8?pid=MOBGY9EXSCVBJ9DR&lid=LSTMOBGY9EXSCVBJ9DRBRQWXM&marketplace=FLIPKART&store=tyy%2F4io&srno=b_1_1&otracker=CLP_filters&iid=f766ba76-453b-4dd7-b905-a3127fe12c55.MOBGY9EXSCVBJ9DR.SEARCH&ssid=zbq6j4bbvk0000001727615018338,22997,Very good phone,5.0
4,"Honor X9b (Midnight Black, 256 GB)",https://www.flipkart.com/honor-x9b-midnight-black-256-gb/p/itmab18898f92ae8?pid=MOBGY9EXSCVBJ9DR&lid=LSTMOBGY9EXSCVBJ9DRBRQWXM&marketplace=FLIPKART&store=tyy%2F4io&srno=b_1_1&otracker=CLP_filters&iid=f766ba76-453b-4dd7-b905-a3127fe12c55.MOBGY9EXSCVBJ9DR.SEARCH&ssid=zbq6j4bbvk0000001727615018338,22997,I am very glad to say that this phone works well even when there is a poor network in your areaüòÇüëçüëçI love honor,5.0
...,...,...,...,...,...
1413,"Honor 90 5G (Diamond Silver, 256 GB)",https://www.flipkart.com/honor-90-5g-diamond-silver-256-gb/p/itmf7d76a3bc7242?pid=MOBGTSCVYRBHQE4N&lid=LSTMOBGTSCVYRBHQE4NP7LZOX&marketplace=FLIPKART&store=tyy%2F4io&srno=b_2_30&otracker=CLP_filters&fm=organic&iid=a68af1e8-0d2a-4933-a7fa-4d6b4079dc63.MOBGTSCVYRBHQE4N.SEARCH&ppt=browse&ppn=browse&ssid=k2enbav7pc0000001727615043576,25932,Good phone,5.0
1414,"Honor 90 5G (Diamond Silver, 256 GB)",https://www.flipkart.com/honor-90-5g-diamond-silver-256-gb/p/itmf7d76a3bc7242?pid=MOBGTSCVYRBHQE4N&lid=LSTMOBGTSCVYRBHQE4NP7LZOX&marketplace=FLIPKART&store=tyy%2F4io&srno=b_2_30&otracker=CLP_filters&fm=organic&iid=a68af1e8-0d2a-4933-a7fa-4d6b4079dc63.MOBGTSCVYRBHQE4N.SEARCH&ppt=browse&ppn=browse&ssid=k2enbav7pc0000001727615043576,25932,"Speaker made on low quality, screen glass broken itself even I am not touch to phone. Battery life is just 2-3 hrs, waste of money.\n\nI am satisfied with this phone.",1.0
1415,"Honor 90 5G (Diamond Silver, 256 GB)",https://www.flipkart.com/honor-90-5g-diamond-silver-256-gb/p/itmf7d76a3bc7242?pid=MOBGTSCVYRBHQE4N&lid=LSTMOBGTSCVYRBHQE4NP7LZOX&marketplace=FLIPKART&store=tyy%2F4io&srno=b_2_30&otracker=CLP_filters&fm=organic&iid=a68af1e8-0d2a-4933-a7fa-4d6b4079dc63.MOBGTSCVYRBHQE4N.SEARCH&ppt=browse&ppn=browse&ssid=k2enbav7pc0000001727615043576,25932,"IRRITATING. I AM AN HONOR LOVER. BUT AFTER BUYING, THIS ONE I REGRET IT. TOTAL WASTAGE OF MONEY. SIMPLE FUNCTIONS ARE VERY COMPLICATED. 200 MP CAMERA? NOT REALLY. NOT WORTHY FOR 30K+. HONOR LITE 10 WHICH I USED BEFOREWAS THE BEST MOBILE. BUT HONOR 90 HAS GREATLY DISAPPOINTED ME.",1.0
1416,"Honor 90 5G (Diamond Silver, 256 GB)",https://www.flipkart.com/honor-90-5g-diamond-silver-256-gb/p/itmf7d76a3bc7242?pid=MOBGTSCVYRBHQE4N&lid=LSTMOBGTSCVYRBHQE4NP7LZOX&marketplace=FLIPKART&store=tyy%2F4io&srno=b_2_30&otracker=CLP_filters&fm=organic&iid=a68af1e8-0d2a-4933-a7fa-4d6b4079dc63.MOBGTSCVYRBHQE4N.SEARCH&ppt=browse&ppn=browse&ssid=k2enbav7pc0000001727615043576,25932,When the phone was turned on it had the call list what so? There was no charge on the phone,1.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10170 entries, 0 to 10169
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Product_Name  10170 non-null  object 
 1   Reviews       10161 non-null  object 
 2   Ratings       10161 non-null  float64
dtypes: float64(1), object(2)
memory usage: 238.5+ KB


In [4]:
df.isnull().sum()

Product_Name    0
Reviews         9
Ratings         9
dtype: int64

In [3]:
# Set options to display more characters
pd.set_option('display.max_colwidth', None)  