# Nykaa 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [3]:
df = pd.read_csv("Nykaa_Products_2023.csv")

In [4]:
data = pd.read_csv('Nykaa_Products_2023.csv', encoding_errors='ignore')

In [5]:
data.head()

Unnamed: 0,Product Name,Reviews,Original Price,Offer Price,Discount,Free Gifts
0,L'Oreal Paris Glycolic Bright Serum With Glyco...,-5109.0,MRP:₹749,₹599,20% Off,
1,Nykaa Skin Secrets Gold Sheet Mask + Nykaa Ski...,-25.0,MRP:₹1099,₹879,20% Off,Extra 10% Off
2,Dot & Key Vitamin C + E Face Sunscreen With SP...,-3606.0,MRP:₹495,₹421,15% Off,Extra 5% Off
3,Olay Total Effects 7 In One Anti-Ageing Day Cr...,-6046.0,MRP:₹798,₹638,20% Off,
4,Nykaa SKINRX 10% Vitamin C with 5% Niacinamide...,-1421.0,MRP:₹699,₹559,20% Off,


In [6]:
# Step 2: Clean Reviews → extract digits only
df["Reviews"] = df["Reviews"].astype(str).str.extract(r'(\d+)')
df["Reviews"] = df["Reviews"].astype(float)

# Step 3: Clean Prices → remove everything except digits
df["Original Price"] = df["Original Price"].astype(str).str.replace(r"[^\d]", "", regex=True)
df["Offer Price"] = df["Offer Price"].astype(str).str.replace(r"[^\d]", "", regex=True)

df["Original Price"] = pd.to_numeric(df["Original Price"], errors="coerce")
df["Offer Price"] = pd.to_numeric(df["Offer Price"], errors="coerce")

# Step 4: Clean Discount → keep only numbers
df["Discount"] = df["Discount"].astype(str).str.extract(r'(\d+)')
df["Discount"] = pd.to_numeric(df["Discount"], errors="coerce")

# Step 5: Clean Free Gifts → binary flag
df["Free Gifts"] = df["Free Gifts"].notna().astype(int)

# Step 6: Derived columns
df["Savings"] = df["Original Price"] - df["Offer Price"]
df["Calculated Discount %"] = (df["Savings"] / df["Original Price"] * 100).round(2)

# Quick check
print(df.head(10))
print(df.info())
print(df.describe())

                                        Product Name  Reviews  Original Price  \
0  L'Oreal Paris Glycolic Bright Serum With Glyco...   5109.0           749.0   
1  Nykaa Skin Secrets Gold Sheet Mask + Nykaa Ski...     25.0          1099.0   
2  Dot & Key Vitamin C + E Face Sunscreen With SP...   3606.0           495.0   
3  Olay Total Effects 7 In One Anti-Ageing Day Cr...   6046.0           798.0   
4  Nykaa SKINRX 10% Vitamin C with 5% Niacinamide...   1421.0           699.0   
5  Biotique Bio Papaya Revitalizing Tan Removal S...  47161.0           100.0   
6         Dr. Sheth's Ceramide & Vitamin C Sunscreen   9803.0           499.0   
7  Lotus Herbals YouthRx Anti-Ageing Transforming...   3583.0           755.0   
8  Raaga Professional De Tan Removal Creme With K...   3266.0          1155.0   
9         Kapiva Ayurveda Skin Rituals Aloe Rose Gel     13.0           399.0   

   Offer Price  Discount  Free Gifts  Savings  Calculated Discount %  
0          599      20.0           0 

**1. Which product has the highest number of reviews?**

In [8]:
highest_reviews = df[df['Reviews'] == df['Reviews'].max()][['Product Name', 'Reviews']]
print(highest_reviews)

                                          Product Name   Reviews
10   Nykaa Naturals Skin Secrets Exotic Indulgence ...  188881.0
88   Nykaa Skin Secrets Exotic Indulgence Gold Shee...  188881.0
188  Nykaa Skin Secrets Exotic Indulgence Rose + Go...  188881.0
190  Nykaa Skin Secrets Exotic Indulgence Honey + L...  188881.0
202  Nykaa Skin Secrets Exotic Indulgence Rice Wate...  188881.0
226  Nykaa Skin Secrets Exotic Indulgence Strawberr...  188881.0
231  Nykaa Skin Secrets Exotic Indulgence Green Tea...  188881.0
237  Nykaa Skin Secrets Exotic Indulgence Black Mud...  188881.0
263  Nykaa Skin Secrets Exotic Indulgence Rice + Ca...  188881.0
295  Nykaa Skin Secrets Exotic Indulgence Acai Berr...  188881.0
315  Nykaa Skin Secrets Exotic Indulgence Snail She...  188881.0
345  Nykaa Skin Secrets Exotic Indulgence Charcoal ...  188881.0
354  Nykaa Skin Secrets Exotic Indulgence Avocado +...  188881.0
474  Nykaa Skin Secrets Exotic Indulgence Matcha Te...  188881.0


**2. Average reviews across all products?**

In [10]:
avg_reviews = df['Reviews'].mean() 
print(avg_reviews)

1693.3425365158532


**3. Top 5 products with most reviews?**

In [12]:
top5_reviews = df[['Product Name', 'Reviews']].sort_values(by='Reviews', ascending=False).head(5)
print(top5_reviews)

                                          Product Name   Reviews
231  Nykaa Skin Secrets Exotic Indulgence Green Tea...  188881.0
188  Nykaa Skin Secrets Exotic Indulgence Rose + Go...  188881.0
354  Nykaa Skin Secrets Exotic Indulgence Avocado +...  188881.0
295  Nykaa Skin Secrets Exotic Indulgence Acai Berr...  188881.0
315  Nykaa Skin Secrets Exotic Indulgence Snail She...  188881.0


**4. Product with highest discount?**

In [14]:
highest_discount= df[['Product Name','Discount']].sort_values(by='Discount', ascending=False).head(1)
print(highest_discount)

                                           Product Name  Discount
6012  Le Marbelle Black Obsidian Roller Face Massage...      88.0


**5. Average original and offer prices**

In [16]:
avg_Original=df['Original Price'].mean()
print(avg_Original)

791.2083240843507


In [17]:
avg_Offer=df['Offer Price'].mean()
print(avg_Offer)

942.0959166666667


**6. Product with highest discount**

In [19]:
highest_discount = df[['Product Name', 'Discount']].iloc[df['Discount'].idxmax()]
print(highest_discount)

Product Name    Le Marbelle Black Obsidian Roller Face Massage...
Discount                                                     88.0
Name: 6012, dtype: object


**7. Product with lowest discount**

In [21]:
lowest_discount = df[['Product Name', 'Discount']].iloc[df['Discount'].idxmin()]
print(lowest_discount)

Product Name    The Face Shop Rice Water Bright Foaming Cleans...
Discount                                                      1.0
Name: 392, dtype: object


**8. Average discount across all products**

In [23]:
avg_discount=df['Discount'].mean()
print(avg_discount)

21.821599377154932


**9. Compare high vs low priced products' reviews**

In [25]:
median_price = df['Price'].median()

high_price = df[df['Price'] > median_price]
low_price = df[df['Price'] <= median_price]


KeyError: 'Price'

In [26]:
median_price=df['Price'].median()
high_price=df[df['Price']>median_price]
low_price=df[df['Price']<median_price]

print("High", high_price['Reviews'].mean())
print("low", low_price['Reviews'].mean())


KeyError: 'Price'

In [None]:
median_price = df['Price'].median()

high_price = df[df['Price'] > median_price]
low_price = df[df['Price'] <= median_price]

print("High-priced products average reviews:", high_price['Reviews'].mean())
print("Low-priced products average reviews:", low_price['Reviews'].mean())