In [None]:
# -----------------------------------------------
# 1. Import Libraries
# -----------------------------------------------
import pandas as pd
import numpy as np

In [None]:
# -----------------------------------------------
# 2. Load and Explore Dataset
# -----------------------------------------------
# NOTE: Point DATA_PATH at your actual CSV file.
DATA_PATH = "product_usage.csv"
df = pd.read_csv(DATA_PATH)

print("📌 First 5 rows of the dataset:")
print(df.head())

print("\n📌 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()

print("\n📌 Summary Statistics:")
# include='all' summarises non-numeric columns as well as numeric ones.
print(df.describe(include='all'))

In [None]:
# -----------------------------------------------
# 3. Clean and Prepare the Data
# -----------------------------------------------

# Example: Rename ambiguous columns (edit based on actual column names).
# Keys that are not present in the frame are ignored by rename(), so this
# cell can be re-run safely after the columns have already been renamed.
COLUMN_ALIASES = {
    'uid': 'user_id',
    'ts': 'timestamp',
    'feat': 'feature_used',
}
df_renamed = df.rename(columns=COLUMN_ALIASES)

# Report missing values per column before any rows are removed
print("\n📌 Missing Values Before Cleaning:")
print(df_renamed.isnull().sum())

# Build the cleaned frame as a new object instead of mutating in place:
#   1. drop every row that has a missing value in any column
#   2. convert the timestamp column to datetime
# `df` is rebound to the result because the later cells read `df`.
df = (
    df_renamed
    .dropna()
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)

# Check data types
print("\n📌 Updated Data Types:")
print(df.dtypes)

In [None]:
# -----------------------------------------------
# 4. Analyze Feature Usage
# -----------------------------------------------

# How many rows (usage events) each feature has in total, most used first
feature_counts = (
    df['feature_used']
    .value_counts()
    .rename_axis('feature')
    .reset_index(name='total_usage')
)

# Number of events per (user, feature) pair ...
events_per_user_feature = df.groupby(['user_id', 'feature_used']).size()

# ... averaged over the users that used each feature at least once
avg_usage = (
    events_per_user_feature
    .groupby(level='feature_used')
    .mean()
    .rename_axis('feature')
    .reset_index(name='avg_usage_per_user')
)

# One row per feature: total usage next to average usage per user
summary = feature_counts.merge(avg_usage, on='feature')

print("\n📌 Feature Usage Summary Table:")
print(summary)

In [None]:
# -----------------------------------------------
# 5. Visualization (Optional)
# -----------------------------------------------
import matplotlib.pyplot as plt

# Bar chart of total usage per feature, drawn through the explicit
# Figure/Axes interface rather than the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(summary['feature'], summary['total_usage'], color='lightblue')

# The title is plain text on purpose: Matplotlib's default font has no emoji
# glyphs, so an emoji here triggers a "Glyph missing from current font"
# warning and is drawn as an empty box.
ax.set_title("Total Feature Usage")
ax.set_xlabel("Feature")
ax.set_ylabel("Usage Count")

# Tilt the feature names so long labels do not overlap
ax.tick_params(axis='x', labelrotation=45)

fig.tight_layout()
plt.show()

In [None]:
# -----------------------------------------------
# 6. Markdown Summary (Written observations)
# -----------------------------------------------
from IPython.display import display, Markdown

display(Markdown("## 💡 Summary Insights"))
display(Markdown(f"- Total unique features: **{df['feature_used'].nunique()}**"))
display(Markdown(f"- Total unique users: **{df['user_id'].nunique()}**"))

if summary.empty:
    # idxmax() raises ValueError on an empty column (e.g. when cleaning
    # removed every row), so report the absence of data instead of crashing.
    display(Markdown("- **Most Used Feature**: _no usage data available_"))
else:
    # Row of the summary table with the highest total usage count
    top_feature = summary.loc[summary['total_usage'].idxmax()]
    display(Markdown(f"- **Most Used Feature**: `{top_feature['feature']}` with `{top_feature['total_usage']}` uses"))