In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's legacy global RNG so the synthetic data below is identical on
# every run of this cell.
np.random.seed(10)

# The four product lines a record can belong to.
category = ["Electronics", "Clothing", "Home Goods", "Software"]

# Draw 100 indices in [0, 4) and translate each one into its category label.
product_category = [category[idx] for idx in np.random.randint(0, 4, 100)]

# Units sold per record: 100 integers drawn from [1, 1000).
u_sold = np.random.randint(1, 1000, 100)

# Revenue = 250 per unit sold plus a uniform offset drawn from
# [50000, 800000), rounded to two decimals.
rev = np.random.uniform(50000, 800000, 100) + u_sold * 250
rev = rev.round(2)


In [None]:
# Re-seed so that re-running this cell on its own reproduces the same month
# assignment and the same missing-value pattern.
# NOTE(review): this is the same seed value the data-generation cell uses, so
# the draws below replay that cell's random stream — confirm this is intended.
np.random.seed(10)

# Assemble the sales table from the arrays produced by the generation cell.
df = pd.DataFrame({"Product_Category":product_category, "Units_Sold":u_sold, "Revenue":rev})

# Size every random draw from the frame itself rather than repeating the
# literal 100, so this cell keeps working if the number of generated rows is
# changed upstream.
n_rows = len(df)

# Give each row a random month start out of the 24 months beginning Jan 2024,
# sort chronologically while the column is still datetime, then replace it
# with a text label such as "Jan 2024".
date_series = pd.date_range(start="01-01-2024", periods=24, freq='MS')
df["Month"] = np.random.choice(date_series, size=n_rows)
df = df.sort_values(by='Month').reset_index(drop=True)
df['Month'] = df['Month'].dt.strftime('%b %Y')

# Blank out Revenue wherever a uniform draw falls below 0.1 (roughly 10% of
# rows) to simulate missing data.
df['Revenue'] = df['Revenue'].mask(np.random.random(n_rows) < 0.1)

# Inspect the frame while the gaps are still present.
df.info()
print(f"\n{df.head()}\n\nDatatypes: \n{df.dtypes} \n\nNulls:\n {df.isnull().sum()}")

# Impute: each missing Revenue is replaced by the mean Revenue of the row's
# product category (the group mean is computed over the non-missing values).
cat_avg_rev = df.groupby('Product_Category')['Revenue'].mean()
df['Revenue'] = df['Revenue'].fillna(df['Product_Category'].map(cat_avg_rev))

# Summary after imputation.
print(f"\nShape: {df.shape}\n\nStats:\n{df.describe()}\n\nCorr:\n{df[['Units_Sold','Revenue']].corr()}")
print(f"\nAvg Rev by Cat:\n{df.groupby('Product_Category')['Revenue'].mean()}")


In [None]:
# Make the numeric dtypes explicit before summarising: Revenue as float,
# Units_Sold as int.
df['Revenue'] = df['Revenue'].astype(float)
df['Units_Sold'] = df['Units_Sold'].astype(int)

# Headline descriptive statistics for the Revenue column.
print(f"\nMean: {df.Revenue.mean()}\nMedian: {df.Revenue.median()}\nStd: {df.Revenue.std()}\nMin: {df.Revenue.min()}\nMax: {df.Revenue.max()}")

In [None]:
# One 15x5 figure holding two side-by-side panels.
fig, (ax_hist, ax_bar) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: histogram of the Revenue column in 15 bins.
ax_hist.hist(df['Revenue'], bins=15, edgecolor='black', alpha=0.5)
ax_hist.set_title('Revenue Histogram')
ax_hist.set_xlabel("Value")
ax_hist.set_ylabel("Frequency")

# Right panel: total Units_Sold for each product category, as bars.
units_by_category = df.groupby('Product_Category')['Units_Sold'].sum()
units_by_category.plot(kind="bar", ax=ax_bar)
ax_bar.set_title('Units Sold per Category')
ax_bar.set_xlabel("Category")
ax_bar.set_ylabel("Units")

# Adjust spacing so the panel labels do not overlap, then render.
fig.tight_layout()
plt.show()

