In [None]:
import pandas as pd

# Load the dataset (path is relative to the notebook's working directory)
df = pd.read_csv("sample_dataset.csv")

# Display the first few rows
print(df.head())

# Check data types and missing values
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line.
df.info()

# Display missing values (count of nulls per column)
print("\nMissing Values:")
print(df.isnull().sum())

# Clean the data by filling missing values with the column mean.
# The means are computed before filling, from the non-null values only.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: the in-place form acts on an intermediate object, which triggers
# a chained-assignment warning on pandas 2.x and leaves `df` unchanged once
# Copy-on-Write is active.
df = df.fillna({
    'Sales': df['Sales'].mean(),
    'Profit': df['Profit'].mean(),
})


In [None]:
# Descriptive statistics as reported by DataFrame.describe()
print("\nSummary Statistics:", df.describe(), sep="\n")

# Mean of 'Sales' within each 'Category'
grouped = (
    df
    .groupby('Category')['Sales']
    .mean()
)
print("\nAverage Sales by Category:", grouped, sep="\n")

# Observations (fixed text; these lines are not computed from the data)
print(
    "\nObservations:",
    "- Categories show differences in average sales.",
    "- Profit and Sales values show a typical spread with some outliers.",
    sep="\n",
)


In [None]:
import matplotlib.pyplot as plt

# Line Chart - Sales over Time
# When 'Date' holds plain strings, matplotlib treats every value as a category
# and draws the points in row order, so the x-axis is not a real time axis.
# Parse to datetime and sort chronologically before drawing the line.
# NOTE(review): assumes every 'Date' value is parseable; pd.to_datetime raises
# otherwise - confirm against the dataset.
sales_by_date = df.assign(Date=pd.to_datetime(df['Date'])).sort_values('Date')

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sales_by_date['Date'], sales_by_date['Sales'], label='Sales', color='blue')
ax.set_title("Sales Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Sales")
ax.legend()
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()
plt.show()

# Bar Chart - Average Sales by Category
fig, ax = plt.subplots(figsize=(6, 4))
df.groupby('Category')['Sales'].mean().plot(kind='bar', color='green', ax=ax)
ax.set_title("Average Sales by Category")
ax.set_xlabel("Category")
ax.set_ylabel("Average Sales")
fig.tight_layout()
plt.show()

# Histogram - Profit Distribution
fig, ax = plt.subplots(figsize=(6, 4))
df['Profit'].hist(bins=15, color='skyblue', ax=ax)
ax.set_title("Distribution of Profit")
ax.set_xlabel("Profit")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()

# Scatter Plot - Sales vs. Profit
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df['Sales'], df['Profit'], alpha=0.7, color='purple')
ax.set_title("Sales vs. Profit")
ax.set_xlabel("Sales")
ax.set_ylabel("Profit")
fig.tight_layout()
plt.show()
