In [None]:
# Step 1: Import Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's white-grid theme. set_theme() is the preferred interface;
# sns.set() is only an alias for it that seaborn's documentation says may be
# removed in the future.
sns.set_theme(style="whitegrid")

# Step 2: Load Dataset
df = sns.load_dataset("iris")  # You can replace this with your own dataset

# Step 3: Basic Information
# Labels and tables are printed with separate print() calls. Passing both to a
# single print() inserts the default separator space right after the "\n",
# which shifts the first rendered line (the column header) by one character
# and misaligns it with the rows underneath.
print("First 5 rows:")
print(df.head())
print("\nInfo:\n")
df.info()
print("\nSummary Statistics:")
print(df.describe())
print("\nSpecies Distribution:")
print(df['species'].value_counts())

# Step 4: Check for Missing Values
# Per-column count of null entries.
print("\nMissing Values:")
print(df.isnull().sum())

# Step 5: Univariate Analysis
# Histograms of the DataFrame's columns (15 bins each) on a 10x8 figure,
# with a figure-level title.
df.hist(bins=15, figsize=(10, 8))
plt.gcf().suptitle("Histograms of Features")
plt.show()

# Horizontal box plots drawn from the whole DataFrame; the title is set on
# the axes that seaborn returns.
sns.boxplot(orient="h", data=df).set_title("Boxplot of All Features")
plt.show()

# Step 6: Bivariate Analysis
# Pairwise plots of the features, coloured by the 'species' column. The
# figure title is raised slightly (y=1.02).
sns.pairplot(data=df, hue='species')
plt.gcf().suptitle("Pairwise Feature Relationships", y=1.02)
plt.show()

# Correlation matrix restricted to numeric columns, rendered as an annotated
# heatmap with the "coolwarm" colour map.
correlation = df.corr(numeric_only=True)
sns.heatmap(data=correlation, cmap="coolwarm", annot=True).set_title(
    "Correlation Heatmap"
)
plt.show()

# Step 7: Feature Engineering
# New column: element-wise product of petal length and petal width.
df['petal_area'] = df['petal_length'].mul(df['petal_width'])

# Distribution of the new feature, one box per species.
sns.boxplot(data=df, x='species', y='petal_area').set_title(
    "Boxplot of Petal Area by Species"
)
plt.show()

# Step 8: Final Insights
# Insight 2 is derived from the data instead of being hard-coded, so the
# summary stays truthful if the dataset loaded in Step 2 is replaced.
total_missing = int(df.isnull().sum().sum())

print("\n--- Final Insights ---")
print("1. Petal features show strong correlation and class separation.")
if total_missing == 0:
    print("2. No missing values found in this dataset.")
else:
    print(f"2. Found {total_missing} missing value(s) in this dataset.")
print("3. Newly created 'petal_area' clearly distinguishes species.")

In [None]:
# Mount Google Drive at /content/drive. The google.colab package can only be
# imported inside a Colab runtime, so the import is guarded: elsewhere the
# mount is skipped with a visible message instead of raising ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')