In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Step 2: Load Dataset
# Example: load CSV file (replace the placeholder path with your own dataset)
df = pd.read_csv("your_dataset.csv")

# Quick peek at the data
print(df.shape)     # (rows, columns)
# info() writes its report (column types, non-null counts) to stdout itself and
# returns None, so it must not be wrapped in print() -- that would append a
# stray "None" line to the output.
df.info()
print(df.head())    # first 5 rows

# Step 3: Data Cleaning
# Report how many missing entries each column has
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Remove exact duplicate rows
df = df.drop_duplicates()

# Impute the placeholder column with its median
# (whether to fill or drop missing values depends on the context)
column_median = df['column_name'].median()
df['column_name'] = df['column_name'].fillna(column_median)

# Step 4: Descriptive Statistics
# Summary stats for numerical features
print(df.describe())

# Summary stats for categorical features.
# describe(include='object') raises ValueError when the frame has no object
# columns, so only call it when at least one is present.
object_columns = df.select_dtypes(include='object').columns
if len(object_columns) > 0:
    print(df.describe(include='object'))
else:
    print("No categorical (object) columns to describe.")

# Step 5: Univariate Analysis
# Pull out the placeholder numeric column once and reuse it for both plots
numeric_values = df['numerical_column']

# Distribution of the numeric column with a KDE curve overlaid
sns.histplot(numeric_values, kde=True)
plt.show()

# Box plot of the same column to make outliers visible
sns.boxplot(x=numeric_values)
plt.show()

# Frequency of each category in the categorical column
sns.countplot(data=df, x='categorical_column')
plt.show()

# Step 6: Bivariate Analysis
# Scatter plot (numeric vs numeric)
sns.scatterplot(x='feature1', y='feature2', data=df)
plt.show()

# Correlation heatmap
# Restrict to numeric columns before correlating: calling corr() on a frame
# that still contains string columns (such as 'categorical_column' used below)
# raises in pandas >= 2.0, where non-numeric columns are no longer dropped
# silently.
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(10,6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.show()

# Categorical vs numeric (boxplot)
sns.boxplot(x='categorical_column', y='numerical_column', data=df)
plt.show()

# Step 7: Multivariate Analysis
# Pairwise scatter plots for a handful of numeric features, KDE on the diagonal
pair_features = ['feature1', 'feature2', 'feature3']
sns.pairplot(df[pair_features], diag_kind='kde')
plt.show()

# Step 8: Outlier Detection
# Using Z-score
from scipy import stats

# Work on a plain float array of the numeric columns (missing values as NaN).
numeric_values = df.select_dtypes(include=np.number).to_numpy(dtype=float, na_value=np.nan)
# nan_policy='omit' ignores missing values when computing each column's mean
# and standard deviation. With the default ('propagate') a single NaN turns
# every z-score of that column into NaN, so none of its outliers is flagged.
z_scores = np.abs(stats.zscore(numeric_values, nan_policy='omit'))
# Tuple of (row positions, column positions) of entries whose |z| exceeds 3;
# the column positions index into the numeric columns only.
z_outlier_positions = np.where(z_scores > 3)
print(z_outlier_positions)

# Or using IQR
Q1 = df['numerical_column'].quantile(0.25)
Q3 = df['numerical_column'].quantile(0.75)
IQR = Q3 - Q1
# Values beyond 1.5 * IQR outside the quartiles are treated as outliers
lower_bound = Q1 - 1.5*IQR
upper_bound = Q3 + 1.5*IQR
outliers = df[(df['numerical_column'] < lower_bound) | (df['numerical_column'] > upper_bound)]
print(outliers)

# Step 9: Insights & Next Steps

# What patterns stand out?

# Are there correlations that suggest predictive features?

# Do categorical groups differ significantly?

# Are there missing/outlier values that must be handled before modeling?

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def run_eda(df, target=None):
    """
    Perform quick EDA on a pandas DataFrame.

    Prints the shape, the ``df.info()`` report, per-column missing-value
    counts and descriptive statistics, then draws histograms of the numeric
    columns, a count plot per object column, a correlation heatmap and,
    when ``target`` is given, a box plot of each numeric column by target.

    Args:
        df (pd.DataFrame): The dataset
        target (str, optional): Target/label column for supervised tasks.
            Ignored when it is falsy or not a column of ``df``.

    Returns:
        None. All results are printed or shown as matplotlib figures.
    """
    print("🔍 DATA SHAPE")
    print(df.shape)
    print("\n📋 DATA INFO")
    # info() writes its report itself and returns None; printing the return
    # value would add a stray "None" line.
    df.info()

    print("\n📊 MISSING VALUES")
    missing_counts = df.isnull().sum()  # computed once, reused for the filter
    print(missing_counts[missing_counts > 0])

    if df.shape[1] == 0:
        # describe() raises ValueError on a frame without columns, and there
        # would be nothing to plot either.
        print("\nNo columns to describe or plot.")
        return

    num_cols = df.select_dtypes(include=np.number).columns
    cat_cols = df.select_dtypes(include='object').columns

    print("\n📈 DESCRIPTIVE STATISTICS (Numerical)")
    print(df.describe())

    print("\n📊 DESCRIPTIVE STATISTICS (Categorical)")
    if len(cat_cols) > 0:
        print(df.describe(include='object'))
    else:
        # describe(include='object') raises ValueError when no object column
        # exists, so report that instead of crashing.
        print("No categorical (object) columns.")

    # ----------------------------
    # Visualizations
    # ----------------------------

    # Histograms for numerical columns
    if len(num_cols) > 0:
        df[num_cols].hist(figsize=(15, 10), bins=20)
        plt.suptitle("Histograms of Numerical Features")
        plt.show()

    # Count plots for categorical columns, most frequent category first
    for col in cat_cols:
        plt.figure(figsize=(8,4))
        sns.countplot(y=col, data=df, order=df[col].value_counts().index)
        plt.title(f"Count Plot - {col}")
        plt.show()

    # Correlation heatmap (needs at least two numeric columns to be meaningful)
    if len(num_cols) > 1:
        plt.figure(figsize=(10,6))
        sns.heatmap(df[num_cols].corr(), annot=True, cmap="coolwarm")
        plt.title("Correlation Heatmap")
        plt.show()

    # Boxplot for target vs numerical features
    if target and target in df.columns:
        for col in num_cols:
            if col == target:
                # A numeric target would otherwise be plotted against itself.
                continue
            plt.figure(figsize=(6,4))
            sns.boxplot(x=target, y=col, data=df)
            plt.title(f"{col} vs {target}")
            plt.show()
