# 🩺 Liver Cirrhosis Dataset – Exploratory Data Analysis

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Notebook-wide plotting defaults: every new figure is 10x6 inches and
# seaborn draws on its "whitegrid" style.
plt.rcParams.update({'figure.figsize': (10, 6)})
sns.set_style(style="whitegrid")


## Load the cleaned dataset

In [None]:

# Read the cleaned dataset into `df`, the frame every later cell works on.
# The path is relative to the notebook's working directory -- adjust if needed.
df = pd.read_csv(filepath_or_buffer="../Data/liver_data.csv")
# Preview the first five rows as the cell's rich output.
df.head(n=5)


## Dataset Overview

In [None]:

# Report the frame's dimensions as a (rows, columns) tuple ...
n_rows, n_cols = df.shape
print("Shape:", (n_rows, n_cols))
# ... followed by pandas' per-column summary (dtype and non-null count).
df.info()


## Summary Statistics

In [None]:
# Descriptive statistics, transposed so each feature is a row and each
# statistic (count, mean, std, quartiles, ...) is a column.
df.describe().transpose()

## Target Variable Distribution

In [None]:

# Bar chart of how many rows fall into each value of the target column.
# countplot returns the Axes it drew on, so the title is set on it directly.
sns.countplot(data=df, x='Liver_Cirrhosis').set_title('Class Distribution')
plt.show()


## Histograms for Numerical Features

In [None]:

# One 20-bin histogram per numeric column, laid out as a grid on a single
# 15x12-inch figure.
df.hist(bins=20, figsize=(15, 12))
# Re-pack the subplots of the figure just created so titles and tick labels
# do not overlap.
plt.gcf().tight_layout()
plt.show()


## Boxplots to Detect Outliers

In [None]:

# Names of every numeric column; "number" is the string alias pandas
# accepts for np.number in select_dtypes.
numeric_cols = df.select_dtypes(include="number").columns

# Draw one standalone boxplot per numeric column so outliers can be judged
# on each feature's own scale.
for col in numeric_cols:
    # boxplot returns the Axes it drew on; title it with the column name.
    sns.boxplot(y=df[col]).set_title(col)
    plt.show()


## Correlation Heatmap

In [None]:

# Pairwise correlation matrix of the numeric features, shown as a heatmap.
#
# Only numeric/boolean columns are passed to .corr(): since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# ValueError if the frame contains any string/object column. Filtering with
# select_dtypes works on every pandas version and yields the same matrix as
# before whenever all columns are already numeric.
corr = df.select_dtypes(include=[np.number, "bool"]).corr()
sns.heatmap(corr, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()


## Pairplot of Key Features

In [None]:

# Columns to compare pairwise; the last entry is the target, which is used
# only to colour the points (hue).
key_features = [
    'Direct_Bilirubin',
    'ALT',
    'AST',
    'Albumin',
    'Liver_Cirrhosis',
]
# Scatter-matrix of the selected columns, coloured by target value.
sns.pairplot(data=df.loc[:, key_features], hue='Liver_Cirrhosis')
plt.show()


## Key Insights
- Enzyme levels and bilirubin appear elevated in positive cases.
- Slight class imbalance exists.
- Correlated features may inform feature engineering.