# In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('/content/Cardiotocographic.csv')

# Preview the leading rows of the raw table
print("First few rows of the dataset:", df.head(5), sep="\n")

# Data Cleaning and Preparation
# Per-column count of missing entries
missing_per_column = df.isna().sum()
print("\nMissing values in the dataset:", missing_per_column, sep="\n")

# Nothing is dropped or imputed here. If the counts above are non-zero, rows
# can be removed with df.dropna(inplace=True) or gaps filled with
# df.fillna(value, inplace=True).

# Column dtypes, to spot columns that were parsed with an unexpected type
column_types = df.dtypes
print("\nData types of the columns:", column_types, sep="\n")

# No conversion is performed here; a column can be cast when needed, e.g.
# df['column_name'] = df['column_name'].astype(float)

# Statistical Summary
summary_stats = df.describe()
print("\nStatistical summary of the dataset:", summary_stats, sep="\n")

# Quantile Analysis
# Quantiles and the IQR rule only make sense for numeric data. Working on the
# numeric columns keeps this section from failing if the CSV contains any
# text/categorical column (DataFrame.quantile does not skip such columns by
# default in recent pandas releases).
numeric_df = df.select_dtypes(include='number')

# Display quartiles (25th, 50th and 75th percentiles) of each numeric column
print("\nQuantiles:")
print(numeric_df.quantile([0.25, 0.5, 0.75]))

# Custom quantiles
print("\nCustom Quantiles (10th, 90th percentiles):")
print(numeric_df.quantile([0.1, 0.9]))

# Calculate IQR for each column and identify outliers
Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1

# A value is flagged when it lies more than 1.5 * IQR below Q1 or above Q3.
# `outliers` is a boolean frame with the same shape as `numeric_df`; summing
# it gives the number of flagged values per column.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
outliers = (numeric_df < lower_bound) | (numeric_df > upper_bound)
print("\nOutliers detected:")
print(outliers.sum())

# Data Visualization
# Histograms for numerical variables (one subplot per column)
df.hist(figsize=(15, 10), bins=20)
plt.suptitle('Histograms of Numerical Variables')
plt.show()

# Boxplots for numerical variables showing quantiles
plt.figure(figsize=(15, 10))
sns.boxplot(data=df)
plt.title('Boxplots of Numerical Variables showing Quantiles')
plt.show()

# Correlation heatmap
# Correlation is computed on the numeric columns only, so a text/categorical
# column in the CSV cannot make DataFrame.corr() raise.
plt.figure(figsize=(12, 8))
sns.heatmap(
    df.select_dtypes(include='number').corr(),
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
)
plt.title('Correlation Heatmap')
plt.show()

# Pair plot for deeper insights
sns.pairplot(df)
# y > 1 places the title just above the top row of the grid
plt.suptitle('Pair Plot of Variables', y=1.02)
plt.show()

# Pattern Recognition and Insights
# Correlations between variables
# Restricted to numeric columns so that a text/categorical column in the CSV
# cannot make DataFrame.corr() raise.
correlations = df.select_dtypes(include='number').corr()
print("\nCorrelation matrix:")
print(correlations)

# Look for patterns or trends (e.g., scatter plots)
# Example scatter plot between two variables: the 'LB' and 'AC' columns
plt.figure(figsize=(8, 6))
sns.scatterplot(x='LB', y='AC', data=df)
plt.title('Scatter Plot: LB vs AC')
plt.show()

# Conclusion
# Summarize key insights
# Example: If you notice a strong correlation or an interesting pattern, you can comment on it here.
