# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression

# Load the dataset that the rest of the script inspects and imputes.
# NOTE(review): the path is an empty string, which looks like a placeholder --
# point it at the real CSV file before running.
df = pd.read_csv('')

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Dataset Info:")
df.info()

print("\nSummary of Numerical Data:")
print(df.describe())

# Per-column count of missing values, then the same figure expressed as a
# percentage of the total number of rows.
missing_data = df.isna().sum()
missing_percentage = (missing_data / len(df)) * 100
print("\nMissing Data Percentage:")
print(missing_percentage)

# Plot the boolean missing-value mask: one heatmap cell per DataFrame cell,
# without a colour bar.
plt.figure(figsize=(12, 8))
sns.heatmap(df.isna(), cbar=False, cmap='viridis')
plt.title('Missing Data Heatmap')
plt.show()

# Snapshot the summary statistics BEFORE any value is filled in, so that the
# before/after comparison printed below actually shows the effect of the
# imputation (describing df after the fact would print the same table twice).
summary_before = df.describe()

# Simple imputation: replace missing entries of the numerical column with the
# column mean.
df['numerical_column'] = df['numerical_column'].fillna(df['numerical_column'].mean())

# Simple imputation: replace missing entries of the categorical column with
# its most frequent value. mode() returns an empty Series when every value is
# missing; in that case there is nothing to fill with, so the column is left
# untouched instead of failing on the lookup of the first mode.
category_modes = df['categorical_column'].mode()
if not category_modes.empty:
    df['categorical_column'] = df['categorical_column'].fillna(category_modes.iloc[0])

# KNN imputation over the float64/int64 columns, using 5 neighbours.
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
knn_imputer = KNNImputer(n_neighbors=5)
# By default KNNImputer drops features that have no observed value at fit
# time, which would make its output narrower than the columns being assigned.
# Only columns with at least one observed value are therefore imputed.
imputable_cols = [col for col in numerical_cols if df[col].notna().any()]
if imputable_cols:
    df[imputable_cols] = knn_imputer.fit_transform(df[imputable_cols])

print("\nBefore Imputation Summary:")
print(summary_before)

print("\nAfter Imputation Summary:")
print(df.describe())

# Distribution checks on 'numerical_column': a boxplot followed by a histogram
# with a KDE overlay, each drawn in its own 12x6 figure and shown immediately.
wide_figure = (12, 6)

plt.figure(figsize=wide_figure)
sns.boxplot(data=df, x='numerical_column')
plt.title('Boxplot After Imputation')
plt.show()

imputed_values = df['numerical_column']
plt.figure(figsize=wide_figure)
sns.histplot(imputed_values, kde=True)
plt.title('Histogram After Imputation')
plt.show()
