In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# To display plots inline
%matplotlib inline

# Read the heart disease data from 'heart.csv' in the working directory.
df = pd.read_csv('heart.csv')

# Print a structural summary of the frame (columns, dtypes, non-null counts).
df.info()

# Count the missing entries in every column and report them.
null_counts = df.isnull().sum()
print("Null values in the dataset:")
print(null_counts)

# Correlation matrix over the numeric columns only.
# DataFrame.corr() in pandas >= 2.0 raises if the frame contains non-numeric
# columns (e.g. string-encoded categories), so the numeric columns are
# selected explicitly; for an all-numeric dataset the result is unchanged.
corr_matrix = df.select_dtypes(include='number').corr()

# Heatmap of the correlation matrix, with each coefficient annotated in its cell
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
plt.title('Correlation Matrix of Heart Disease Dataset')
plt.show()

# One histogram per numerical column, 30 bins each, on a shared 15x10 figure
df.hist(
    figsize=(15, 10),
    bins=30,
)
# Overall title for the whole grid of histograms
plt.suptitle('Distribution of Numerical Features')
plt.show()

# Name of the column holding the heart disease condition.
# Change this single value if your dataset uses a different column name.
target_col = 'target'

# Separate the predictors (every other column) from the label
X = df.drop(target_col, axis=1)
y = df[target_col]

# Split into training and testing sets (80% train, 20% test).
# The fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features using StandardScaler.
# The scaler is fitted on the training rows only and then applied unchanged
# to the test rows, so no test-set statistics enter the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert the scaled arrays back to DataFrames for better visualization.
# The original row index is reused so every scaled row stays aligned (by
# label) with y_train / y_test; without it the frames would get a fresh
# 0..n-1 index that no longer matches the labels' index.
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# Show the first few rows of each feature table: raw training and testing
# data first, followed by their scaled counterparts.
previews = (
    ("Non-Scaled Training Data:", X_train),
    ("\nNon-Scaled Testing Data:", X_test),
    ("\nScaled Training Data:", X_train_scaled_df),
    ("\nScaled Testing Data:", X_test_scaled_df),
)

for heading, frame in previews:
    print(heading)
    print(frame.head())
