In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Read the heart-disease CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data/heartdisease.csv')
df.head()

In [None]:
# Missing-value count for every column (isna is the canonical name of isnull).
df.isna().sum()

In [None]:
# Cleaning
# Discard every row that holds at least one missing value, then re-count the
# missing values per column as a check on the result.
df = df.dropna(axis=0, how='any')
df.isna().sum()

In [None]:
# 2. Integration
# Split the frame column-wise into two parts and join them back side by side;
# the result holds the same rows with 'Age', 'Sex' and 'Chol' moved to the front.
front_cols = ['Age', 'Sex', 'Chol']
subset1 = df.loc[:, front_cols]
subset2 = df.drop(columns=front_cols)
df_integrated = pd.concat([subset1, subset2], axis='columns')
df_integrated.head()

In [None]:
# Build a placeholder binary label for the modelling demo: 1 when 'Chol' is
# above 240, otherwise 0. This cell is only needed when the dataset has no
# target column of its own.
# NOTE(review): 'Chol' itself stays among the features used later, so a model
# can recover this label directly from it -- the reported accuracy is not a
# meaningful measure of predictive skill.
df_integrated['target'] = df_integrated['Chol'].gt(240).astype(int)

In [None]:
# Preview the first five rows of the integrated frame.
df_integrated.head(n=5)

In [None]:
# Encode categorical columns
# Separate the features (X) from the label (y), then replace every text-like
# feature column with integer codes so the numeric steps that follow can use it.
X = df_integrated.drop(columns='target')
y = df_integrated['target']
for col in X.columns:
    col_dtype = X[col].dtype
    # Comparing the dtype with 'object' alone skips pandas' dedicated string
    # dtypes and 'category' columns, which would then reach the scaler as
    # raw text and fail there. Treat all three as categorical.
    is_categorical = (
        pd.api.types.is_object_dtype(col_dtype)
        or pd.api.types.is_string_dtype(col_dtype)
        or isinstance(col_dtype, pd.CategoricalDtype)
    )
    if is_categorical:
        # One encoder per column; each maps that column's distinct values to 0..k-1.
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])

In [None]:
#3. Transformation
# Standardize every feature column with StandardScaler (fit on all rows of X).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Carry over X's row index: the scaled values come back without one, and a
# default 0..n-1 index would no longer match y's index once dropna() has
# removed rows, silently misaligning any label-based operation on X_df and y.
X_df = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)

In [None]:
# 4. Error Correction
# A negative cholesterol reading is invalid and is replaced by the mean of the
# valid readings. Two things matter here:
#   * the column is named 'Chol' (capital C); a lowercase 'chol' lookup never
#     matches, so the correction would silently be skipped;
#   * X_df is already standardized, so every below-average value is negative
#     by construction. Invalid rows are therefore detected on the unscaled X
#     and only those rows are overwritten in X_df.
if 'Chol' in X_df.columns:
    # Positional boolean mask, so it applies to X_df whatever its row index is.
    invalid_chol = (X['Chol'] < 0).to_numpy()
    # Nothing to fix when no reading is invalid; nothing to impute from when all are.
    if invalid_chol.any() and not invalid_chol.all():
        # Standardization is linear, so the mean of the valid scaled values is
        # the scaled equivalent of the mean of the valid raw readings.
        X_df.loc[invalid_chol, 'Chol'] = X_df.loc[~invalid_chol, 'Chol'].mean()

In [None]:
# 5. Model Building
# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split),
# fit a default logistic-regression classifier on the remaining rows, and
# predict the labels of the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.2,
    random_state=42,
)
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Fraction of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)