In [None]:
import io

import dash
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html
from dash.dependencies import Input, Output, State
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
from google.colab import files

# Prompt for the dataset through the Colab upload widget.
# NOTE(review): files.upload() is expected to return {filename: file bytes};
# confirm against the Colab version in use.
uploaded = files.upload()
if not uploaded:
    # Cancelling the widget yields an empty mapping; fail here with a clear
    # message instead of a NameError on `df` in a later cell.
    raise ValueError('No file was uploaded; upload the heart attack CSV to continue.')

# Every later cell reads `df`, so build it here from the first uploaded file.
# Reading from the in-memory bytes avoids depending on the saved filename
# (the upload widget may store duplicates under a "name (1).csv" style name).
uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
df = pd.read_csv(io.BytesIO(uploaded_bytes))

# Quick sanity check of what was loaded.
df.head()

Saving heart_attack_south_africa (1).csv to heart_attack_south_africa (1).csv


In [None]:
# 3D scatter of age, cholesterol and systolic blood pressure, one marker per
# row of `df`, coloured by the recorded heart attack outcome.
fig = px.scatter_3d(
    df,
    x='Age',
    y='Cholesterol_Level',
    z='Blood_Pressure_Systolic',
    color='Heart_Attack_Outcome',
    hover_data=['Gender', 'Diabetes_Status', 'Obesity_Index'],
).update_layout(title='Heart Attack Risk Factors')
fig.show()

In [None]:
# Build logistic regression model on two features (age and cholesterol).
X = df[['Age', 'Cholesterol_Level']]
y = df['Heart_Attack_Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split
# and for the prediction grid below.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Create a 100 x 100 grid spanning the observed range of both features.
age_range = np.linspace(df['Age'].min(), df['Age'].max(), 100)
chol_range = np.linspace(df['Cholesterol_Level'].min(), df['Cholesterol_Level'].max(), 100)
age_grid, chol_grid = np.meshgrid(age_range, chol_range)
grid_points = np.column_stack((age_grid.ravel(), chol_grid.ravel()))

# The scaler was fitted on a DataFrame, so hand it a DataFrame with the same
# column names; passing the bare ndarray triggers sklearn's
# "X does not have valid feature names" warning.
grid_points_scaled = scaler.transform(pd.DataFrame(grid_points, columns=X.columns))

# Column 1 of predict_proba is the probability of the second entry in
# model.classes_.
# NOTE(review): assumes Heart_Attack_Outcome is binary with 1 = heart attack - confirm.
risk_probabilities = model.predict_proba(grid_points_scaled)[:, 1]
risk_grid = risk_probabilities.reshape(age_grid.shape)

# Visualize risk surface. With meshgrid's default indexing, risk_grid[i, j]
# is the risk at (age_range[j], chol_range[i]): rows follow y, columns follow x.
fig = go.Figure(data=[go.Surface(x=age_range, y=chol_range, z=risk_grid, colorscale='Viridis')])
fig.update_layout(
    title='Predicted Heart Attack Risk Surface',
    # Label the axes so the figure can be read on its own.
    scene=dict(
        xaxis_title='Age',
        yaxis_title='Cholesterol_Level',
        zaxis_title='Predicted risk',
    ),
)
fig.show()


X does not have valid feature names, but StandardScaler was fitted with feature names



In [None]:
# Select risk factors and preprocess data. K-means is distance based, so the
# features are standardized first to keep any one of them from dominating.
risk_factors = df[['Age', 'Cholesterol_Level', 'Blood_Pressure_Systolic', 'Obesity_Index']]
scaler = StandardScaler()
risk_factors_scaled = scaler.fit_transform(risk_factors)

# Perform k-means clustering. random_state pins the centroid initialization so
# cluster ids are reproducible across runs; n_init is set explicitly because
# its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(risk_factors_scaled)
df['Cluster'] = kmeans.labels_

# Visualize clusters. Cluster ids are categories, not magnitudes, so plot them
# as strings to get a discrete legend instead of a continuous colour bar.
# df itself keeps the integer labels; only the plotted copy is converted.
fig = px.scatter_3d(
    df.assign(Cluster=df['Cluster'].astype(str)),
    x='Age',
    y='Cholesterol_Level',
    z='Blood_Pressure_Systolic',
    color='Cluster',
    hover_data=['Patient_ID', 'Obesity_Index'],
    category_orders={'Cluster': [str(label) for label in range(kmeans.n_clusters)]},
)
fig.update_layout(title='Clustering Analysis')
fig.show()

In [None]:
app = dash.Dash(__name__)

# Layout: outcome filter, bar chart of the filtered outcome counts, and a
# button that downloads the filtered rows as CSV.
app.layout = html.Div([
    dcc.Dropdown(
        id='outcome-dropdown',
        options=[
            {'label': 'No Heart Attack', 'value': 0},
            {'label': 'Heart Attack', 'value': 1},
        ],
        value=[0, 1],
        multi=True,
    ),
    dcc.Graph(id='bar-chart'),
    html.Button('Download Filtered Data', id='download-button', n_clicks=0),
    dcc.Download(id='download-data'),
])


def _filter_by_outcome(selected_outcomes):
    """Return the rows of ``df`` whose Heart_Attack_Outcome is selected.

    ``selected_outcomes`` is the dropdown value; ``None`` is treated like an
    empty selection because ``Series.isin(None)`` raises a TypeError.
    """
    return df[df['Heart_Attack_Outcome'].isin(selected_outcomes or [])]


@app.callback(
    Output('bar-chart', 'figure'),
    Input('outcome-dropdown', 'value')
)
def update_bar_chart(selected_outcomes):
    """Redraw the outcome-count bar chart for the current dropdown selection."""
    filtered_df = _filter_by_outcome(selected_outcomes)
    fig = px.bar(filtered_df['Heart_Attack_Outcome'].value_counts(), title='Filtered Heart Attack Outcome Distribution (Bar Chart)')
    return fig


@app.callback(
    Output('download-data', 'data'),
    Input('download-button', 'n_clicks'),
    # State, not Input: the selection is read when the button is clicked but
    # changing it must not start a download by itself.
    State('outcome-dropdown', 'value'),
    # Without this the callback also runs once on page load (n_clicks == 0)
    # and pushes a file to the browser before the user asked for it.
    prevent_initial_call=True,
)
def download_filtered_data(n_clicks, selected_outcomes):
    """Send the currently filtered rows to the browser as ``filtered_data.csv``.

    Runs only in response to the download button; returns ``dash.no_update``
    if it is somehow invoked before any click.
    """
    if not n_clicks:
        return dash.no_update
    filtered_df = _filter_by_outcome(selected_outcomes)
    # send_data_frame takes the writer method itself and calls it for us.
    return dcc.send_data_frame(filtered_df.to_csv, 'filtered_data.csv')


if __name__ == '__main__':
    # app.run() replaces the older app.run_server().
    app.run()

<IPython.core.display.Javascript object>