# Importing Libraries

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation / FutureWarning messages raised by
# the library calls below — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Load the dataset from "heart.csv" (path relative to the working directory)
# and preview its first rows.
df = pd.read_csv("heart.csv")
df.head()

In [None]:
# Summary statistics for the columns of df.
df.describe()

In [None]:
# Column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Report the dataset dimensions (rows first, then columns).
n_rows, n_cols = df.shape
print("Number of Rows" , n_rows)
print("Number of Columns" , n_cols)

# Handling null values and duplicate rows

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# True when at least one row is an exact duplicate of an earlier row.
# Stored in a plain variable: assigning `df.dup = ...` sets an attribute on the
# DataFrame object itself, which pandas warns about and which can be confused
# with column access.
has_duplicates = df.duplicated().any()
print(has_duplicates)

In [None]:
# Remove rows that exactly duplicate an earlier row (the first occurrence is kept).
df = df.drop_duplicates()

In [None]:
# (rows, columns) of df after the duplicate rows were dropped.
df.shape

# Data processing

In [None]:
# Split the columns by cardinality: a column with at most 10 distinct values is
# treated as categorical, every other column as continuous.
cat_val = [name for name in df.columns if df[name].nunique() <= 10]
con_val = [name for name in df.columns if name not in cat_val]
    

In [None]:
# Columns classified as categorical (at most 10 distinct values).
cat_val


In [None]:
# Columns classified as continuous (more than 10 distinct values).
con_val

# Encoding Categorical data

In [None]:
# Categorical columns that are candidates for encoding.
cat_val

In [None]:
# Distinct codes present in the "cp" column.
df["cp"].unique()

In [None]:
# Keep 'sex' and 'target' out of the list of columns to one-hot encode.
# Filtering instead of list.remove() keeps the cell safe to re-run: remove()
# raises ValueError once the name is no longer in the list.
cat_val = [col for col in cat_val if col not in ('sex', 'target')]

In [None]:
# One-hot encode the categorical columns, dropping the first level of each so
# the indicator columns are not perfectly collinear.
# The raw categorical columns are kept next to their indicators because later
# plotting cells still read df["cp"] and df["fbs"];
# pd.get_dummies(df, columns=cat_val) would remove them and those cells would
# fail with KeyError when the notebook is run top to bottom.
# NOTE(review): leave the raw columns in cat_val out of any model matrix.
dummies = pd.get_dummies(df[cat_val], columns=cat_val, drop_first=True)
# Drop indicator columns left by a previous run so the cell can be re-executed.
df = df.drop(columns=dummies.columns, errors="ignore").join(dummies)

In [None]:
# Preview the frame after one-hot encoding.
df.head()

# Feature Scaling

In [None]:
# Preview the frame before scaling.
df.head()

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the continuous columns (zero mean, unit variance): fit the scaler
# on them, then overwrite the columns with the transformed values.
# NOTE(review): the columns are replaced in place, so any later plot of a column
# listed in con_val shows standardised values rather than the original units.
st = StandardScaler().fit(df[con_val])
df[con_val] = st.transform(df[con_val])

In [None]:
# Preview the frame after the continuous columns were standardised.
df.head()

# Grouping

In [None]:
# The "sex" column as a Series.
df["sex"]

In [None]:
# Distinct codes used in the sex column.
df['sex'].unique()

In [None]:
# Number of rows per sex code, most frequent first.
df['sex'].value_counts()

In [None]:
# Bar chart of the number of rows per sex code.
# value_counts() orders its result by frequency, so hard-coded tick labels are
# only right while one particular sex is the majority. Sorting by the code
# makes the order deterministic and lets the labels follow the same
# 0 -> Female, 1 -> Male convention as the other sex plots in this notebook.
sex_counts = df['sex'].value_counts().sort_index()
sex_counts.plot(kind='bar')
plt.xticks([0, 1], ["Female", "Male"])
plt.show()

In [None]:
# Mean of every other column, computed separately for each sex code.
df.groupby('sex').mean()

In [None]:
# Bar plot of `target` against `sex`; with kind='bar' seaborn draws the mean of
# `target` within each sex code.
sns.catplot(data=df, x='sex', y='target', kind='bar')
plt.show()

# Data Visualisation

# Correlation matrix

In [None]:
# Heatmap of the pairwise correlation between all columns, with the
# coefficient written in each cell.
plt.figure(figsize=(17, 6))
sns.heatmap(df.corr(), annot=True)
# Render explicitly, as every other plotting cell does, instead of leaving the
# Axes repr as the cell output.
plt.show()

# How many people have heart disease, and how many don't?

In [None]:
# Column labels currently present in df.
df.columns

In [None]:
# Number of rows for each value of the target column.
df["target"].value_counts()

In [None]:
# Count of rows per target value.
# The column is passed by name with data=df: a Series given positionally is
# interpreted as `data` (not `x`) by seaborn >= 0.12 and only worked with a
# FutureWarning on 0.11, so the positional form does not plot per-value counts
# reliably.
sns.countplot(x='target', data=df)
plt.title("Heart disease count of patients")
plt.show()


# Count of Males and Females in the Dataset

In [None]:
# Number of rows per sex code.
df["sex"].value_counts()

In [None]:
# Count of rows per sex code; ticks are relabelled 0 -> Female, 1 -> Male.
# The column is passed by name with data=df because a positional Series is
# interpreted as `data` (not `x`) by seaborn >= 0.12.
sns.countplot(x="sex", data=df)
plt.xticks([0, 1], ["Female", "Male"])
plt.show()

# Find Gender Distribution According to The Target Variable

In [None]:
# Count of rows per sex code, split into one bar per target value.
sns.countplot(data=df, x="sex", hue="target")
plt.xticks([0, 1], ["Female", "Male"])
plt.legend(labels=["No Disease", "Disease"])
plt.show()

# Check Age Distribution In The Dataset

In [None]:
# Distribution of the age column, drawn with 20 histogram bins.
# NOTE(review): seaborn has deprecated distplot in favour of histplot/displot;
# switch once the minimum seaborn version for this notebook is confirmed.
sns.distplot(a=df["age"], bins=20)
plt.show()

# Check Chest Pain Type

In [None]:
# Count of rows per chest-pain code, with descriptive tick labels.
# Requires the raw "cp" column to be present in df.
# The column is passed by name with data=df because a positional Series is
# interpreted as `data` (not `x`) by seaborn >= 0.12.
sns.countplot(x="cp", data=df)
# Tick positions, labels and rotation are set in a single call.
plt.xticks(
    [0, 1, 2, 3],
    ["Typical angina", "atypical angina", "non-anginal pain", "asymptomatic"],
    rotation=90,
)
plt.show()

# Chest Pain Distribution As Per Target Variable

In [None]:
# Count of rows per chest-pain code, split into one bar per target value.
cp_tick_labels = ["Typical angina", "atypical angina", "non-anginal pain", "asymptomatic"]
sns.countplot(data=df, x="cp", hue="target")
plt.xticks([0, 1, 2, 3], cp_tick_labels, rotation=90)
plt.legend(labels=["No Disease", "Disease"])
plt.show()

# Fasting Blood Sugar Distribution According To Target Variable

In [None]:
# Count of rows per "fbs" value, split into one bar per target value.
sns.countplot(data=df, x="fbs", hue="target")
plt.legend(labels=["No Disease", "Disease"])
plt.show()

# Resting Blood Pressure Distribution

In [None]:
# Histogram of the "trestbps" column using the pandas default binning.
# NOTE(review): if this column was standardised earlier in the notebook, the
# x-axis is in z-scores rather than the original units — confirm.
df["trestbps"].hist()
plt.show()

# Resting Blood Pressure As Per Sex Column

In [None]:
# Kernel density estimate of "trestbps", one shaded curve per sex code.
g = sns.FacetGrid(df, hue="sex", aspect=4)
g.map(sns.kdeplot, 'trestbps', shade=True)
# The hue levels are drawn in sorted order (0, then 1), so the legend labels
# must follow the 0 -> Female, 1 -> Male convention used by the other sex
# plots; ["Male", "Female"] attached each name to the wrong curve.
plt.legend(labels=["Female", "Male"])
plt.show()

# Distribution of Serum cholesterol

In [None]:
# Histogram of the "chol" column using the pandas default binning.
# NOTE(review): if this column was standardised earlier in the notebook, the
# x-axis is in z-scores rather than the original units — confirm.
df["chol"].hist()
plt.show()

# Gender Distribution (Percentage Share)

In [None]:
# Percentage share of each sex code, most frequent first.
gender_distribution = df['sex'].value_counts(normalize=True).mul(100)
gender_distribution

In [None]:
# Pie chart of the percentage share of each sex.
# Wedges follow the order of gender_distribution (most frequent first), so the
# names are derived from the index codes instead of being hard-coded; a fixed
# ["Male", "Female"] legend is only right while males are the majority.
# Uses the same 0 -> Female, 1 -> Male convention as the other sex plots.
sex_names = {0: "Female", 1: "Male"}
wedge_labels = [sex_names.get(code, code) for code in gender_distribution.index]

plt.figure(figsize=(5, 5))
plt.pie(
    gender_distribution,
    labels=wedge_labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=['#ff9999', '#66b3ff'],
)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.legend(labels=wedge_labels)
plt.title('Gender Distribution')
plt.show()