# 1. Import The Libraries And Dataset


In [52]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [53]:
# Location of the heart-disease CSV, relative to the notebook's working directory.
DATA_PATH = '../input/heart-disease-dataset/heart.csv'
data = pd.read_csv(DATA_PATH)

# 2. Display Top 5 Rows of The Dataset


In [54]:
# head() returns the first five rows when called without an argument.
data.head()

# 3. Check The Last 5 Rows of The Dataset


In [55]:
# tail() returns the last five rows when called without an argument.
data.tail()

# 4. Find Shape of Our Dataset (Number of Rows And Number of Columns)


In [56]:
# `shape` is an attribute, not a method, so it is read without parentheses.
# It holds a (number of rows, number of columns) tuple.
data.shape

In [57]:
# Unpack the (rows, columns) tuple once and report each dimension.
n_rows, n_cols = data.shape
print("Number of Rows:", n_rows)
print("Number of Columns:", n_cols)

# 5. Get Information About Our Dataset Like Total Number of Rows, Total Number of Columns, Datatypes of Each Column And Memory Requirement


In [58]:
# info() prints the index range, the per-column non-null counts and dtypes,
# and the frame's memory usage in a single summary.
data.info()

The output below shows the details we wanted about the dataset, all obtained through the `info` method:
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB

# 6. Check Null Values In The Dataset


In [59]:
# isna() flags every missing cell; summing the flags column-wise yields a
# Series with the number of missing values in each column.
data.isna().sum()

In the output above, every column shows zero, which means the dataset has no null values.

# 7. Check For Duplicate Data and Drop Them


In [60]:
# duplicated() marks rows that repeat an earlier row; any() collapses the
# marks into a single flag that is True when at least one duplicate exists.
data_dup = data.duplicated().any()
print(data_dup)

In [61]:
# Keep the first occurrence of each repeated row and discard the later copies.
# The surviving rows keep their original index labels, so the index may have gaps.
data = data.drop_duplicates(keep='first')

In [62]:
# Re-check the dimensions to see how many rows remain after dropping duplicates.
data.shape

# 8. Get Overall Statistics About The Dataset


In [63]:
# describe() summarizes every numeric column: count, mean, standard deviation,
# minimum, quartiles and maximum.
data.describe()

# 9. Draw Correlation Matrix 


In [64]:
# Visualize the pairwise correlation between the dataset's columns,
# annotating each cell with its correlation coefficient.
fig, ax = plt.subplots(figsize=(17, 6))
sns.heatmap(data.corr(), annot=True, ax=ax)

# 10. How Many People Have Heart Disease, And How Many Don't Have Heart Disease In This Dataset?


In [65]:
# List the column names to locate the label column used in the next cells.
data.columns

In [66]:
# Number of rows per value of `target` (the later plots label these classes
# 'No-Disease' and 'Disease').
data['target'].value_counts()

In [67]:
# Bar chart of the number of rows per `target` class.
# The column is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so a positional Series no
# longer produces the per-class counts.
sns.countplot(x='target', data=data)
plt.show()

# 11. Find Count of Male & Female in this Dataset


In [68]:
# List the column names to locate the `sex` column used in the next cells.
data.columns

In [69]:
# Number of rows per value of `sex` (the plot below labels 0 as Female and 1 as Male).
data['sex'].value_counts()

In [70]:
# Bar chart of the number of rows per `sex` value.
# The column is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so a positional Series no
# longer produces the per-category counts.
sns.countplot(x='sex', data=data)
# Replace the numeric codes on the x-axis with readable names.
plt.xticks([0,1],['Female', 'Male'])
plt.show()

# 12. Find Gender Distribution According to The Target Variable


In [71]:
# Count of each `sex` value, split by `target`.
ax = sns.countplot(x='sex', hue='target', data=data)
# Replace the numeric codes on the x-axis with readable names.
plt.xticks([1,0],['Male','Female'])
# Rename the legend entries using the handles seaborn created for the hue
# levels (in hue order), instead of letting matplotlib pair the labels with
# whichever artists happen to come first on the axes.
hue_handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=hue_handles, labels=['No-Disease','Disease'])
plt.show()

# 13. Check Age Distribution In The Dataset


In [72]:
# Histogram of the `age` column using 20 bins.
sns.displot(data=data, x='age', bins=20)
plt.show()

# 14. Check Chest Pain Type


Chest pain type (4 values):

    a. value 0 : typical angina
    
    b. value 1 : atypical angina
    
    c. value 2 : non-anginal pain
    
    d. value 3 : asymptomatic
    

In [73]:
# Bar chart of the number of rows per chest-pain type.
# The column is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so a positional Series no
# longer produces the per-category counts.
sns.countplot(x='cp', data=data)
# Label the four codes with their names and rotate the text so it does not overlap.
plt.xticks(
    [0,1,2,3],
    ["typical angina", "atypical angina", "non-anginal pain","asymptomatic"],
    rotation=75,
)
plt.show()

# 15. Show The Chest Pain Distribution As Per Target Variable


In [74]:
# List the column names again for reference before the grouped plots.
data.columns

In [75]:
# Count of each chest-pain type, split by `target`.
ax = sns.countplot(x="cp", hue="target", data=data)
# Rename the legend entries using the handles seaborn created for the hue
# levels (in hue order), instead of letting matplotlib pair the labels with
# whichever artists happen to come first on the axes.
hue_handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=hue_handles, labels=['No-Disease','Disease'])
plt.show()

# 16. Show Fasting Blood Sugar Distribution According To Target Variable


In [76]:
# Count of each fasting-blood-sugar value, split by `target`.
ax = sns.countplot(x="fbs", hue="target", data=data)
# Rename the legend entries using the handles seaborn created for the hue
# levels (in hue order), instead of letting matplotlib pair the labels with
# whichever artists happen to come first on the axes.
hue_handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=hue_handles, labels=['No-Disease','Disease'])
plt.show()

# 17.  Check Resting Blood Pressure Distribution


In [77]:
# Histogram of the `trestbps` (resting blood pressure) column with pandas' default bins.
data['trestbps'].hist()

# 18. Compare Resting Blood Pressure As Per Sex Column


In [78]:
# FacetGrid draws one density curve per hue level, which makes it easy to
# compare the distribution of a variable across groups.
# Map the numeric codes to names BEFORE plotting so each curve carries its own
# label. Relabelling afterwards by position is error-prone: hue levels are drawn
# in sorted order (0 first), so a ['Male', 'Female'] list would mark 0 as Male
# even though the other plots in this notebook treat 0 as Female.
sex_names = {0: 'Female', 1: 'Male'}
g = sns.FacetGrid(
    data.assign(sex=data['sex'].map(sex_names)),
    hue="sex",
    hue_order=list(sex_names.values()),
    aspect=4,
)
# `fill=True` replaces the deprecated `shade=True` keyword of kdeplot.
g.map(sns.kdeplot, 'trestbps', fill=True)
g.add_legend(title='Sex')
plt.show()

# 19. Show Distribution of Serum cholesterol


In [79]:
# Histogram of the `chol` (serum cholesterol) column with pandas' default bins.
data['chol'].hist()

# 20. Plot Continuous Variables

In [80]:
# Split the columns by cardinality: a column with at most 10 distinct values is
# treated as categorical, anything above that as continuous.
distinct_counts = data.nunique()
cate_val = distinct_counts[distinct_counts <= 10].index.tolist()
cont_val = distinct_counts[distinct_counts > 10].index.tolist()

In [81]:
# Columns classified as categorical (10 or fewer distinct values).
cate_val

In [82]:
# Columns classified as continuous (more than 10 distinct values).
cont_val

In [83]:
# One histogram per continuous column, laid out on a single figure.
data.hist(column=cont_val, figsize=(15, 6))
# Tighten the spacing so subplot titles and tick labels do not overlap.
plt.tight_layout()
plt.show()