This code demonstrates various operations for exploring data distributions and correlations using the Pandas and Matplotlib libraries. It includes creating histograms, box plots, kernel density plots, and bar plots to visualize data distributions. Additionally, it calculates the correlation matrix and creates a heatmap to visualize the correlations between columns in the dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the contents of diabetes.csv into a DataFrame
df = pd.read_csv(filepath_or_buffer='diabetes.csv')

In [None]:
# Display basic information about the DataFrame.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Data information:")
df.info()

# Data distributions

In [None]:
# Draw a 10-bin histogram of the 'Age' column, then label the current axes
plt.hist(x=df['Age'], bins=10)
plt.gca().set(title="Histogram", xlabel="Values", ylabel="Frequency")
plt.show()

In [None]:
# Draw a box plot of the 'Age' column, then label the current axes
plt.boxplot(x=df['Age'])
plt.gca().set(title="Box Plot", ylabel="Values")
plt.show()

In [None]:
# Draw a kernel density estimate of the 'Age' column via the generic
# Series.plot entry point (kind='kde' selects the density plot)
df['Age'].plot(kind='kde')
plt.title(label="Kernel Density Plot")
plt.xlabel(xlabel="Values")
plt.ylabel(ylabel="Density")
plt.show()

In [None]:
# Count how often each distinct 'Age' value occurs and draw one bar per value:
# the distinct values are the x positions, their counts are the bar heights
value_counts = df['Age'].value_counts()
plt.bar(x=value_counts.index, height=value_counts.to_numpy())
plt.gca().set(title="Value Counts", xlabel="Values", ylabel="Counts")
plt.show()

# Data correlations

In [None]:
# Compute pairwise correlations between the DataFrame's columns
# (Pearson is the pandas default method; it is spelled out here for clarity)
correlation_matrix = df.corr(method='pearson')

In [None]:
# Create a heatmap of the correlation matrix.
# Tick positions and labels are taken from the matrix itself rather than from
# df.columns: corr() can return fewer columns than the DataFrame holds (e.g.
# when non-numeric columns are excluded), and labelling by df.columns would
# then attach the wrong names to the rows and columns of the image.
plt.imshow(correlation_matrix, cmap='coolwarm', interpolation='nearest')
plt.title("Correlation Matrix")
plt.colorbar()
corr_labels = correlation_matrix.columns
plt.xticks(range(len(corr_labels)), corr_labels, rotation=90)
plt.yticks(range(len(corr_labels)), corr_labels)
plt.show()


In [None]:
# Columns to include in the focused correlation analysis
selected_columns = [
    'Age',
    'BMI',
    'BloodPressure',
]

In [None]:
# Restrict the DataFrame to the selected columns and compute their pairwise
# correlations (Pearson is the pandas default method, stated explicitly)
selected_corr_matrix = df.loc[:, selected_columns].corr(method='pearson')

In [None]:
# Render the selected-column correlation matrix as a color-coded grid
heatmap = plt.imshow(selected_corr_matrix, cmap='coolwarm', interpolation='nearest')
plt.title("Correlation Matrix (Selected Columns)")
# Attach the color scale to the image that was just drawn
plt.colorbar(heatmap)
# One tick per selected column, labelled with the column name on both axes
tick_positions = range(len(selected_columns))
plt.xticks(tick_positions, selected_columns, rotation=90)
plt.yticks(tick_positions, selected_columns)
plt.show()