# Exploratory Data Analysis

### Import Libraries

In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
%matplotlib inline
from skimage import io
from skimage.io import imshow

### Loading Data

In [None]:
# Location of the brain tumor dataset CSV, relative to the notebook
dataset_csv = 'data/brain_tumor_dataset.csv'

# Load the CSV into a DataFrame, using its first column as the row index
data = pd.read_csv(dataset_csv, index_col=0)

# Leave the DataFrame as the last expression so the notebook renders it
data

### Basic Description of the Data

In [None]:
# Show the dtype pandas inferred for each column of the dataset
data.dtypes

In [None]:
# Dimensions of the dataset as a (number of rows, number of columns) tuple
data.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

### Target Variable Countplot

In [None]:
# Open a wide figure for the class-balance bar chart
plt.figure(figsize=(15, 6))

# Switch matplotlib/seaborn to the darkgrid theme for this and later plots
sns.set_theme(style="darkgrid")

# Draw one bar per class; bar height is the number of rows with that label_name
class_axes = sns.countplot(x='label_name', data=data)

# Title and axis captions, set directly on the axes the bars were drawn on
class_axes.set_title('Number of images for each class', fontweight='bold')
class_axes.set_xlabel('Class')
class_axes.set_ylabel('Count')
plt.show()

### Sample Image from Each Class

In [None]:
# Path of one hand-picked sample image per class
nt_image = 'data/Training/no_tumor/image (30).jpg'
gt_image = 'data/Training/glioma_tumor/gg (486).jpg'
mt_image = 'data/Training/meningioma_tumor/m (4).jpg'
pt_image = 'data/Training/pituitary_tumor/p (731).jpg'

# (subplot title, image path) pairs, in left-to-right display order
sample_images = [
    ('No Tumor', nt_image),
    ('Glioma Tumor', gt_image),
    ('Meningioma Tumor', mt_image),
    ('Pituitary Tumor', pt_image),
]

# Adjust figure size
plt.figure(figsize=(20, 20))

# Main title for all image subplots (y lowered so it sits just above the single row)
plt.suptitle('Sample images from each class', fontweight='bold', fontsize=14, y=0.64)

# Draw the samples side by side in a 1 x 4 grid
for position, (class_title, image_path) in enumerate(sample_images, start=1):
    plt.subplot(1, len(sample_images), position)
    # skimage.io.imshow is deprecated in recent scikit-image releases, so the
    # image is read with skimage and drawn with matplotlib directly. The gray
    # colormap, fixed 0-255 range (the samples are 8-bit JPEGs) and nearest
    # interpolation mirror what the skimage helper applied; colormap and range
    # only affect single-channel images.
    plt.imshow(io.imread(image_path), cmap='gray', vmin=0, vmax=255,
               interpolation='nearest')
    plt.title(class_title)
    # The darkgrid theme would otherwise draw grid lines over the image
    plt.grid(False)
    plt.axis('off')

plt.show()

### Feature Correlation Heat Map

In [None]:
# Adjust figure size
plt.figure(figsize=(15, 10))

# Pearson correlation is only defined for numeric columns. Select them
# explicitly: pandas >= 2.0 no longer drops non-numeric columns silently in
# DataFrame.corr() and raises on them instead (e.g. on a text column such as
# 'label_name' - presumably text here; confirm against data.dtypes).
numeric_data = data.select_dtypes(include='number')

# Heatmap of the pairwise correlations, annotated with the coefficient values
sns.heatmap(numeric_data.corr(method='pearson'), vmin=-0.5, vmax=1, annot=True)
plt.show()

### Distributions of First Order Features for All Classes

In [None]:
# (DataFrame column, x-axis caption) for every first order feature, in plot order
first_order_features = [
    ('mean', 'Mean'),
    ('variance', 'Variance'),
    ('std', 'Standard Deviation'),
    ('skewness', 'Skewness'),
    ('kurtosis', 'Kurtosis'),
]

# Legend captions, applied positionally to the drawn density curves.
# NOTE(review): the order is kept exactly as originally written; it presumably
# follows the order in which seaborn draws the hue levels - confirm against
# the numeric encoding of the 'label' column.
class_legend = ['Pituitary Tumor', 'Meningioma Tumor', 'Glioma Tumor', 'No Tumor']

# Adjust figure size
plt.figure(figsize=(20, 20))

# Main title for all first order feature subplots
plt.suptitle('Distributions of first order features for all classes', fontweight='bold', fontsize=14, y=0.91)

# One kernel density estimation plot per feature, split by class, on a 3 x 2 grid
for position, (column, axis_caption) in enumerate(first_order_features, start=1):
    plt.subplot(3, 2, position)
    # `fill` replaces the deprecated `shade` argument of kdeplot
    sns.kdeplot(data=data, x=column, hue='label', fill=True)
    plt.xlabel(axis_caption)
    plt.legend(class_legend)

plt.show()

### Distributions of Second Order Features for All Classes

In [None]:
# (DataFrame column, x-axis caption, extra legend options) for every second
# order feature, in plot order. Entropy and correlation pin the legend to the
# upper left; the other plots use matplotlib's default legend placement.
second_order_features = [
    ('entropy', 'Entropy', {'loc': 'upper left'}),
    ('contrast', 'Contrast', {}),
    ('dissimilarity', 'Dissimilarity', {}),
    ('homogeneity', 'Homogeneity', {}),
    ('asm', 'ASM', {}),
    ('energy', 'Energy', {}),
    ('correlation', 'Correlation', {'loc': 'upper left'}),
]

# Legend captions, applied positionally to the drawn density curves.
# NOTE(review): the order is kept exactly as originally written; it presumably
# follows the order in which seaborn draws the hue levels - confirm against
# the numeric encoding of the 'label' column.
class_legend = ['Pituitary Tumor', 'Meningioma Tumor', 'Glioma Tumor', 'No Tumor']

# Adjust figure size
plt.figure(figsize=(20, 20))

# Main title for all second order feature subplots
plt.suptitle('Distributions of second order features for all classes', fontweight='bold', fontsize=14, y=0.91)

# One kernel density estimation plot per feature, split by class, on a 4 x 2 grid
for position, (column, axis_caption, legend_options) in enumerate(second_order_features, start=1):
    plt.subplot(4, 2, position)
    # `fill` replaces the deprecated `shade` argument of kdeplot
    sns.kdeplot(data=data, x=column, hue='label', fill=True)
    plt.xlabel(axis_caption)
    plt.legend(class_legend, **legend_options)

plt.show()