In [None]:
# Data loading + clean EDA
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Prefer a local CSV copy of the data; when it is absent, fall back to the
# iris dataset shipped with scikit-learn.
csv_path = 'sample_iris.csv'
if not os.path.exists(csv_path):
    from sklearn.datasets import load_iris
    iris = load_iris(as_frame=True)
    df = iris.frame
    # Feature columns first, class label last under the name 'target'.
    df.columns = [*iris.feature_names, 'target']
else:
    df = pd.read_csv(csv_path)

# Structural overview of the frame: dimensions, first rows, column dtypes,
# descriptive statistics and per-column missing-value counts.
print('Shape:', df.shape)
display(df.head())

column_dtypes = df.dtypes
print('\nDtypes:\n', column_dtypes)

print('\nSummary statistics:')
summary_stats = df.describe(include='all')
display(summary_stats)

print('\nMissing values per column:')
missing_counts = df.isnull().sum()
print(missing_counts)

# One histogram per numeric column. `numeric` is kept as a notebook-level
# name because the plotting code further down reads it as well.
numeric = df.select_dtypes(include=np.number)
numeric.hist(figsize=(10, 8))
plt.tight_layout()
plt.show()

# Annotated heatmap of the pairwise correlations between numeric columns,
# drawn on an explicitly created figure/axes pair.
corr_matrix = numeric.corr()
corr_fig, corr_ax = plt.subplots(figsize=(6, 5))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=corr_ax)
corr_ax.set_title('Correlation matrix')
plt.show()

# Pairplot (may take some time)
# Colour by the class label only when the loaded frame actually has a
# 'target' column; a CSV with a different schema would otherwise make
# seaborn fail on the missing hue column.
hue_col = 'target' if 'target' in df.columns else None
# The label is numeric when the data comes from the sklearn fallback, so it
# shows up in `numeric.columns`. Keep it out of the grid variables: plotting
# the hue column against itself adds a meaningless row/column to the grid.
pairplot_vars = [col for col in numeric.columns if col != hue_col]
sns.pairplot(df, vars=pairplot_vars, hue=hue_col)
plt.show()

# Capstone Project - Exploratory Data Analysis
This notebook contains data loading, brief EDA, modeling, and report export steps. The corrupted EDA cell was removed and replaced with a clean EDA cell.

In [5]:
# Run the exported script to generate Markdown and HTML reports.
# The script is executed inside the notebook's own namespace (exec with no
# explicit globals), presumably so it can use objects created by earlier
# cells -- confirm against export_results.py before switching to a subprocess.
print('Running export_results.py from notebook kernel')
script_path = 'export_results.py'
# Read through a context manager so the file handle is closed deterministically.
with open(script_path, 'r', encoding='utf-8') as script_file:
    script_source = script_file.read()
# Compiling with the real filename makes tracebacks point at
# export_results.py instead of the anonymous "<string>".
exec(compile(script_source, script_path, 'exec'))

Running export_results.py from notebook kernel
Output directory: c:\Users\user\Downloads\Capstone project
Saved confusion matrix to c:\Users\user\Downloads\Capstone project\confusion_matrix.png
Wrote Markdown report to c:\Users\user\Downloads\Capstone project\results_report.md
Wrote HTML report to c:\Users\user\Downloads\Capstone project\results_report.html

Done.
