<a href="https://colab.research.google.com/github/YoussefAli10/iris-analysis-group1/blob/main/iris-project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Greeting cell: prints a fixed hello line signed "Youssef".
print("Hello, Iris by Youssef")

Hello, Iris by Youssef


In [None]:
# Greeting cell: prints a fixed hello line signed "Sibo".
print("Hello, Iris by Sibo")

Hello, Iris by Sibo


In [None]:
# Greeting cell: prints a fixed hello line signed "Wendy".
print("Hello, Iris by Wendy")

Hello, Iris by Wendy


In [None]:
# Basic dataset metadata, hard-coded here (nothing is read from disk in this cell).
dataset_name = "Iris Dataset"
n_rows = 150

# Emit both facts with a single print call, one per line.
print(f"Dataset: {dataset_name}", f"Number of rows: {n_rows}", sep="\n")


Dataset: Iris Dataset
Number of rows: 150


In [None]:
# Example: compare one petal length against a fixed 1.5 cut-off.
# Values strictly greater than 1.5 are reported as large, everything else as small.
petal_length = 1.7
print("Large petal" if petal_length > 1.5 else "Small petal")


Large petal


In [None]:
def classify_flower(sepal_length, petal_length):
    """
    Bucket a flower as 'small', 'medium' or 'large' using petal_length only.

    Fixed demo cut-offs (this is not a trained model):
      - petal_length below 1.5           -> 'small'
      - petal_length from 1.5 up to 4.5  -> 'medium' (4.5 itself excluded)
      - anything else                    -> 'large'

    sepal_length is accepted for the call signature but is not consulted.
    """
    # Walk the exclusive upper bounds in ascending order; the first bound the
    # petal length falls under decides the label.
    for upper_bound, label in ((1.5, "small"), (4.5, "medium")):
        if petal_length < upper_bound:
            return label
    # No bound matched, so the value lands in the open-ended top bucket.
    return "large"

# quick sanity checks: the three (sepal_length, petal_length) pairs below are
# expected to print small, medium and large, in that order
print(
    *(classify_flower(sl, pl) for sl, pl in [(5.1, 1.4), (6.0, 3.5), (6.3, 5.0)]),
    sep="\n",
)


small
medium
large


In [None]:
# Print every Iris species name on its own line
species = ["setosa", "versicolor", "virginica"]
print(*species, sep="\n")


setosa
versicolor
virginica


In [None]:
import pandas as pd
from IPython.display import display, HTML

# --- Configuration ---
# File names shared by the processing cells. They are relative paths, so they
# resolve against the notebook's current working directory.
RAW_DATA_FILE = "iris.csv"              # raw input, loaded with pd.read_csv
SUMMARY_FILE = "summary.txt"            # plain-text summary written after cleaning
CLEANED_DATA_FILE = "iris_cleaned.csv"  # cleaned frame, saved with DataFrame.to_csv

print(f"--- Phase 3: Starting Processing for {RAW_DATA_FILE} ---")

--- Phase 3: Starting Processing for iris.csv ---


In [None]:
# --- Load, Clean, and Write Summary ---
# Produces the names later cells rely on: `df` (cleaned frame),
# `initial_rows`, `final_rows` and `stats`.

# The four numeric measurement columns, in file order; 'species' follows them.
measurement_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# 1. Load data (default comma delimiter) and assign column names.
# header=None makes pandas treat every line of the file as a data row.
# NOTE(review): the recorded run reports 151 loaded rows and a preview index
# starting at 1, which suggests iris.csv begins with a header line that is
# counted here and only removed later by the numeric coercion + dropna.
# Confirm against the file before switching to header=0.
raw_df = pd.read_csv(RAW_DATA_FILE, header=None)
raw_df.columns = measurement_columns + ['species']
initial_rows = len(raw_df)

# 2. Core cleaning steps, as named stages instead of in-place mutation so the
# raw frame stays available for inspection:
#    a) drop exact duplicate rows,
#    b) coerce the measurement columns to numeric (unparseable cells -> NaN),
#    c) drop every row that contains a NaN.
deduplicated_df = raw_df.drop_duplicates()
numeric_measurements = deduplicated_df[measurement_columns].apply(pd.to_numeric, errors='coerce')
# assign() replaces each column wholesale, so the result carries the numeric
# dtype. Writing through `df.iloc[:, 0:4] = ...` instead sets values into the
# existing columns and can leave them as object dtype on pandas >= 2.0.
df = deduplicated_df.assign(
    **{col: numeric_measurements[col] for col in measurement_columns}
).dropna()
final_rows = len(df)

# 3. Calculate Sepal Length Stats (for summary.txt)
stats = df['sepal_length'].agg(['min', 'max', 'mean'])

# 4. Write summary.txt. The template is stripped, so the file has no leading
# or trailing blank line; the encoding is pinned so the output does not depend
# on the platform default.
with open(SUMMARY_FILE, mode='w', encoding='utf-8') as outfile:
    outfile.write(f"""
--- IRIS Dataset Summary ---
Total Rows Processed: {initial_rows}
Final Cleaned Rows: {final_rows}

Sepal Length (cm) Statistics:
Minimum: {stats['min']:.2f}
Maximum: {stats['max']:.2f}
Average: {stats['mean']:.2f}
""".strip())

print(f" Summary written to {SUMMARY_FILE} with final row count.")

 Summary written to summary.txt with final row count.


In [None]:
# --- Enhanced Initial Data Report ---
print("\n--- Initial Data Preview ---")

# Render the first five rows of `df` as a styled table, showing each of the
# four measurement columns with one decimal place.
display(
    df.head().style
    .set_caption("First 5 Cleaned Rows")
    .set_table_attributes("style='font-size: 10pt;'")
    .format(formatter=dict.fromkeys(
        ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
        '{:.1f}',
    ))
)

# --- Final Enhanced Cleaning Summary ---
# Despite its name, this is loaded rows minus final rows, i.e. every row
# removed by any cleaning step (it is shown as 'Total Rows Removed').
rows_removed_duplicates = initial_rows - final_rows

print("\n--- Final Cleaning Summary ---")

cleaning_summary_data = {
    'Metric': ['Initial Rows Loaded', 'Total Rows Removed', 'Final Cleaned Rows'],
    'Value': [initial_rows, rows_removed_duplicates, final_rows],
}
summary_df_vis = pd.DataFrame(cleaning_summary_data)

# Two-column summary table: index hidden, light background on every cell.
display(
    summary_df_vis.style
    .hide(axis='index')
    .set_caption("Summary of Data Cleaning Steps")
    .set_table_attributes("style='font-size: 12pt;'")
    .set_properties(**{'background-color': '#f0f8ff', 'border-color': 'white'})
)

# --- Save Cleaned Data ---
# index=False keeps the DataFrame index out of the written CSV.
df.to_csv(CLEANED_DATA_FILE, index=False)
print(f"\n Final cleaned data saved to {CLEANED_DATA_FILE}.")




--- Initial Data Preview ---


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
5,5.0,3.6,1.4,0.2,Iris-setosa



--- Final Cleaning Summary ---


Metric,Value
Initial Rows Loaded,151
Total Rows Removed,4
Final Cleaned Rows,147



 Final cleaned data saved to iris_cleaned.csv.
