In [None]:
# Import necessary libraries
from pathlib import Path

import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Download the latest version of the Iris dataset from Kaggle (requires
# network access on first run) and keep the returned local location in
# `path`, which the loading cell below reads.
# NOTE(review): kagglehub documents the return value as the local directory
# that holds the dataset files, not the path of a single CSV -- confirm
# before passing it directly to a file reader.
path = kagglehub.dataset_download("arshid/iris-flower-dataset")


In [None]:
# Task 1: Load and Explore the Dataset
#
# Reads the downloaded Iris CSV into `data`, shows a preview, reports dtypes
# and missing values, then drops incomplete rows. Later cells rely on the
# cleaned frame being bound to the name `data`.
try:
    # `path` is the location returned by kagglehub.dataset_download(). That
    # location is normally a directory containing the dataset files, and
    # handing a directory to pd.read_csv raises IsADirectoryError (or
    # PermissionError on Windows), which the handler below would not catch.
    # Resolve the actual CSV file first; a plain file path is used unchanged.
    dataset_path = Path(path)
    if dataset_path.is_dir():
        csv_files = sorted(
            candidate
            for candidate in dataset_path.rglob("*")
            if candidate.is_file() and candidate.suffix.lower() == ".csv"
        )
        if not csv_files:
            # Raise the same exception type the handler below already reports.
            raise FileNotFoundError(f"No CSV file found under {dataset_path}")
        dataset_path = csv_files[0]

    # Load the dataset
    data = pd.read_csv(dataset_path)

    # Display the first few rows
    print("First few rows of the dataset:")
    print(data.head())

    # Check data types and missing values.
    # DataFrame.info() writes its report to stdout itself and returns None, so
    # it must not be wrapped in print() (that would emit a stray "None").
    print("\nDataset Info:")
    data.info()
    print("\nMissing Values:")
    print(data.isnull().sum())

    # Clean the dataset (drop rows with missing values)
    data = data.dropna()
    print("\nDataset after cleaning:")
    data.info()
except FileNotFoundError:
    print("Error: Dataset file not found. Please check the file path.")

In [None]:
# Task 2: Basic Data Analysis
#
# Prints summary statistics for `data` and, when both the 'species' and
# 'sepal_length' columns exist, the mean sepal length of each species.
# The per-species means are kept in `grouped_data`.
try:
    # Summary statistics of the numeric columns
    print("\nBasic Statistics:")
    print(data.describe())

    # Per-species mean needs both the grouping key and the measured column
    if not all(column in data.columns for column in ('species', 'sepal_length')):
        print("\nRequired columns for grouping not found.")
    else:
        grouped_data = data.groupby('species')['sepal_length'].mean()
        print("\nMean Sepal Length by Species:")
        print(grouped_data)
except Exception as err:
    print(f"An error occurred during analysis: {err}")

In [None]:
# Task 3: Data Visualization
#
# Draws four figures from `data`: a line chart, a bar chart of per-species
# means, a histogram, and a scatter plot. Each figure is only drawn when the
# columns it needs are present.
try:
    # Line chart: sepal length in row order (the index is not a time axis,
    # so this only shows how the values vary along the file).
    if 'sepal_length' in data.columns:
        plt.figure(figsize=(8, 5))
        plt.plot(data.index, data['sepal_length'], label='Sepal Length')
        plt.title('Line Chart of Sepal Length')
        plt.xlabel('Index')
        plt.ylabel('Sepal Length')
        plt.legend()
        plt.show()

    # Bar chart: average sepal length per species
    if 'species' in data.columns and 'sepal_length' in data.columns:
        # Compute the aggregate here instead of reading a variable left behind
        # by another cell: if that cell was skipped or failed, the resulting
        # NameError would be swallowed below and every later plot skipped.
        species_means = data.groupby('species')['sepal_length'].mean()
        # Draw on an explicit Axes so the bars never land on a leftover figure.
        fig, ax = plt.subplots(figsize=(8, 5))
        species_means.plot(kind='bar', ax=ax, color='skyblue')
        ax.set_title('Average Sepal Length by Species')
        ax.set_xlabel('Species')
        ax.set_ylabel('Average Sepal Length')
        plt.show()

    # Histogram: distribution of sepal length
    if 'sepal_length' in data.columns:
        plt.figure(figsize=(8, 5))
        plt.hist(data['sepal_length'], bins=10, color='lightgreen', edgecolor='black')
        plt.title('Histogram of Sepal Length')
        plt.xlabel('Sepal Length')
        plt.ylabel('Frequency')
        plt.show()

    # Scatter plot: sepal length vs. petal length
    if 'sepal_length' in data.columns and 'petal_length' in data.columns:
        plt.figure(figsize=(8, 5))
        plt.scatter(data['sepal_length'], data['petal_length'], color='purple', alpha=0.7)
        plt.title('Scatter Plot of Sepal Length vs. Petal Length')
        plt.xlabel('Sepal Length')
        plt.ylabel('Petal Length')
        plt.show()
except Exception as e:
    print(f"An error occurred during visualization: {e}")