# Data Loading and Exploration

This section loads the dataset from a CSV file (`iris.csv` by default), previews its first rows, and inspects its structure and missing values.

In [None]:

import pandas as pd

# Load the dataset (replace 'iris.csv' with your dataset file path).
# A missing file is reported and then re-raised: every later cell needs `df`,
# so continuing would only fail further down with a confusing NameError, or
# silently reuse a stale `df` left over from an earlier kernel run.
try:
    df = pd.read_csv('iris.csv')  # Replace with your dataset file path
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print("Error: The dataset file was not found. Please check the file path.")
    raise

# Display the first few rows of the dataset.
# Only the last expression of a cell is rendered automatically, so earlier
# results are shown explicitly with display() (provided by the IPython kernel).
display(df.head())

# Explore the dataset structure (info() prints its report directly)
df.info()

# Check for missing values (last expression of the cell, rendered automatically)
df.isnull().sum()


# Basic Data Analysis

This section computes basic statistics and group-based analysis.

In [None]:

# Basic statistics for numerical columns.
# display() is used because only the last expression of a cell is rendered
# automatically; without it this table would never be shown.
display(df.describe())

# Group by the categorical 'species' column and average the numeric columns.
# numeric_only=True keeps the aggregation working even if the dataset carries
# additional non-numeric columns, which would otherwise make mean() raise.
grouped_data = df.groupby('species').mean(numeric_only=True)
display(grouped_data)

# Average Sepal Length per Species (last expression, rendered automatically)
grouped_data['sepal_length']


# Data Visualization

This section visualizes the data with different charts.

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid look to every figure drawn below
sns.set(style="whitegrid")

# Figure 1: bar chart comparing petal length across species
fig, ax = plt.subplots()
sns.barplot(data=df, x='species', y='petal_length', ax=ax)
ax.set_title('Average Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Average Petal Length')
plt.show()

# Figure 2: histogram showing how sepal length is distributed
fig, ax = plt.subplots()
ax.hist(df['sepal_length'], bins=15, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Sepal Length')
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Frequency')
plt.show()

# Figure 3: scatter plot of sepal length against petal length, coloured by species
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='sepal_length', y='petal_length', hue='species', ax=ax)
ax.set_title('Sepal Length vs Petal Length')
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Petal Length')
ax.legend(title='Species')
plt.show()


# Observations and Findings

This section summarizes the analysis and visualizations.

In [None]:

# Key takeaways from the analysis above, printed one per line
print(
    "1. The average petal length varies significantly across different species.",
    "2. Sepal length and petal length are positively correlated for different species.",
    sep="\n",
)
# Extend the list above with any further findings from your own analysis and plots.


# Error Handling for Data Loading

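This section reloads the dataset with broader error handling: besides a missing file, any other exception raised while reading the CSV is caught and reported.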

In [None]:

def load_dataset(path):
    """Read a CSV file into a DataFrame, reporting failures instead of raising.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data, or None when the file is missing or cannot be read.
        In the failure case a message describing the problem is printed.
    """
    try:
        loaded = pd.read_csv(path)
    except FileNotFoundError:
        print("Error: The dataset file was not found. Please check the file path.")
        return None
    except Exception as e:
        # Any other failure while reading the file is reported the same way
        print(f"An error occurred: {e}")
        return None
    print("Dataset loaded successfully.")
    return loaded


# Replace 'iris.csv' with your dataset file path
reloaded = load_dataset('iris.csv')
if reloaded is not None:
    # Only rebind `df` on success so a failed reload leaves existing data untouched
    df = reloaded


# Conclusion

This section summarizes the findings.

In [None]:

# Emit the closing summary as a single block of text, one statement per line
print("\n".join([
    "Conclusion:",
    "Based on the analysis and visualizations, we observe significant differences in petal lengths across species.",
    "The relationship between sepal length and petal length shows a positive correlation, indicating that as sepal length increases, so does petal length.",
]))
