In [None]:
# Step 1: Set Up the Environment
# Objective: Ensure you have the right tools and packages installed.

# 1. Install the required libraries.
# 2. Verify the installation by importing the libraries in a Python script or Jupyter notebook


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris


# Step 2: Load & Explore the Dataset
# Objective: Load data into a pandas DataFrame and obtain a basic understanding of its structure.

# 3. Load the dataset into a DataFrame (this script uses the Iris dataset bundled with scikit-learn instead of reading a CSV file).
# 4. Display the first few records to understand the structure.
# 5. Get a summary of the dataset.

# Build the DataFrame from scikit-learn's bundled Iris dataset: the feature
# matrix becomes the columns (named after iris.feature_names) and the integer
# targets are mapped to their species names as a categorical column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

# Preview the first rows to see the column layout.
print(df.head())

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()


# Step 3: Perform NumPy Operations
# Objective: Utilize NumPy for basic numerical operations and array manipulations.

# 6. Convert a DataFrame column to a NumPy array and perform array operations like mean and sum.
# 7. Create a NumPy array and calculate the variance and standard deviation.
# 8. Use NumPy to filter based on conditions.


# 6. Pull one DataFrame column out as a NumPy array and reduce it (mean, sum).
sepal_length_array = df['sepal length (cm)'].to_numpy()
sepal_length_mean = sepal_length_array.mean()
sepal_length_total = sepal_length_array.sum()
print("Mean of Sepal Length:", sepal_length_mean)
print("Sum of Sepal Length:", sepal_length_total)

# 7. Build a small standalone array (the integers 1 through 5) and report its
#    variance and standard deviation using NumPy's default settings.
array = np.arange(1, 6)
array_variance = array.var()
array_std = array.std()
print("Variance of array:", array_variance)
print("Standard Deviation of array:", array_std)

# 8. Boolean-mask filtering: keep only the sepal lengths strictly above 5.0.
longer_than_five = sepal_length_array > 5.0
filtered_array = sepal_length_array[longer_than_five]
print("Filtered Sepal Lengths > 5.0:", filtered_array)


# Step 4: Data Manipulation with Pandas
# Objective: Use Pandas to clean and manipulate dataset for analysis.

# 9. Handle missing data by filling or dropping.
# 10. Create new columns or modify existing ones.
# 11. Use groupby to aggregate data.


# 9. Handle missing data by filling or dropping.
# Only the numeric columns are filled. The 'species' column is categorical and
# 0 is not one of its categories, so a blanket df.fillna(0) can be rejected by
# pandas for that column (the fill value is validated against the categories).
# The dataset has no missing values; this step is performed as instructed.
numeric_columns = df.select_dtypes(include='number').columns
df[numeric_columns] = df[numeric_columns].fillna(0)

# 10. Create new columns or modify existing ones.
# Derived feature: element-wise product of sepal length and sepal width.
df['sepal area (cm^2)'] = df['sepal length (cm)'] * df['sepal width (cm)']

# 11. Use groupby to aggregate data.
# Per-species mean of every remaining (numeric) column. observed=True is given
# explicitly because the grouping key is categorical: it aggregates only the
# categories that actually occur and avoids relying on the changing default.
grouped_df = df.groupby('species', observed=True).mean()
print(grouped_df)



# Step 5: Data Visualization with Matplotlib & Seaborn
# Objective: Visualize the data to identify patterns, trends, and insights.

# 12. Use Matplotlib to create a basic plot.
# 13. Create a histogram using Seaborn.
# 14. Plot a box plot for a clear view of data distribution.


# Columns used by the plots below.
sepal_length = df['sepal length (cm)']
sepal_width = df['sepal width (cm)']

# 12. Use Matplotlib to create a basic plot.
# Line plot of sepal length against the DataFrame index.
plt.plot(sepal_length)
plt.xlabel('Index')
plt.ylabel('Sepal Length (cm)')
plt.title('Basic Plot of Sepal Length')
plt.show()

# 13. Create a histogram using Seaborn.
# Sepal width distribution split into 10 bins.
sns.histplot(sepal_width, bins=10)
plt.title('Histogram of Sepal Width')
plt.show()

# 14. Plot a box plot for a clear view of data distribution.
# Horizontal box plot of the same sepal width column.
sns.boxplot(x=sepal_width)
plt.title('Box Plot of Sepal Width')
plt.show()


