In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Download the dataset from the provided link and load it into a pandas DataFrame
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Labels applied to the 23 columns, in file order: the class label first,
# followed by 22 attribute columns grouped here by the part they describe.
column_names = [
    'class',
    # cap
    'cap_shape',
    'cap_surface',
    'cap_color',
    # general
    'bruises',
    'odor',
    # gill
    'gill_attachment',
    'gill_spacing',
    'gill_size',
    'gill_color',
    # stalk
    'stalk_shape',
    'stalk_root',
    'stalk_surface_above_ring',
    'stalk_surface_below_ring',
    'stalk_color_above_ring',
    'stalk_color_below_ring',
    # veil
    'veil_type',
    'veil_color',
    # ring
    'ring_number',
    'ring_type',
    # remaining attributes
    'spore_print_color',
    'population',
    'habitat',
]

# Passing names= already implies header=None (every row is read as data);
# it is spelled out here only to make that explicit.
df = pd.read_csv(url, header=None, names=column_names)

# Step 3: Subset the DataFrame to include the columns for edible/poisonous, odor, and one other column of your choice
subset_columns = ['class', 'odor', 'cap_color']  # You can choose any other column instead of 'cap_color'
# Take an explicit copy: the rename and the column assignment below then operate
# on an independent frame instead of a selection of `df`, which avoids pandas'
# SettingWithCopyWarning and guarantees `df` itself is never modified.
df_subset = df[subset_columns].copy()

# Step 4: Rename the columns to be more meaningful
# The new labels are assigned positionally, so their order must match subset_columns.
df_subset.columns = ['Edible/Poisonous', 'Odor', 'Cap_Color']

# Step 5: Replace the categorical values with numeric values
# Replace 'e' with 0 (edible) and 'p' with 1 (poisonous) in the 'Edible/Poisonous' column.
# Any value not present in the mapping would become NaN.
df_subset['Edible/Poisonous'] = df_subset['Edible/Poisonous'].map({'e': 0, 'p': 1})

# Step 6: Perform exploratory data analysis
# Draw one count plot per column of df_subset, with the bars split by the
# 'Edible/Poisonous' column. Each plot gets its own 8x6 figure.
for column in df_subset.columns:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x=column, hue='Edible/Poisonous', data=df_subset, ax=ax)
    ax.set_title(f'Distribution of {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Count')
    ax.legend(title='Edible/Poisonous')
    plt.show()

# Show scatterplots for Edible/Poisonous vs. Odor and Edible/Poisonous vs. Cap_Color.
# Each pair is (df_subset column name, label used in the title and on the x-axis).
for feature, label in (('Odor', 'Odor'), ('Cap_Color', 'Cap Color')):
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        x=feature,
        y='Edible/Poisonous',
        hue='Edible/Poisonous',
        data=df_subset,
        ax=ax,
    )
    ax.set_title(f'Edible/Poisonous vs. {label}')
    ax.set_xlabel(label)
    ax.set_ylabel('Edible/Poisonous')
    plt.show()

# Step 7: Preliminary conclusions
# Analyze the scatterplots and distribution plots to draw preliminary conclusions about the selected column's predictiveness

