In [None]:
{
 "cells": [
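  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 6: Ethical Considerations\n",
    "This section evaluates fairness, privacy, and social justice issues in the dataset by checking for sensitive attribute columns (gender, race, age), comparing how often each brand appears, and comparing average prices across brands."
   ]
  },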
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Load dataset: read 'cleaned_items.csv' when it exists, otherwise fall\n",
    "# back to '20191226-items.csv'. If the fallback is missing as well, the\n",
    "# FileNotFoundError from the second read propagates.\n",
    "PRIMARY_PATH = 'cleaned_items.csv'\n",
    "FALLBACK_PATH = '20191226-items.csv'\n",
    "try:\n",
    "    df = pd.read_csv(PRIMARY_PATH)\n",
    "    data_path = PRIMARY_PATH\n",
    "except FileNotFoundError:\n",
    "    df = pd.read_csv(FALLBACK_PATH)\n",
    "    data_path = FALLBACK_PATH\n",
    "\n",
    "print('=== Ethical Considerations Analysis ===')\n",
    "# Say which file was loaded: the fallback is otherwise silent, and every\n",
    "# result in this notebook depends on which of the two files was read.\n",
    "print(f'Loaded {data_path} ({len(df)} rows, {df.shape[1]} columns)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Check for sensitive attributes\n",
    "sensitive_columns = ['gender', 'race', 'age']\n",
    "\n",
    "# Map each header, stripped of surrounding whitespace and lowercased, to\n",
    "# the real column label, so that headers such as 'Gender' or ' age' are\n",
    "# not wrongly reported as absent.\n",
    "columns_by_normalized_name = {str(name).strip().lower(): name for name in df.columns}\n",
    "\n",
    "print('\\nChecking for sensitive attributes (gender, race, age):')\n",
    "for col in sensitive_columns:\n",
    "    # An exact match takes precedence; otherwise use the normalized lookup.\n",
    "    found = col if col in df.columns else columns_by_normalized_name.get(col)\n",
    "    if found is not None:\n",
    "        print(f\"Column '{found}' exists with {df[found].nunique()} unique values\")\n",
    "    else:\n",
    "        print(f\"Column '{col}' not found (privacy-friendly)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Representation fairness: how often does each brand appear?\n",
    "if 'brand' in df.columns:\n",
    "    # Number of rows per brand.\n",
    "    brand_counts = df['brand'].value_counts()\n",
    "    most_common = brand_counts.head(10)\n",
    "    least_common = brand_counts.tail(10)\n",
    "\n",
    "    print('\\nTop 10 most common brands:')\n",
    "    print(most_common)\n",
    "    print('\\nBottom 10 least common brands:')\n",
    "    print(least_common)\n",
    "\n",
    "    # Bar chart of the first ten entries of the count table.\n",
    "    ax = most_common.plot(kind='bar', title='Top 10 Brands Representation')\n",
    "    ax.set_ylabel('Count')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Price inequality across brands\n",
    "if all(name in df.columns for name in ('price', 'brand')):\n",
    "    # Mean price per brand, sorted from highest to lowest.\n",
    "    mean_price_by_brand = df.groupby('brand')['price'].mean()\n",
    "    brand_price_stats = mean_price_by_brand.sort_values(ascending=False)\n",
    "    highest_priced = brand_price_stats.head(10)\n",
    "\n",
    "    print('\\nAverage price per brand (top 10):')\n",
    "    print(highest_priced)\n",
    "\n",
    "    # Bar chart of the ten brands with the highest mean price.\n",
    "    ax = highest_priced.plot(kind='bar', title='Top 10 Brands by Avg Price')\n",
    "    ax.set_ylabel('Average Price')\n",
    "    plt.show()\n",
    "\n",
    "# Printed whether or not the block above ran.\n",
    "print('\\nEthical analysis completed. Check charts for visual insights.')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}