In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Crime Data Generation\n",
    "\n",
    "This notebook generates synthetic crime data for India using Python and the Faker library."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup and Imports\n",
    "\n",
    "First, import necessary libraries and initialize parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from faker import Faker\n",
    "import random\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "# One seed shared by every random source so a fresh kernel reproduces the same data\n",
    "SEED = 0\n",
    "\n",
    "# Faker instance using the 'en_IN' locale; seed Faker, NumPy and the stdlib RNG alike\n",
    "fake = Faker('en_IN')\n",
    "Faker.seed(SEED)\n",
    "np.random.seed(SEED)\n",
    "random.seed(SEED)\n",
    "\n",
    "# Value pools for the categorical columns\n",
    "# NOTE(review): `areas` is not read by generate_crime_data in this notebook - confirm whether it is still needed\n",
    "areas = [\"Mumbai\", \"Delhi\", \"Bangalore\", \"Hyderabad\", \"Chennai\", \"Kolkata\"]\n",
    "crime_categories = [\"Violent Crime\", \"Property Crime\", \"Drug Offense\", \"Cyber Crime\", \"White Collar Crime\"]\n",
    "premises_desc = [\"Residential\", \"Commercial\", \"Public\", \"Industrial\"]\n",
    "status_desc = [\"Open\", \"Closed\", \"Under Investigation\", \"Solved\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Generation Function\n",
    "\n",
    "Define a function to generate synthetic crime data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to generate synthetic data\n",
    "def generate_crime_data(num_records):\n",
    "    \"\"\"Build a DataFrame of synthetic crime incidents.\n",
    "\n",
    "    Identifiers, dates and times come from the module-level ``fake``\n",
    "    instance; every categorical field is picked with ``random.choice``.\n",
    "\n",
    "    Args:\n",
    "        num_records: Number of incident rows to generate.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with one row per incident; its columns are the\n",
    "        keys of the record dict assembled in the loop below.\n",
    "    \"\"\"\n",
    "    # Constant choice pools, built once instead of on every iteration.\n",
    "    districts = [\"North\", \"South\", \"East\", \"West\", \"Central\"]\n",
    "    yes_no = [\"Yes\", \"No\"]\n",
    "    records = []\n",
    "\n",
    "    for _ in range(num_records):\n",
    "        # The order of the random draws is deliberate: keeping it stable\n",
    "        # keeps seeded runs reproducible.\n",
    "        incident_id = fake.uuid4()\n",
    "        district = random.choice(districts)\n",
    "        incident_date = fake.date_this_decade()\n",
    "        incident_time = fake.time(pattern=\"%H:%M:%S\")\n",
    "\n",
    "        # Parse the hour once; both time buckets below derive from it.\n",
    "        hour = int(incident_time.split(\":\")[0])\n",
    "        if hour < 12:\n",
    "            time_of_day = \"Morning\"\n",
    "        elif hour < 18:\n",
    "            time_of_day = \"Afternoon\"\n",
    "        else:\n",
    "            time_of_day = \"Evening\"\n",
    "        daytime_or_night = \"Daytime\" if hour < 18 else \"Night\"\n",
    "\n",
    "        # weekday() counts from Monday = 0, so 5 and 6 are Saturday and Sunday.\n",
    "        weekday_or_weekend = \"Weekend\" if incident_date.weekday() >= 5 else \"Weekday\"\n",
    "\n",
    "        crime_category = random.choice(crime_categories)\n",
    "        repeat_offender = random.choice(yes_no)\n",
    "        crime_location_type = random.choice(premises_desc)\n",
    "        arrest_made = random.choice(yes_no)\n",
    "        charge_filed = random.choice(yes_no)\n",
    "        outcome = random.choice(status_desc)\n",
    "\n",
    "        records.append({\n",
    "            \"Incident ID\": incident_id,\n",
    "            \"District\": district,\n",
    "            \"Incident Date\": incident_date,\n",
    "            \"Incident Time\": incident_time,\n",
    "            \"Day of Week\": incident_date.strftime(\"%A\"),\n",
    "            \"Month\": incident_date.strftime(\"%B\"),\n",
    "            \"Year\": incident_date.year,\n",
    "            \"Time of Day\": time_of_day,\n",
    "            # Despite its name, this column only carries Daytime/Night.\n",
    "            \"Daytime/Weekend/Weekday\": daytime_or_night,\n",
    "            \"Crime Category\": crime_category,\n",
    "            \"Repeat Offender\": repeat_offender,\n",
    "            \"Crime Location Type\": crime_location_type,\n",
    "            \"Arrest Made\": arrest_made,\n",
    "            \"Charge Filed\": charge_filed,\n",
    "            \"Outcome\": outcome,\n",
    "            # Placed last so the columns before it keep their positions.\n",
    "            \"Weekday/Weekend\": weekday_or_weekend,\n",
    "        })\n",
    "\n",
    "    return pd.DataFrame(records)\n",
    "\n",
    "# Example usage:\n",
    "# crime_data_df = generate_crime_data(1000)\n",
    "# crime_data_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Saving Data\n",
    "\n",
    "Save the generated data to a CSV file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example of saving data\n",
    "# crime_data_df.to_csv('synthetic_crime_data_india.csv', index=False)\n",
    "# print(f\"Generated {len(crime_data_df)} records and saved to 'synthetic_crime_data_india.csv'\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
