In [None]:
import pandas as pd
from datetime import datetime
# Load the raw crime records; every step below adds or rewrites columns of `df`.
file_path = 'crime_dataset_india.csv'
df = pd.read_csv(file_path)

# Missing report numbers get an explicit placeholder instead of NaN.
df['Report Number'] = df['Report Number'].fillna('Unknown')

# Parse the timestamp columns that are present. Values that do not match the
# day-month-year hour:minute layout become NaT because of errors='coerce'.
date_columns = ['Date Reported', 'Date of Occurrence', 'Time of Occurrence', 'Date Case Closed']
for col in date_columns:
    if col in df.columns:
        df[col] = pd.to_datetime(df[col], format='%d-%m-%Y %H:%M', errors='coerce')

# Derive the weekday BEFORE the sentinel fill below. If the sentinel were
# applied first, rows with a missing/unparseable occurrence date would be
# reported as the weekday of 1900-01-01 instead of staying missing, and the
# later "Unknown" fill for 'Day of Week' could never apply to them.
df['Day of Week'] = df['Date of Occurrence'].dt.day_name()

# Missing occurrence dates are stored as the sentinel 1900-01-01.
df['Date of Occurrence'] = df['Date of Occurrence'].fillna(pd.Timestamp('1900-01-01'))

# Hour component of the parsed occurrence time (NaN where parsing failed).
df['Hour of Occurrence'] = df['Time of Occurrence'].dt.hour

# Whole days between the report date and the case-closed date; NaN when
# either end is missing.
if 'Date Reported' in df.columns and 'Date Case Closed' in df.columns:
    df['Case Duration'] = (df['Date Case Closed'] - df['Date Reported']).dt.days

# Expand the one-letter gender codes; any other value is left as it is.
df['Victim Gender'] = df['Victim Gender'].replace({'M': 'Male', 'F': 'Female', 'X': 'Other'})

# Coerce count-like columns to numbers; unparseable entries become NaN.
numeric_columns = ['Victim Age', 'Police Deployed']
for col in numeric_columns:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Binary flag for closed cases: 'Yes' -> 1, 'No' -> 0, anything else -> NaN.
df['Case Closed Status'] = df['Case Closed'].map({'Yes': 1, 'No': 0})

# Upper-cased crime description -> reporting category. Descriptions that are
# not listed (including EXTORTION, PUBLIC INTOXICATION, ILLEGAL POSSESSION and
# FIREARM OFFENSE) fall back to 'Other Crime'.
_CRIME_CATEGORIES = {
    'ROBBERY': 'Robbery',
    'VANDALISM': 'Vandalism',
    'SHOPLIFTING': 'Vandalism',
    'BURGLARY': 'Property Crime',
    'VEHICLE - STOLEN': 'Property Crime',
    'HOMICIDE': 'Violent Crime',
    'ASSAULT': 'Violent Crime',
    'SEXUAL ASSAULT': 'Violent Crime',
    'DOMESTIC VIOLENCE': 'Violent Crime',
    'KIDNAPPING': 'Violent Crime',
    'DRUG OFFENSE': 'Drug Crime',
    'FRAUD': 'Financial Crime',
    'COUNTERFEITING': 'Financial Crime',
    'IDENTITY THEFT': 'Financial Crime',
    'ARSON': 'Arson',
    'TRAFFIC VIOLATION': 'Traffic Violation',
    'CYBERCRIME': 'Cybercrime',
}


def categorize_crime(crime):
    """Map a raw crime description to its reporting category.

    Matching ignores letter case and surrounding whitespace.

    Args:
        crime: The crime description. May be a missing value (None/NaN) or a
            non-string value; non-strings are converted with ``str`` before
            matching instead of raising.

    Returns:
        The category name, or 'Other Crime' for missing values and for
        descriptions without a dedicated category.
    """
    if pd.isnull(crime):
        return 'Other Crime'
    # str() keeps stray non-string cells from raising AttributeError on .upper().
    key = str(crime).strip().upper()
    return _CRIME_CATEGORIES.get(key, 'Other Crime')

# Tag each record with the coarse category of its crime description.
df['Crime Category'] = df['Crime Description'].apply(categorize_crime)

# Fill the remaining gaps in these three columns with fixed defaults.
df = df.fillna(
    value={
        'Victim Age': 0,
        'Police Deployed': 0,
        'Day of Week': 'Unknown',
    }
)

# Write the cleaned table without the index column and report where it went.
output_file = 'preprocessed_crime_dataset_india.csv'
df.to_csv(path_or_buf=output_file, index=False)

print(f"Preprocessing complete {output_file}")
