# Vancouver Theft Incidents Analysis

## Project Overview
This project analyzes how theft incidents are distributed across Vancouver's neighborhoods, using spatiotemporal data mining techniques and machine-learning-based predictive models. By identifying high-crime areas and peak times, exploring potential crime patterns, and predicting the likelihood of theft at specific times and locations, the project seeks to provide insights that can strengthen community safety strategies.

## Analysis Objectives
1. Analyze spatial distribution of theft incidents
2. Identify temporal patterns and peak times
3. Develop predictive models for risk assessment
4. Generate actionable insights for safety strategies

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import folium
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Visualization settings
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later dropped the old names, so prefer the new name and fall back to
# the legacy one on older installs.
_mpl_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_mpl_style)
sns.set_palette('husl')
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## 1. Data Preprocessing

In [None]:
# Load and prepare data
def load_theft_data(path='crime_data.csv'):
    """Load crime records from a CSV file and keep only theft incidents.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``'crime_data.csv'`` in the current working directory.

    Returns:
        A new DataFrame holding the rows whose ``TYPE`` value contains
        "theft" (case-insensitive). Rows with a missing ``TYPE`` are dropped.
    """
    df = pd.read_csv(path)
    # Filter for theft incidents; na=False treats a missing TYPE as "no match".
    is_theft = df['TYPE'].str.contains('Theft', case=False, na=False)
    # Return an explicit copy so that later column assignments on the result
    # do not raise SettingWithCopyWarning or write into a temporary slice.
    return df[is_theft].copy()

# Read the theft subset of the crime data
theft_df = load_theft_data()

# Print a short overview, then let the notebook render the first rows
print(
    "Dataset Overview:",
    f"Total theft incidents: {len(theft_df)}",
    "\nSample of the data:",
    sep="\n",
)
theft_df.head()

In [None]:
# Data preprocessing
def preprocess_data(df):
    """Parse the incident timestamp and derive temporal feature columns.

    The input frame is not modified; all work happens on a copy.

    Args:
        df: DataFrame with a ``DATE`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame in which ``DATE`` is a datetime column and the columns
        ``Year``, ``Month``, ``Day``, ``Hour``, ``DayOfWeek`` and ``TimeOfDay``
        have been added. ``TimeOfDay`` is categorical: Night (hours 0-5),
        Morning (6-11), Afternoon (12-17) and Evening (18-23).
    """
    # Work on a copy: callers may pass a filtered slice of another frame, and
    # assigning columns to such a slice raises SettingWithCopyWarning.
    df = df.copy()

    # Convert datetime
    df['DATE'] = pd.to_datetime(df['DATE'])

    # Extract temporal features
    stamp = df['DATE'].dt
    df['Year'] = stamp.year
    df['Month'] = stamp.month
    df['Day'] = stamp.day
    df['Hour'] = stamp.hour
    df['DayOfWeek'] = stamp.day_name()

    # Use left-closed bins [0,6), [6,12), [12,18), [18,24). With pd.cut's
    # default right-closed bins, hour 0 falls outside every interval (NaN)
    # and hours 6, 12 and 18 land in the preceding period.
    df['TimeOfDay'] = pd.cut(df['Hour'],
                            bins=[0, 6, 12, 18, 24],
                            right=False,
                            labels=['Night', 'Morning', 'Afternoon', 'Evening'])

    return df

# Process data: rebind theft_df to the frame returned by preprocess_data,
# which carries the parsed DATE column and the derived temporal columns.
theft_df = preprocess_data(theft_df)
print("Data preprocessing completed!")

## 2. Initial Data Exploration

In [None]:
# Basic statistics and data quality check
def explore_data_quality(df):
    """Print a quick data-quality report for a DataFrame.

    The report lists the frame's shape, the number of null values in each
    column, and each column's dtype. Nothing is returned.
    """
    print("Dataset Shape:", df.shape)

    # Each remaining section is a heading followed by a per-column summary.
    report_sections = (
        ("\nMissing Values:", df.isnull().sum()),
        ("\nData Types:", df.dtypes),
    )
    for heading, summary in report_sections:
        print(heading)
        print(summary)

# Print the shape, per-column missing-value counts and dtypes of theft_df.
explore_data_quality(theft_df)