In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme to every figure drawn in this notebook.
sns.set(style="whitegrid")

# Read the community-safety CSV into `data`, the working frame used by the
# cells that follow.
file_path = 'Community_Safety_Data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows to confirm the file loaded as expected.
data.head(n=5)

In [None]:
# Parse 'rep_date' into datetime64 in a single pass. Casting to str first keeps
# pandas from interpreting non-string values as epoch offsets, and
# errors='coerce' turns unparseable entries into NaT instead of raising.
# A second pd.to_datetime(..., format=...) call is not needed: pandas returns
# an already-datetime column unchanged, so a format string would never be applied.
data['rep_date'] = pd.to_datetime(data['rep_date'].astype(str), errors='coerce')

# Check the conversion: 'rep_date' should now be datetime64, and its non-null
# count shows how many rows parsed successfully.
data.info()

In [None]:
# Histogram of crime-report volume over time.
# The datetime64 column is passed directly. `.dt.date` would yield an object
# column of datetime.date values, which is not a datetime64 dtype, so seaborn
# may not treat the axis as continuous time and `bins=100` may not be honoured.
plt.figure(figsize=(15, 6))
sns.histplot(x=data['rep_date'], bins=100, kde=False)
plt.title('Crime Reports Over Time')
plt.xlabel('Date')
plt.ylabel('Number of Crimes Reported')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Derive calendar fields from 'rep_date' for the monthly and yearly breakdowns.
data['year'] = data['rep_date'].dt.year
data['month'] = data['rep_date'].dt.month_name()

# Number of reports per (municipality, year, month, occurrence type).
grouped_data = data.groupby(['municipality', 'year', 'month', 'occ_type']).size().reset_index(name='counts')

# Spread months into columns. Every (municipality, year, occ_type, month)
# combination occurs once in grouped_data, so summing reproduces the count;
# stating aggfunc explicitly avoids relying on the default 'mean' for counts.
pivot_table = grouped_data.pivot_table(
    index=['municipality', 'year', 'occ_type'],
    columns='month',
    values='counts',
    aggfunc='sum',
    fill_value=0,
)

# pivot_table orders the month-name columns alphabetically (April, August, ...).
# Put them back in calendar order, keeping only the months present in the data,
# and force an integer dtype so the counts can be rendered with an integer format.
month_order = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
pivot_table = pivot_table[[m for m in month_order if m in pivot_table.columns]].astype(int)

# Display the pivot table
pivot_table.head()

In [None]:
# Draw one heatmap per municipality. Rows are the remaining index levels
# (year, occurrence type) and columns are months; each cell is annotated with
# its integer count.
municipality_names = pivot_table.index.get_level_values('municipality').unique()
for municipality in municipality_names:
    # Cross-section for this municipality; xs drops the 'municipality' level.
    municipality_data = pivot_table.xs(municipality, level='municipality')

    fig, ax = plt.subplots(figsize=(20, 10))
    sns.heatmap(
        municipality_data,
        annot=True,
        fmt="d",
        linewidths=.5,
        cmap="YlGnBu",
        ax=ax,
    )
    ax.set_title(f'Crime Count by Occurrence Type in {municipality} (Year and Month)')
    ax.set_ylabel('Year and Occurrence Type')
    ax.set_xlabel('Month')
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.setp(ax.get_yticklabels(), rotation=0)
    plt.show()

In [None]:
# Number of reports per (municipality, year, occurrence type).
yearly_data = data.groupby(['municipality', 'year', 'occ_type']).size().reset_index(name='counts')

# Spread years into columns. Each (municipality, occ_type, year) combination
# occurs once in yearly_data, so summing reproduces the count; aggfunc is
# stated explicitly rather than relying on the default 'mean'. The integer
# cast keeps the table usable with integer annotation formats.
yearly_pivot = yearly_data.pivot_table(
    index=['municipality', 'occ_type'],
    columns='year',
    values='counts',
    aggfunc='sum',
    fill_value=0,
).astype(int)

# Display the pivot table
yearly_pivot.head()

In [None]:
# Draw one heatmap per municipality: occurrence types down the rows, years
# across the columns, each cell annotated with its integer count.
municipality_names = yearly_pivot.index.get_level_values('municipality').unique()
for municipality in municipality_names:
    # Cross-section for this municipality; xs drops the 'municipality' level.
    municipality_yearly_data = yearly_pivot.xs(municipality, level='municipality')

    fig, ax = plt.subplots(figsize=(20, 10))
    sns.heatmap(
        municipality_yearly_data,
        annot=True,
        fmt="d",
        linewidths=.5,
        cmap="YlGnBu",
        ax=ax,
    )
    ax.set_title(f'Crime Count by Occurrence Type in {municipality} (Year)')
    ax.set_ylabel('Occurrence Type')
    ax.set_xlabel('Year')
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.setp(ax.get_yticklabels(), rotation=0)
    plt.show()

In [None]:
# Bar chart of the number of rows in `data` per year, with one bar colour per
# municipality (countplot tallies rows; no occurrence-type split is applied).
fig, ax = plt.subplots(figsize=(20, 10))
sns.countplot(data=data, x='year', hue='municipality', ax=ax)
ax.set_title('Count of Occurrences by Year and Municipality')
ax.set_xlabel('Year')
ax.set_ylabel('Count of Occurrences')
ax.legend(title='Municipality')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

In [None]:
# Bar chart of the number of occurrences per year, split by 'LocationCode'.
# Municipality is not part of this chart, so the title and y-axis label describe
# what is actually plotted: row counts by year and location code.
plt.figure(figsize=(20, 10))
sns.countplot(x='year', hue='LocationCode', data=data)
plt.title('Count of Occurrences by Year and Location Code')
plt.xlabel('Year')
plt.ylabel('Count of Occurrences')
# Park the legend outside the axes so it does not cover the bars.
plt.legend(title='Location Code', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Number of occurrences per (year, municipality, location code).
location_data = data.groupby(['year', 'municipality', 'LocationCode']).size().reset_index(name='counts')

# Show all three dimensions: one panel per municipality, years on the x axis and
# one bar colour per location code. Plotting x='year' with hue='municipality'
# on this frame would make seaborn average the per-location-code rows (its
# default estimator is the mean) and would never display LocationCode at all.
# Here every bar maps to exactly one row, so bar height equals the count.
g = sns.catplot(
    data=location_data,
    kind='bar',
    x='year',
    y='counts',
    hue='LocationCode',
    col='municipality',
    col_wrap=3,
    height=5,
    aspect=1.3,
)
g.set_axis_labels('Year', 'Count of Occurrences')
g.set_titles('{col_name}')
for ax in g.axes.flat:
    ax.tick_params(axis='x', labelrotation=45)
# catplot creates its own figure, which is the current figure at this point.
plt.suptitle('Count of Occurrences by Location Code, Municipality, and Year', y=1.02)
plt.show()

In [None]:
# Keep only the rows whose LocationCode is exactly 'Residence'.
is_residence = data['LocationCode'] == 'Residence'
residence_data = data[is_residence]

# One row per (year, municipality) holding the number of residence occurrences.
residence_counts = (
    residence_data
    .groupby(['year', 'municipality'])
    .size()
    .rename('counts')
    .reset_index()
)

# Grouped bars: years along x, one colour per municipality.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=residence_counts, x='year', y='counts', hue='municipality', ax=ax)
ax.set_title('Count of Residence Occurrences by Municipality and Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count of Residence Occurrences')
ax.legend(title='Municipality')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Keep only the rows whose LocationCode is exactly 'Business'.
is_business = data['LocationCode'] == 'Business'
business_data = data[is_business]

# One row per (year, municipality) holding the number of business occurrences.
business_counts = (
    business_data
    .groupby(['year', 'municipality'])
    .size()
    .rename('counts')
    .reset_index()
)

# Grouped bars: years along x, one colour per municipality.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=business_counts, x='year', y='counts', hue='municipality', ax=ax)
ax.set_title('Count of Business Occurrences by Municipality and Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count of Business Occurrences')
ax.legend(title='Municipality')
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Count occurrences per calendar day and municipality.
# dt.normalize() resets the time-of-day to midnight, so reports from the same
# day are counted together instead of forming one group per distinct timestamp.
# If 'rep_date' already holds date-only values this is a no-op.
# The grouping key keeps the name 'rep_date', so the resulting columns are
# 'rep_date', 'municipality' and 'counts'.
trend_data = (
    data.groupby([data['rep_date'].dt.normalize(), 'municipality'])
    .size()
    .reset_index(name='counts')
)

# Pivot to a date-indexed table with one column per municipality; a missing
# (date, municipality) pair means no reports that day, hence the fill with 0.
trend_pivot = trend_data.pivot(index='rep_date', columns='municipality', values='counts').fillna(0)

# Plot one line per municipality.
plt.figure(figsize=(20, 10))
for municipality in trend_pivot.columns:
    plt.plot(trend_pivot.index, trend_pivot[municipality], label=municipality)

plt.title('Trend of Occurrences for Each Municipality Over Time')
plt.xlabel('Date')
plt.ylabel('Count of Occurrences')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Histogram of crime-report volume over time.
# The datetime64 column is passed directly. `.dt.date` would yield an object
# column of datetime.date values, which is not a datetime64 dtype, so seaborn
# may not treat the axis as continuous time and `bins=100` may not be honoured.
plt.figure(figsize=(15, 6))
sns.histplot(x=data['rep_date'], bins=100, kde=False)
plt.title('Crime Reports Over Time')
plt.xlabel('Date')
plt.ylabel('Number of Crimes Reported')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of crime-report volume over time.
# The datetime64 column is passed directly. `.dt.date` would yield an object
# column of datetime.date values, which is not a datetime64 dtype, so seaborn
# may not treat the axis as continuous time and `bins=100` may not be honoured.
plt.figure(figsize=(15, 6))
sns.histplot(x=data['rep_date'], bins=100, kde=False)
plt.title('Crime Reports Over Time')
plt.xlabel('Date')
plt.ylabel('Number of Crimes Reported')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Convert 'rep_date' to datetime; unparseable values become NaT (errors='coerce').
# Re-running this on an already-converted column round-trips through str and
# yields the same datetimes.
data['rep_date'] = pd.to_datetime(data['rep_date'].astype(str), errors='coerce')

# Check the conversion
data.info()

# Descriptive statistics for the datetime column.
# The `datetime_is_numeric` keyword was removed in pandas 2.0 (passing it raises
# TypeError); from 2.0 on, describe() always summarises datetimes numerically
# (count, mean, min, quartiles, max). On pandas 1.x the plain call still runs
# but reports the categorical-style summary with a FutureWarning.
data['rep_date'].describe()

In [None]:
# Reports per municipality per calendar date. Only (municipality, date) pairs
# that actually occur in `data` produce a row, so days with no reports are not
# represented here.
daily_data = (
    data.groupby(['municipality', data['rep_date'].dt.date])
    .size()
    .rename('counts')
    .reset_index()
)

# Mean of the daily counts for each municipality, then the row with the largest mean.
municipality_risk = daily_data.groupby('municipality', as_index=False)['counts'].mean()
top_row_label = municipality_risk['counts'].idxmax()
highest_risk_municipality = municipality_risk.loc[top_row_label]

# Result noted from an earlier run: the municipality of Vaughan has the highest
# average daily crime count, approximately 38.04 incidents per day. The original
# analysis reads this as Vaughan possibly being at the highest risk of crime
# increase compared to the other municipalities in the dataset.
# Display the municipality with the highest average
highest_risk_municipality

In [None]:
# Read the income/population table from its CSV.
income_population_data = pd.read_csv(filepath_or_buffer='Income_Population_Data.csv')

# Left-join it onto the crime rows by 'municipality': every crime row is kept,
# and rows without a match get missing values in the added columns.
merged_data = data.merge(income_population_data, how='left', on='municipality')

# Preview the merged frame.
merged_data.head(n=5)

In [None]:
# Total number of crime rows per municipality, stored in a 'counts' column.
municipality_crime_counts = (
    data.groupby('municipality')
    .size()
    .rename('counts')
    .reset_index()
)

# Attach the counts to the income/population table (one row per table entry;
# entries with no matching municipality in the crime data get a missing count).
merged_data = income_population_data.merge(municipality_crime_counts, how='left', on='municipality')

# Pairwise correlation between income, population and crime count, drawn as an
# annotated heatmap on the current figure.
correlation_columns = ['avg_household_income', 'population', 'counts']
correlation_matrix = merged_data[correlation_columns].corr()
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix for Average Household Income, Population, and Crime Occurrences')
plt.show()

In [None]:
# Count occurrences per municipality, year and month, ordered chronologically.
# 'month' holds month names, and sorting names puts April before January. The
# numeric month taken from 'rep_date' is therefore added as a temporary grouping
# key, used for ordering, and dropped again so the result keeps the columns
# 'municipality', 'year', 'month', 'counts'.
monthly_data = (
    data.groupby(['municipality', 'year', data['rep_date'].dt.month.rename('month_num'), 'month'])
    .size()
    .reset_index(name='counts')
    .sort_values(by=['municipality', 'year', 'month_num'])
    .drop(columns='month_num')
)

# Display the first few rows of the sorted data
monthly_data.head()

In [None]:
# Ensure 'rep_date' is in datetime format (no-op when it already is).
data['rep_date'] = pd.to_datetime(data['rep_date'])

# Count occurrences per municipality, year and month, ordered chronologically.
# 'month' holds month names, and sorting names puts April before January. The
# numeric month taken from 'rep_date' is therefore added as a temporary grouping
# key, used for ordering, and dropped again so the result keeps the columns
# 'municipality', 'year', 'month', 'counts'.
monthly_data = (
    data.groupby(['municipality', 'year', data['rep_date'].dt.month.rename('month_num'), 'month'])
    .size()
    .reset_index(name='counts')
    .sort_values(by=['municipality', 'year', 'month_num'])
    .drop(columns='month_num')
)

# Display the first few rows of the sorted data
monthly_data.head()

In [None]:
# Occurrences per (municipality, year), ordered by municipality and then year.
# This rebinds `yearly_data` to a frame with the columns 'municipality',
# 'year' and 'counts'.
yearly_data = (
    data.groupby(['municipality', 'year'])
    .size()
    .rename('counts')
    .reset_index()
    .sort_values(by=['municipality', 'year'])
)

# Preview the result.
yearly_data.head()

In [None]:
# Count occurrences per municipality, year and month, ordered chronologically.
# The month key is read from `data`; the name `date` is not defined in this
# notebook, so referring to it raises NameError.
# 'month' holds month names, and sorting names puts April before January. The
# numeric month taken from 'rep_date' is therefore added as a temporary grouping
# key, used for ordering, and dropped again so the result keeps the columns
# 'municipality', 'year', 'month', 'counts'.
monthly_data = (
    data.groupby(['municipality', 'year', data['rep_date'].dt.month.rename('month_num'), 'month'])
    .size()
    .reset_index(name='counts')
    .sort_values(by=['municipality', 'year', 'month_num'])
    .drop(columns='month_num')
)

# Display the first few rows of the sorted data
monthly_data.head()

In [None]:
# Grouped bar chart built from `yearly_data`: years along x, bar height from
# the 'counts' column, one colour per municipality.
fig, ax = plt.subplots(figsize=(15, 8))
sns.barplot(data=yearly_data, x='year', y='counts', hue='municipality', ax=ax)
ax.set_title('Count of Occurrences per Municipality per Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count of Occurrences')
ax.legend(title='Municipality')
plt.show()

In [None]:
# Read the district platoon staffing-exceptions CSV into its own frame.
staffing_exceptions_data = pd.read_csv(
    filepath_or_buffer='District_Platoon_Staffing_Exceptions.csv'
)

# Preview the leading rows.
staffing_exceptions_data.head(n=5)

In [None]:
# Parse 'Exception Date' into datetime64.
staffing_exceptions_data['Exception Date'] = pd.to_datetime(
    staffing_exceptions_data['Exception Date']
)

# Count staffing exceptions per (year, month, day).
# NOTE(review): this relies on 'year', 'month' and 'day' already being columns
# of the CSV; they are not derived from 'Exception Date' here. Whether 'day' is
# a day of the month or a day of the week cannot be told from this cell, so
# confirm against the file.
grouped_data = (
    staffing_exceptions_data
    .groupby(['year', 'month', 'day'])
    .size()
    .rename('counts')
    .reset_index()
)

# Preview the grouped counts.
grouped_data.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set the aesthetic style of the plots
sns.set_style('whitegrid')

# `grouped_data` holds one row per (year, month, day). Collapse it to a single
# total per (year, month) first: given several rows per x value, lineplot would
# otherwise draw their mean with a confidence band, which is not the
# "number of staffing exceptions" the axis label promises.
monthly_totals = grouped_data.groupby(['year', 'month'], as_index=False)['counts'].sum()

# One line per year, months along the x axis.
plt.figure(figsize=(15, 6))
sns.lineplot(data=monthly_totals, x='month', y='counts', hue='year', marker='o')
plt.title('Staffing Exceptions by Month and Year')
plt.xlabel('Month')
plt.ylabel('Number of Staffing Exceptions')
plt.legend(title='Year')
# Tick every month 1-12 (assumes 'month' is numeric — TODO confirm against the CSV).
plt.xticks(range(1, 13))
plt.show()

In [None]:
# Parse 'Exception Date' into datetime64 (a repeat conversion leaves an
# already-parsed column unchanged).
staffing_exceptions_data['Exception Date'] = pd.to_datetime(
    staffing_exceptions_data['Exception Date']
)

# Count staffing exceptions per distinct 'Exception Date' value, ordered by
# date. This rebinds `grouped_data` to a frame with the columns
# 'Exception Date' and 'counts'.
grouped_data = (
    staffing_exceptions_data
    .groupby('Exception Date')
    .size()
    .rename('counts')
    .reset_index()
    .sort_values(by='Exception Date')
)

# Preview the grouped counts.
grouped_data.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set the aesthetic style of the plots
sns.set_style('whitegrid')

# Line chart of staffing exceptions per 'Exception Date'. The x axis is the
# date itself (there is no month/year split or hue), so the title says so.
plt.figure(figsize=(15, 6))
sns.lineplot(data=grouped_data, x='Exception Date', y='counts', marker='o')
plt.title('Staffing Exceptions by Date')
plt.xlabel('Date')
plt.ylabel('Number of Staffing Exceptions')
plt.show()