In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from random import choice
import folium
from folium.plugins import HeatMap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import joblib

# Create a synthetic dataset
np.random.seed(0)

# Generate date range
start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 12, 31)
date_range = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Generate crime data
n_samples = 1000
crime_types = ['Theft', 'Assault', 'Vandalism', 'Burglary', 'Drug Offense']
locations = ['Dormitory', 'Classroom Building', 'Library', 'Recreation Center', 'Cafeteria']
months = [d.strftime('%B') for d in date_range]
crime_data = []

for _ in range(n_samples):
    date = choice(date_range)
    crime_type = choice(crime_types)
    location = choice(locations)
    month = date.strftime('%B')
    latitude = round(np.random.uniform(37.0, 37.1), 6)
    longitude = round(np.random.uniform(-122.0, -121.9), 6)

    crime_data.append([date, crime_type, location, month, latitude, longitude])

# Create a DataFrame
columns = ['Date', 'Crime Type', 'Location', 'Month', 'Latitude', 'Longitude']
df = pd.DataFrame(crime_data, columns=columns)

# Feature engineering: One-hot encode categorical variables
encoder = OneHotEncoder(drop='first', sparse=False)
encoded_features = encoder.fit_transform(df[['Location', 'Month']])
encoded_df = pd.concat([df[['Latitude', 'Longitude']], pd.DataFrame(encoded_features)], axis=1)

# Create a base map
m = folium.Map(location=[df['Latitude'].mean(), df['Longitude'].mean()], zoom_start=10)

# Add markers with intensity based on the number of unique crime incidents
locations = list(zip(df['Latitude'], df['Longitude']))
heat_map = HeatMap(locations, radius=15)
heat_map.add_to(m)

# Save the map to an HTML file
m.save('crime_intensity_map.html')

# Create a grid of latitude and longitude coordinates
latitude_range = (df['Latitude'].min(), df['Latitude'].max())
longitude_range = (df['Longitude'].min(), df['Longitude'].max())
latitude_step = 0.01
longitude_step = 0.01

grid_points = []

for lat in np.arange(latitude_range[0], latitude_range[1], latitude_step):
    for lon in np.arange(longitude_range[0], longitude_range[1], longitude_step):
        # Include 'Location' and 'Month' in grid_point_df
        grid_points.append((lat, lon, choice(locations), choice(months)))

# Create a DataFrame with grid points
grid_point_df = pd.DataFrame(grid_points, columns=['Latitude', 'Longitude', 'Location', 'Month'])

# Ensure every grid point has a valid month
grid_point_df['Month'] = grid_point_df['Month'].apply(lambda x: choice(months) if pd.isna(x) else x)

# Make sure the 'Location' and 'Month' columns in grid_point_df have the same categories as in the training data
grid_point_df['Location'] = pd.Categorical(grid_point_df['Location'], categories=encoder.categories_[0], ordered=False)
grid_point_df['Month'] = pd.Categorical(grid_point_df['Month'], categories=encoder.categories_[1], ordered=False)

# Feature engineering for grid points: One-hot encode categorical variables
encoded_features_grid = encoder.transform(grid_point_df[['Location', 'Month']])
grid_point_encoded_df = pd.concat([grid_point_df[['Latitude', 'Longitude']], pd.DataFrame(encoded_features_grid)], axis=1)

# Split the dataset into features (X) and labels (y)
X = encoded_df.drop('Crime Type', axis=1)
y = df['Crime Type']

# Train a Random Forest classifier
clf_rf = RandomForestClassifier()
clf_rf.fit(X, y)

# Save the trained model to a file
joblib.dump(clf_rf, 'random_forest_model.pkl')

# Load the trained model from the file
clf_rf_loaded = joblib.load('random_forest_model.pkl')

# Use the loaded model to predict the crime types for all grid points
predicted_crime_types = clf_rf_loaded.predict(grid_point_encoded_df)

# Store the predicted incidents for each grid point
predicted_incidents = {}

for i, point in enumerate(grid_points):
    predicted_incidents[point] = predicted_crime_types[i]

# Create a heatmap for predicted crime incidents
heat_map_predicted = HeatMap(
    data=list(predicted_incidents.keys()),
    radius=15,
    min_opacity=0.2,
    max_val=max(predicted_incidents.values()),
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
)

heat_map_predicted.add_to(m)

# Save the map with predicted crime incidents to an HTML file
m.save('predicted_crime_intensity_map.html')




ValueError: Found unknown categories [nan] in column 0 during transform

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from random import choice
import folium
from folium.plugins import HeatMap
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import joblib

# Create a synthetic dataset
np.random.seed(0)

# Generate date range
start_date = datetime(2022, 1, 1)
end_date = datetime(2022, 12, 31)
date_range = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Generate crime data
n_samples = 1000
crime_types = ['Theft', 'Assault', 'Vandalism', 'Burglary', 'Drug Offense']
locations = ['Dormitory', 'Classroom Building', 'Library', 'Recreation Center', 'Cafeteria']
months = [d.strftime('%B') for d in date_range]
crime_data = []

for _ in range(n_samples):
    date = choice(date_range)
    crime_type = choice(crime_types)
    location = choice(locations)
    month = date.strftime('%B')
    latitude = round(np.random.uniform(37.0, 37.1), 6)
    longitude = round(np.random.uniform(-122.0, -121.9), 6)

    crime_data.append([date, crime_type, location, month, latitude, longitude])

# Create a DataFrame
columns = ['Date', 'Crime Type', 'Location', 'Month', 'Latitude', 'Longitude']
df = pd.DataFrame(crime_data, columns=columns)

# Feature engineering: One-hot encode categorical variables
encoder = OneHotEncoder(drop='first', sparse=False)
encoded_features = encoder.fit_transform(df[['Location', 'Month']])
encoded_df = pd.concat([df[['Latitude', 'Longitude']], pd.DataFrame(encoded_features)], axis=1)

# Create a base map
m = folium.Map(location=[df['Latitude'].mean(), df['Longitude'].mean()], zoom_start=10)

# Add markers with intensity based on the number of unique crime incidents
locations = list(zip(df['Latitude'], df['Longitude']))
heat_map = HeatMap(locations, radius=15)
heat_map.add_to(m)

# Save the map to an HTML file
m.save('crime_intensity_map.html')

# Create a grid of latitude and longitude coordinates
latitude_range = (df['Latitude'].min(), df['Latitude'].max())
longitude_range = (df['Longitude'].min(), df['Longitude'].max())
latitude_step = 0.01
longitude_step = 0.01

grid_points = []

for lat in np.arange(latitude_range[0], latitude_range[1], latitude_step):
    for lon in np.arange(longitude_range[0], longitude_range[1], longitude_step):
        # Include 'Location' and 'Month' in grid_point_df
        grid_points.append((lat, lon, choice(locations), choice(months)))

# Create a DataFrame with grid points
grid_point_df = pd.DataFrame(grid_points, columns=['Latitude', 'Longitude', 'Location', 'Month'])

# Ensure every grid point has a valid month
grid_point_df['Month'] = grid_point_df['Month'].apply(lambda x: choice(months) if pd.isna(x) else x)

# Make sure the 'Location' and 'Month' columns in grid_point_df have the same categories as in the training data
grid_point_df['Location'] = pd.Categorical(grid_point_df['Location'], categories=encoder.categories_[0], ordered=False)
grid_point_df['Month'] = pd.Categorical(grid_point_df['Month'], categories=encoder.categories_[1], ordered=False)

# Feature engineering for grid points: One-hot encode categorical variables
encoded_features_grid = encoder.transform(grid_point_df[['Location', 'Month']])
grid_point_encoded_df = pd.concat([grid_point_df[['Latitude', 'Longitude']], pd.DataFrame(encoded_features_grid)], axis=1)

# Split the dataset into features (X) and labels (y)
X = encoded_df.drop('Crime Type', axis=1)
y = df['Crime Type']

# Train a Random Forest classifier
clf_rf = RandomForestClassifier()
clf_rf.fit(X, y)

# Save the trained model to a file
joblib.dump(clf_rf, 'random_forest_model.pkl')

# Load the trained model from the file
clf_rf_loaded = joblib.load('random_forest_model.pkl')

# Use the loaded model to predict the crime types for all grid points
predicted_crime_types = clf_rf_loaded.predict(grid_point_encoded_df)

# Store the predicted incidents for each grid point
predicted_incidents = {}

for i, point in enumerate(grid_points):
    predicted_incidents[point] = predicted_crime_types[i]

# Create a heatmap for predicted crime incidents
heat_map_predicted = HeatMap(
    data=list(predicted_incidents.keys()),
    radius=15,
    min_opacity=0.2,
    max_val=max(predicted_incidents.values()),
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
)

heat_map_predicted.add_to(m)

# Save the map with predicted crime incidents to




ValueError: Found unknown categories [nan] in column 0 during transform