# Risk and route assement

In [1]:
# Generate dataset
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [1]:


# Number of rows
num_rows = 1000000

# Generate a range of dates
start_date = datetime(2020, 1, 1)
end_date = datetime(2023, 12, 31)
date_range = pd.date_range(start_date, end_date, freq='T').to_list()  # Every minute

# Sample datetime values
date_times = np.random.choice(date_range, num_rows)

# Latitude and Longitude for Kenya
latitudes = np.random.uniform(-4.8, 4.6, num_rows)  # Approximate range for Kenya
longitudes = np.random.uniform(34.0, 41.0, num_rows)  # Approximate range for Kenya

# Crime types
crime_types = ['Robbery', 'Assault', 'Kidnapping', 'Burglary', 'Fraud']
crime_type = np.random.choice(crime_types, num_rows)

# Severity score between 1 and 10
severity = np.random.randint(1, 11, num_rows)

# Areas (dummy neighborhoods)
areas = ['Nairobi', 'Mombasa', 'Kisumu', 'Nakuru', 'Eldoret', 'Thika', 'Machakos', 'Nyeri']
area = np.random.choice(areas, num_rows)

# Time of day categories
def get_time_of_day(dt):
    hour = dt.hour
    if 5 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 17:
        return 'Afternoon'
    elif 17 <= hour < 21:
        return 'Evening'
    else:
        return 'Night'

time_of_day = [get_time_of_day(dt) for dt in date_times]

# Create the DataFrame
data = {
    'IncidentID': np.arange(1, num_rows + 1),
    'DateTime': date_times,
    'Latitude': latitudes,
    'Longitude': longitudes,
    'CrimeType': crime_type,
    'Severity': severity,
    'Area': area,
    'TimeOfDay': time_of_day
}

df = pd.DataFrame(data)

# Save to CSV for later use
df.to_csv('kenya_crime_data.csv', index=False)

print("Dataset generated and saved as 'kenya_crime_data.csv'")


  date_range = pd.date_range(start_date, end_date, freq='T').to_list()  # Every minute


Dataset generated and saved as 'kenya_crime_data.csv'


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
df = pd.read_csv('kenya_crime_data.csv')

# Convert datetime to features
df['Hour'] = pd.to_datetime(df['DateTime']).dt.hour
df['DayOfWeek'] = pd.to_datetime(df['DateTime']).dt.dayofweek

# Create a binary target for safety: 0 = Safe, 1 = Risky
df['Risky'] = (df['Severity'] > 5).astype(int)

# Features and target
features = ['Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'CrimeType', 'Area', 'TimeOfDay']
X = pd.get_dummies(df[features], drop_first=True)
y = df['Risky']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save the model for later use
import joblib
joblib.dump(rf_model, 'crime_assessment_model.pkl')


Accuracy: 0.49977
              precision    recall  f1-score   support

           0       0.50      0.51      0.51    100216
           1       0.50      0.49      0.49     99784

    accuracy                           0.50    200000
   macro avg       0.50      0.50      0.50    200000
weighted avg       0.50      0.50      0.50    200000



['crime_assessment_model.pkl']

In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import joblib
import folium

# Load the dataset and trained model
df = pd.read_csv('kenya_crime_data.csv')
rf_model = joblib.load('crime_assessment_model.pkl')

# Create a graph
G = nx.Graph()

# Define a grid of nodes (latitude and longitude)
latitude_range = np.linspace(df['Latitude'].min(), df['Latitude'].max(), 100)
longitude_range = np.linspace(df['Longitude'].min(), df['Longitude'].max(), 100)

nodes = {}
node_id = 1
for lat in latitude_range:
    for lon in longitude_range:
        nodes[node_id] = (lat, lon)
        G.add_node(node_id, pos=(lat, lon))
        node_id += 1

# Add edges with weights based on crime risk
for i in range(1, node_id):
    for j in range(i + 1, node_id):
        # Calculate distance to limit to nearby nodes
        distance = np.linalg.norm(np.array(nodes[i]) - np.array(nodes[j]))
        if distance < 0.05:  # Limit the connections to nearby nodes
            # Simulate features for the midpoint
            mid_point_lat = (nodes[i][0] + nodes[j][0]) / 2
            mid_point_lon = (nodes[i][1] + nodes[j][1]) / 2
            
            mid_point_features = pd.DataFrame({
                'Latitude': [mid_point_lat],
                'Longitude': [mid_point_lon],
                'Hour': [12],  # Assume a fixed hour
                'DayOfWeek': [3],  # Assume a fixed day
                'CrimeType_Robbery': [1],  # Assume a fixed crime type
                'Area_Nairobi': [1],  # Assume a fixed area
                'TimeOfDay_Afternoon': [1]  # Assume a fixed time of day
            })
            risk = rf_model.predict_proba(mid_point_features)[:, 1]  # Probability of risk

            # Add edge to the graph with risk as the weight
            G.add_edge(i, j, weight=risk[0])

# Create a Folium map centered around the dataset's median latitude and longitude
map_center = [df['Latitude'].median(), df['Longitude'].median()]
crime_map = folium.Map(location=map_center, zoom_start=12)

# Add nodes to the map
for node_id, (lat, lon) in nodes.items():
    folium.CircleMarker(
        location=(lat, lon),
        radius=3,
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(crime_map)

# Optionally add edges to the map to see connections (this can make the map cluttered)
for (i, j) in G.edges():
    lat_lon_i = nodes[i]
    lat_lon_j = nodes[j]
    weight = G[i][j]['weight']

    folium.PolyLine(
        locations=[lat_lon_i, lat_lon_j],
        color='red' if weight > 0.5 else 'green',
        weight=2
    ).add_to(crime_map)

# Display the map
crime_map.save('crime_risk_map.html')
crime_map


In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import random
from sklearn.ensemble import RandomForestClassifier
import joblib
import folium

# Load the dataset and trained model
df = pd.read_csv('kenya_crime_data.csv')
rf_model = joblib.load('crime_assessment_model.pkl')

# Create a graph
G = nx.Graph()

# Define a grid of nodes (latitude and longitude)
latitude_range = np.linspace(df['Latitude'].min(), df['Latitude'].max(), 100)
longitude_range = np.linspace(df['Longitude'].min(), df['Longitude'].max(), 100)

nodes = {}
node_id = 1
for lat in latitude_range:
    for lon in longitude_range:
        nodes[node_id] = (lat, lon)
        G.add_node(node_id, pos=(lat, lon))
        node_id += 1

# Add edges with weights based on crime risk
for i in range(1, node_id):
    for j in range(i + 1, node_id):
        # Calculate distance to limit to nearby nodes
        distance = np.linalg.norm(np.array(nodes[i]) - np.array(nodes[j]))
        if distance < 0.05:  # Limit the connections to nearby nodes
            # Simulate features for the midpoint
            mid_point_lat = (nodes[i][0] + nodes[j][0]) / 2
            mid_point_lon = (nodes[i][1] + nodes[j][1]) / 2
            
            mid_point_features = pd.DataFrame({
                'Latitude': [mid_point_lat],
                'Longitude': [mid_point_lon],
                'Hour': [12],  # Assume a fixed hour
                'DayOfWeek': [3],  # Assume a fixed day
                # Assuming you have more features, include them here
                'CrimeType_Robbery': [1],  # Assume a fixed crime type
                'Area_Nairobi': [1],  # Assume a fixed area
                'TimeOfDay_Afternoon': [1]  # Assume a fixed time of day
            })
            risk = rf_model.predict_proba(mid_point_features)[:, 1]  # Probability of risk

            # Add edge to the graph with risk as the weight
            G.add_edge(i, j, weight=risk[0])

print("Graph created with nodes and edges based on crime risk.")


Graph created with nodes and edges based on crime risk.


In [3]:
 def find_safest_route(G, start_node, end_node):
    # Use Dijkstra's algorithm to find the path with the minimum cumulative risk
    return nx.dijkstra_path(G, source=start_node, target=end_node, weight='weight')

# Example: Finding the safest route between two nodes
start_node = 1  # Example start node
end_node = 1000  # Example end node

safest_route = find_safest_route(G, start_node, end_node)

# Convert node path to coordinates
safest_route_coords = [nodes[node] for node in safest_route]
print("Safest Route Coordinates:", safest_route_coords)


NetworkXNoPath: No path to 1000.

In [4]:
# Create a map centered around the start node
map_safest_route = folium.Map(location=nodes[start_node], zoom_start=12)

# Add the safest route to the map
folium.PolyLine(safest_route_coords, color="green", weight=2.5, opacity=1).add_to(map_safest_route)

# Save the map as an HTML file and display it
map_safest_route.save("safest_route.html")
map_safest_route


NameError: name 'safest_route_coords' is not defined

In [4]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
df = pd.read_csv('kenya_crime_data.csv')

# Step 2: Create a grid of nodes (latitude and longitude) for Kenya
latitude_range = np.linspace(df['Latitude'].min(), df['Latitude'].max(), 50)
longitude_range = np.linspace(df['Longitude'].min(), df['Longitude'].max(), 50)

G = nx.Graph()
nodes = {}
node_id = 1

for lat in latitude_range:
    for lon in longitude_range:
        nodes[node_id] = (lat, lon)
        G.add_node(node_id, pos=(lat, lon))
        node_id += 1

print(f"Total nodes created: {G.number_of_nodes()}")


Total nodes created: 2500


In [None]:
# Feature Engineering
df['Hour'] = pd.to_datetime(df['DateTime']).dt.hour
df['DayOfWeek'] = pd.to_datetime(df['DateTime']).dt.dayofweek

# Define features and target for the model
features = ['Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'CrimeType']
X = pd.get_dummies(df[features], drop_first=True)
y = df['Severity']  # Assuming Severity is the target variable

# Train a RandomForest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X, y)


In [None]:
# Function to calculate risk using the trained model
def calculate_risk(lat, lon, hour=12, day_of_week=3, crime_type='Robbery'):
    # Prepare the input features
    input_features = pd.DataFrame({
        'Latitude': [lat],
        'Longitude': [lon],
        'Hour': [hour],
        'DayOfWeek': [day_of_week],
        f'CrimeType_{crime_type}': [1]  # Example crime type
    })
    input_features = pd.get_dummies(input_features, drop_first=True).reindex(columns=X.columns, fill_value=0)
    
    # Predict risk (severity score)
    risk = rf_model.predict_proba(input_features)[:, 1]  # Using probability of risk
    return risk[0]

# Add edges to the graph with weights based on calculated risk
for i in range(1, node_id):
    for j in range(i + 1, node_id):
        distance = np.linalg.norm(np.array(nodes[i]) - np.array(nodes[j]))
        if distance < 0.05:  # Adjust this threshold to connect nearby nodes
            mid_point_lat = (nodes[i][0] + nodes[j][0]) / 2
            mid_point_lon = (nodes[i][1] + nodes[j][1]) / 2
            
            # Calculate risk for the midpoint
            risk = calculate_risk(mid_point_lat, mid_point_lon)
            
            # Add edge to the graph with risk as the weight
            G.add_edge(i, j, weight=risk)

print(f"Total edges created: {G.number_of_edges()}")


In [None]:
# Draw the graph
pos = nx.get_node_attributes(G, 'pos')
weights = nx.get_edge_attributes(G, 'weight')

plt.figure(figsize=(12, 8))

# Draw nodes
nx.draw_networkx_nodes(G, pos, node_size=10, node_color='blue')

# Draw edges with color intensity based on weight (risk)
edges = nx.draw_networkx_edges(G, pos, edge_color=[weights[edge] for edge in G.edges], edge_cmap=plt.cm.Reds, width=2)

# Draw edge labels (weights)
edge_labels = {(u, v): f'{d["weight"]:.2f}' for u, v, d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

plt.title('Crime Risk Graph for Kenya')
plt.colorbar(edges)
plt.show()


In [None]:
# Create a map centered on the average location
average_lat = np.mean(latitude_range)
average_lon = np.mean(longitude_range)
crime_map = folium.Map(location=[average_lat, average_lon], zoom_start=6)

# Add nodes to the map
for node, (lat, lon) in nodes.items():
    folium.CircleMarker(location=[lat, lon], radius=2, color='blue').add_to(crime_map)

# Add edges to the map with weight-based color
for u, v, data in G.edges(data=True):
    lat_u, lon_u = nodes[u]
    lat_v, lon_v = nodes[v]
    folium.PolyLine(locations=[[lat_u, lon_u], [lat_v, lon_v]], 
                    color=plt.cm.Reds(data['weight']), 
                    weight=3).add_to(crime_map)

# Save and display the map
crime_map.save("kenya_crime_risk_map.html")
crime_map
