Import the required libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import datetime

Load the raw crime CSV file (adjust the path if the file is not in the working directory)

In [2]:
# Read the raw crime export into a DataFrame (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='crime_data_raw_2022C.csv')

Preprocess the data (this step has already been run; its output is saved in cleaned_dataset.csv)

In [4]:
def get_week_of_year_from_string(timestamp_str):
    """Return the week-of-year number for a timestamp string.

    Args:
        timestamp_str: A timestamp formatted as ``MM/DD/YYYY HH:MM:SS AM/PM``
            (12-hour clock), e.g. ``"01/02/2022 01:00:00 PM"``.

    Returns:
        The ``%U`` week number as an int (weeks start on Sunday; days before
        the first Sunday of the year are week 0), or ``None`` when the value
        cannot be parsed.
    """
    timestamp_format = "%m/%d/%Y %I:%M:%S %p"
    try:
        datetime_obj = datetime.datetime.strptime(timestamp_str, timestamp_format)
    except (ValueError, TypeError):
        # ValueError: string does not match the format.
        # TypeError: value is not a string at all (e.g. NaN/None for a missing
        # date cell), which would otherwise abort a whole DataFrame.apply().
        return None
    return int(datetime_obj.strftime("%U"))

# Load the raw export. A relative path keeps the notebook portable instead of
# depending on one user's home directory.
df = pd.read_csv('crime_data_raw_2022C.csv')


# Lookup tables between IUCR codes and their 'Primary Type' names.
# dict(zip(...)) walks the columns in row order, so later rows overwrite
# earlier ones exactly like a row-by-row loop would, without iterrows() overhead.
x_to_name = dict(zip(df['IUCR'], df['Primary Type']))
name_to_x = dict(zip(df['Primary Type'], df['IUCR']))

# Derive the week-of-year feature from the timestamp column.
df['week_no'] = df['Date'].apply(get_week_of_year_from_string)

# Drop columns that are not used downstream ('Date' is replaced by 'week_no').
df = df.drop(columns=['Zip Codes', 'Date', 'Block', 'Primary Type', 'Beat','Location'])


# Persist the cleaned frame so later cells can start from it.
df.to_csv('cleaned_dataset.csv', index=False)

In [3]:
# Load the cleaned dataset. The relative path matches the file name written by
# the preprocessing step and avoids a user-specific absolute path.
df = pd.read_csv('cleaned_dataset.csv')

# Use one LabelEncoder per column so every mapping stays available for
# inverse_transform later; refitting a single shared encoder would discard
# all mappings except the last one.
encoders = {}
for source_col, encoded_col in [
    ('IUCR', 'crime_code'),
    ('District', 'District'),
    ('Ward', 'Ward'),
    ('FBI Code', 'FBI Code'),
    ('Community Area', 'Community Area'),
]:
    encoders[encoded_col] = LabelEncoder()
    df[encoded_col] = encoders[encoded_col].fit_transform(df[source_col])

# Backward-compatible name: the encoder fitted on 'Community Area'.
label_encoder = encoders['Community Area']

# Standardize the week number; `scaler` is kept for transforming new inputs.
scaler = StandardScaler()
df[['week_no']] = scaler.fit_transform(df[['week_no']])

# Boolean arrest flag -> 0/1 integer feature.
df['Arrest'] = df['Arrest'].astype(int)

Select the input features and the target

In [4]:
# Model inputs: the six feature columns, in the order the network will see them.
features = df.loc[:, ['week_no', 'crime_code', 'Arrest', 'District', 'Ward', 'FBI Code']]
# Prediction target: the community-area column.
target = df.loc[:, 'Community Area']


## Split the dataset into train and test sets

In [5]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=31,
)

## Define the neural network model

In [6]:
class CrimeRiskNN(nn.Module):
    """Feed-forward network: input -> 128 -> 64 -> output, ReLU between layers.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output units.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Two hidden layers followed by a linear output layer (no final activation).
        self.layer1 = nn.Linear(in_features=input_size, out_features=128)
        self.layer2 = nn.Linear(in_features=128, out_features=64)
        self.output_layer = nn.Linear(in_features=64, out_features=output_size)

    def forward(self, x):
        """Run a forward pass and return the raw output-layer activations."""
        hidden = self.layer1(x).relu()
        hidden = self.layer2(hidden).relu()
        return self.output_layer(hidden)

In [7]:
# Build the network: one input per feature column, one output per distinct community area.
input_size = len(features.columns)
unique_community_areas = df['Community Area'].nunique()
model = CrimeRiskNN(input_size=input_size, output_size=unique_community_areas)

In [8]:
# Loss and Optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [9]:
# Training the model
num_epochs = 50
for epoch in range(num_epochs):
    # Convert features and targets to tensors
    inputs = torch.tensor(X_train.values, dtype=torch.float32)
    targets = torch.tensor(y_train.values, dtype=torch.float32)

    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets.view(-1,1))

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/50], Loss: 1330.5240
Epoch [20/50], Loss: 931.8353
Epoch [30/50], Loss: 815.1242
Epoch [40/50], Loss: 788.2466
Epoch [50/50], Loss: 775.3954


In [34]:
# Dedicated encoder for IUCR codes so raw codes can be mapped at prediction time.
crime_label_encoder = LabelEncoder()
df['crime_code'] = crime_label_encoder.fit_transform(df['IUCR'])

# Later in the script, to make a prediction
def predict_top_community_area(week_no, original_crime_code, k):
    """Return the indices of the k highest-scoring model outputs for one query.

    Args:
        week_no: Unscaled week-of-year number; scaled here with the fitted `scaler`.
        original_crime_code: Raw IUCR code; must be a value `crime_label_encoder`
            was fitted on, otherwise `transform` raises ValueError.
        k: How many top outputs to return. Values larger than the number of
            model outputs are clamped to that number.

    Returns:
        A nested list (one inner list per input row, i.e. one here) holding the
        output-unit indices ordered from highest to lowest score.
        NOTE(review): these are positions in the model's output vector, not raw
        'Community Area' values -- map them back through the Community Area
        encoder if original area numbers are needed.
    """
    # Convert original crime code to encoded value
    encoded_crime_code = crime_label_encoder.transform([original_crime_code])[0]

    # Prepare and normalize input data.
    # NOTE(review): Arrest=1 and District/Ward/FBI Code=0 are fixed placeholder
    # values -- confirm these are the intended defaults for a query.
    input_data = pd.DataFrame([[week_no, encoded_crime_code, 1, 0, 0, 0]],
                              columns=['week_no', 'crime_code', 'Arrest', 'District', 'Ward', 'FBI Code'])
    input_data['week_no'] = scaler.transform(input_data[['week_no']]).ravel()

    # Convert to tensor
    input_tensor = torch.tensor(input_data.values, dtype=torch.float32)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(input_tensor)

    # topk raises if k exceeds the size of the last dimension, so clamp it.
    k = min(k, prediction.shape[-1])
    top_k_indices = torch.topk(prediction, k).indices

    return top_k_indices.tolist()

In [10]:
# Example: query the 23 highest-scoring outputs for IUCR code '1120' in week 51.
top_community_area = predict_top_community_area(
    week_no=51,
    original_crime_code='1120',
    k=23,
)
print(f'Top Community Area: {top_community_area}')

NameError: name 'predict_top_community_area' is not defined