In [27]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from skorch import NeuralNetClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import folium
from tqdm import tqdm

In [28]:
data = pd.read_csv("San_Francisco.csv")

In [29]:
# Load and preprocess data


data["Time"] = pd.to_datetime(data["Time"]).astype(int) / 10**9

encoder = LabelEncoder()
data["Category"] = encoder.fit_transform(data["Category"])
data["Part_of_Day"] = encoder.fit_transform(data["Part_of_Day"])
data["Day_of_Week"] = encoder.fit_transform(data["Day_of_Week"])

#Drop date
data.drop(["Date"], axis=1, inplace=True)

scaler = MinMaxScaler()
data[["Time", "Day_of_Week", "Part_of_Day", "Latitude", "Longitude"]] = scaler.fit_transform(
    data[["Time", "Day_of_Week", "Part_of_Day","Latitude", "Longitude"]]
)

In [30]:
# Prepare dataset and dataloader
class CrimeDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if idx >= len(self.data):
            raise IndexError
        features = torch.tensor(
            self.data.loc[idx, ['Time','Day_of_Week','Part_of_Day','Latitude','Longitude']].values, dtype=torch.float
        )
        label = torch.tensor(self.data.loc[idx, 'Category'], dtype=torch.long)
        return features, label


dataset = CrimeDataset(data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Split dataset into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

train_dataset = CrimeDataset(train_data)
test_dataset = CrimeDataset(test_data)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [31]:
# Define Feedforward Neural Network
class CrimeNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(CrimeNet, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, hidden_size)
        self.layer3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        out = self.relu(out)
        out = self.layer3(out)
        return out


# Model parameters
input_size = 5
hidden_size = 32
num_classes = len(data["Category"].unique())

# Initialize model, loss function, and optimizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CrimeNet(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [32]:

# Train the model
num_epochs = 5

for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(dataloader):
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(
                f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(dataloader)}], Loss: {loss.item():.4f}"
            )

Epoch [1/100], Step [100/24597], Loss: 1.6020
Epoch [1/100], Step [200/24597], Loss: 1.7564
Epoch [1/100], Step [300/24597], Loss: 1.8460
Epoch [1/100], Step [400/24597], Loss: 1.6827
Epoch [1/100], Step [500/24597], Loss: 1.6968
Epoch [1/100], Step [600/24597], Loss: 1.7034
Epoch [1/100], Step [700/24597], Loss: 1.7645
Epoch [1/100], Step [800/24597], Loss: 1.7325
Epoch [1/100], Step [900/24597], Loss: 1.5281
Epoch [1/100], Step [1000/24597], Loss: 1.6017
Epoch [1/100], Step [1100/24597], Loss: 1.6606
Epoch [1/100], Step [1200/24597], Loss: 1.5338
Epoch [1/100], Step [1300/24597], Loss: 1.8715
Epoch [1/100], Step [1400/24597], Loss: 1.7230
Epoch [1/100], Step [1500/24597], Loss: 1.6083
Epoch [1/100], Step [1600/24597], Loss: 1.4874
Epoch [1/100], Step [1700/24597], Loss: 1.7606
Epoch [1/100], Step [1800/24597], Loss: 1.6171
Epoch [1/100], Step [1900/24597], Loss: 1.5824
Epoch [1/100], Step [2000/24597], Loss: 1.8462
Epoch [1/100], Step [2100/24597], Loss: 1.6694
Epoch [1/100], Step [2

KeyboardInterrupt: 

In [None]:
# Test the model
model.eval()
all_labels = []
all_predictions = []

with torch.no_grad():
    for features, labels in test_dataloader:
        features = features.to(device)
        labels = labels.to(device)

        outputs = model(features)
        _, predicted = torch.max(outputs.data, 1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())



In [None]:
# Save the trained model
torch.save(model.state_dict(), "model.ckpt")

In [None]:
# Calculate accuracy, confusion matrix, and classification report
conf_matrix = confusion_matrix(all_labels, all_predictions)
class_report = classification_report(all_labels, all_predictions)
accuracy = accuracy_score(all_labels, all_predictions)

print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)
print("Accuracy:", accuracy)


Confusion Matrix:
 [[26914  1208 10607     0     0     0  3339]
 [11105  2670 10041     0     0     0  2270]
 [14641  1562 20876     0     0     0  2722]
 [ 3579   318  2338     0     0     0  1068]
 [ 9003   497  3165     0     0     0  1352]
 [ 1746   165   984     0     0     0   157]
 [10406   957  8723     0     0     0  5004]]
Classification Report:
               precision    recall  f1-score   support

           0       0.35      0.64      0.45     42068
           1       0.36      0.10      0.16     26086
           2       0.37      0.52      0.43     39801
           3       0.00      0.00      0.00      7303
           4       0.00      0.00      0.00     14017
           5       0.00      0.00      0.00      3052
           6       0.31      0.20      0.24     25090

    accuracy                           0.35    157417
   macro avg       0.20      0.21      0.18    157417
weighted avg       0.30      0.35      0.30    157417

Accuracy: 0.3523380575160243


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
