# 1. Setup

First, we import all the necessary libraries and then read in the CSV files. We clean the data by removing every row of our accessibility dataset that contains a missing (NA) value.

In [None]:
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import torch
from shapely.geometry import Point
from sklearn.model_selection import train_test_split
from torch import nn

In [None]:
# Read the accessibility observations and discard every row that has a
# missing value in any column.
df = pd.read_csv("/content/Access_to_Everyday_Life_Dataset.csv").dropna()

# Turn the two coordinate columns into point geometries (column 0 is used as
# x, column 1 as y) and tag them as EPSG:4326.
# NOTE(review): presumably x/y are longitude/latitude — confirm against the dataset.
point_geometry = gpd.points_from_xy(
    x=df["geometry/coordinates/0"],
    y=df["geometry/coordinates/1"],
)
points_gdf = gpd.GeoDataFrame(df, geometry=point_geometry, crs="EPSG:4326")

# Census block-group polygons used later for the spatial join.
block_groups = gpd.read_file("/content/CensusBGGEO_2020_-3103699184132166589.geojson")

Now, we spatially join the accessibility data with the Seattle census data sets, matching each accessibility point to the census block group that contains it.

In [None]:

# Attach to every point the GEOID_20 of the block-group polygon it lies
# within. A left join keeps points that fall inside no polygon (their
# GEOID_20 is then missing).
bg_geoid_shapes = block_groups[["GEOID_20", "geometry"]]
points_with_bg = points_gdf.sjoin(bg_geoid_shapes, how="left", predicate="within")

# Convert the identifier to a numeric dtype before merging on it.
# NOTE(review): presumably the CSV stores GEOID_20 as a number — confirm.
points_with_bg["GEOID_20"] = pd.to_numeric(points_with_bg["GEOID_20"])

# Tabular block-group attributes, keyed by GEOID_20.
bg_csv = pd.read_csv("CensusBGGEO_2020_-8839192176228303427.csv")

# One row per accessibility point, enriched with its block group's columns.
final = points_with_bg.merge(bg_csv, how="left", on="GEOID_20")

We then select which attributes will be needed to train the model.

In [None]:
# Column identifying the tract / block group that rows are grouped on.
group_key = "Census Tract and Block Group"

# Columns used as model inputs (features).
feature_columns = [
    "Per Capita Income",
    "Median Age",
    "No Vehicles Available",
    "Population 18 years and Over with a Disability",
    "Land Acres",
    "Natural resources, construction, and maintenance occupations",
    "Population 20 to 64 years for whom poverty status is determined",
    "Total Population",
    "Bachelor degree or higher",
    "Families with income in the past 12 months below poverty level",
    "Families for whom poverty status is determined",
]

# Column used as the regression target (label).
label_column = "properties/severity"

# Layout relied on by later cells: [group key, *features, label].
attributes = [group_key, *feature_columns, label_column]

In [None]:
# Group on every attribute except the label (the last entry) and average the
# severity within each group; the grouping columns stay regular columns.
# NOTE(review): presumably the feature columns are constant within a block
# group, so this yields one row per block group — confirm.
grouping_columns = attributes[:-1]
group_data = (
    final
    .groupby(grouping_columns, as_index=False)["properties/severity"]
    .mean()
)

# 2. Create Model and Train


In [None]:
class LinearRegression(nn.Module):
    """Small feed-forward regressor whose output is squashed into a fixed range.

    Despite the name, the network is not a linear model: it stacks two hidden
    ``Linear`` + ``ReLU`` layers in front of the output layer, and the raw
    output is passed through a sigmoid that is rescaled to
    ``[out_min, out_max]``.

    Args:
        in_features: Number of input features per sample.
        out_features: Number of values predicted per sample.
        hidden_features: Width of both hidden layers. Defaults to 128.
        out_min: Lower bound of the prediction range. Defaults to 1.
        out_max: Upper bound of the prediction range. Defaults to 5.
            NOTE(review): the 1-5 default presumably mirrors the severity
            scale of the label — confirm against the dataset.

    Raises:
        ValueError: If ``out_max`` is not strictly greater than ``out_min``.
    """

    def __init__(self, in_features, out_features, hidden_features=128,
                 out_min=1.0, out_max=5.0):
        super().__init__()
        if out_max <= out_min:
            raise ValueError("out_max must be greater than out_min")
        # Plain Python floats: these are fixed settings, not learned parameters.
        self.out_min = float(out_min)
        self.out_scale = float(out_max) - float(out_min)
        self.layers = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features)
        )

    def forward(self, x):
        """Return predictions for ``x``, bounded to ``[out_min, out_max]``."""
        # sigmoid maps to [0, 1]; shift and scale into the target range.
        return self.out_min + self.out_scale * torch.sigmoid(self.layers(x))

Create the test and train sets.

In [None]:

# Feature matrix: every attribute except the first (group key) and the last
# (label), cast to float32 for PyTorch.
X_np = group_data[attributes[1:-1]].to_numpy().astype(np.float32)
# Target as a float32 column vector of shape (n_samples, 1).
y_np = group_data["properties/severity"].to_numpy().astype(np.float32).reshape(-1, 1)

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible. `train_test_split` is imported in the notebook's
# import cell.
X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    X_np, y_np, test_size=0.2, random_state=20
)

Then, we standardize the data.

In [None]:
# Per-feature statistics come from the training split only, so no information
# from the test split is used for scaling. The small epsilon avoids dividing
# by zero for a constant feature.
mean = X_train_np.mean(axis=0, keepdims=True)
std = X_train_np.std(axis=0, keepdims=True) + 1e-8

# Standardise both splits with the training statistics.
X_train_np, X_test_np = (
    (split - mean) / std for split in (X_train_np, X_test_np)
)

# Convert all four arrays to float32 tensors.
X_train, X_test, y_train, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train_np, X_test_np, y_train_np, y_test_np)
)

Instantiate the model.

In [None]:
# Seed PyTorch's RNG before the layers are created so the random weight
# initialisation is identical on every Restart & Run All. The value matches
# the random_state used for the train/test split.
torch.manual_seed(20)

# len(attributes) - 2 is the number of feature columns: the attribute list
# minus its first entry (group key) and its last entry (label).
model = LinearRegression(
    in_features=len(attributes) - 2,
    out_features=1
)

We create the loss function and the optimizer. Then, we define the accuracy function.

In [None]:
# Mean-squared error between predicted and observed severity.
criterion = nn.MSELoss()
# Adam over all model parameters with a learning rate of 1e-5.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)

In [None]:
def accuracy(y_true, y_pred):
    """Return the percentage of elements of ``y_pred`` equal to ``y_true``.

    Both tensors are flattened before the comparison, so a ``(N,)`` tensor
    and a ``(N, 1)`` tensor are compared element by element instead of being
    broadcast against each other, and the denominator is the total number of
    compared elements.

    Args:
        y_true: Tensor of reference values.
        y_pred: Tensor of predicted values with the same number of elements.

    Returns:
        Percentage (0-100) of matching elements as a float; ``0.0`` when the
        inputs are empty.

    Raises:
        ValueError: If the two tensors hold a different number of elements.
    """
    flat_true = y_true.reshape(-1)
    flat_pred = y_pred.reshape(-1)
    total = flat_true.numel()
    if flat_pred.numel() != total:
        raise ValueError(
            f"y_true has {total} elements but y_pred has {flat_pred.numel()}"
        )
    if total == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    correct = torch.eq(flat_true, flat_pred).sum().item()
    return (correct / total) * 100

We then train the model.

In [None]:
# Number of optimisation steps performed per execution of this cell. The
# model and optimizer are not re-created here, so running the cell again
# continues training from the current weights.
# NOTE(review): the original note "6000 at this point" presumably records
# that the cell had been executed twice — confirm, and set epochs accordingly
# if the reported results depend on it.
epochs = 3000

# Make sure the model is in training mode (matters if an evaluation cell
# switched it to eval mode before this cell is re-run).
model.train()

for epoch in range(epochs):
    optimizer.zero_grad()

    # Full-batch step: the whole training set is passed through at once.
    predictions = model(X_train)
    loss = criterion(predictions, y_train)

    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        # The metric is for logging only, so compute it without autograd
        # tracking. Both sides are rounded to the nearest integer first.
        with torch.no_grad():
            train_acc = accuracy(torch.round(y_train), torch.round(predictions))
        print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Acc: {train_acc:.4f}")

Epoch 0 | Loss: 0.5883 | Acc: 42.0804
Epoch 100 | Loss: 0.5386 | Acc: 42.0804
Epoch 200 | Loss: 0.5007 | Acc: 43.0260
Epoch 300 | Loss: 0.4703 | Acc: 45.1537
Epoch 400 | Loss: 0.4449 | Acc: 48.4634
Epoch 500 | Loss: 0.4237 | Acc: 49.4090
Epoch 600 | Loss: 0.4061 | Acc: 50.3546
Epoch 700 | Loss: 0.3917 | Acc: 52.7187
Epoch 800 | Loss: 0.3802 | Acc: 52.7187
Epoch 900 | Loss: 0.3710 | Acc: 53.9007
Epoch 1000 | Loss: 0.3637 | Acc: 55.0827
Epoch 1100 | Loss: 0.3576 | Acc: 55.7920
Epoch 1200 | Loss: 0.3523 | Acc: 55.0827
Epoch 1300 | Loss: 0.3476 | Acc: 54.8463
Epoch 1400 | Loss: 0.3433 | Acc: 55.0827
Epoch 1500 | Loss: 0.3393 | Acc: 55.0827
Epoch 1600 | Loss: 0.3355 | Acc: 54.6099
Epoch 1700 | Loss: 0.3318 | Acc: 54.8463
Epoch 1800 | Loss: 0.3282 | Acc: 55.0827
Epoch 1900 | Loss: 0.3246 | Acc: 55.0827
Epoch 2000 | Loss: 0.3211 | Acc: 55.0827
Epoch 2100 | Loss: 0.3176 | Acc: 54.8463
Epoch 2200 | Loss: 0.3142 | Acc: 55.0827
Epoch 2300 | Loss: 0.3108 | Acc: 54.6099
Epoch 2400 | Loss: 0.3074 | 

Then, we get the accuracy from the test dataset.

In [None]:
# Switch to evaluation mode before inference, and disable gradient tracking
# since no backward pass is needed.
model.eval()
with torch.no_grad():
    # Round predictions to the nearest integer so they can be compared with
    # the rounded targets.
    prediction = torch.round(model(X_test))

# Percentage of test rows whose rounded prediction equals the rounded target;
# as the last expression of the cell, the value is displayed.
accuracy(torch.round(y_test), prediction)

59.43396226415094