<a href="https://colab.research.google.com/github/erikrozi/acmlab/blob/main/unsupervised.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab setup cell: mount Google Drive, register the project folder on
# sys.path, and make that folder the working directory.

# Mount Google Drive at /content/drive inside the Colab VM.
# force_remount=True is passed so that a fresh mount is requested on each run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Folder (relative to the Drive mount point) that holds this project's files.
# The assert only guards against the placeholder being left as None.
FOLDERNAME = 'Shared drives/Unsupervised'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that Drive is mounted, add the project folder to sys.path so that
# Python modules stored in it can be imported from this notebook.
# NOTE(review): re-running this cell appends the same entry again.
import sys
sys.path.append('/content/drive/{}'.format(FOLDERNAME))

# Change the working directory to the project folder so that the relative
# paths used by later cells are resolved against it.
%cd /content/drive/$FOLDERNAME/

Mounted at /content/drive
/content/drive/Shared drives/Unsupervised


In [None]:
# Importing the standard ML libraries...
%load_ext autoreload
%autoreload 2

import pandas as pd                     # to process our data
import matplotlib.pyplot as plt         # graphing
import numpy as np                      # matrices

import torch
import torchvision                      # for MNIST dataset/working with images

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load Census Data

In [None]:
# Read the raw tax-return table from disk.
tax_returns_data = pd.read_csv("16zpallnoagi.csv")

# Narrow it down to the ZIPCODE, N1 and A02650 columns.
cleaned_tax_data = pd.DataFrame(
    data=tax_returns_data,
    columns=["ZIPCODE", "N1", "A02650"],
)

# Bare expression so the notebook renders the frame.
cleaned_tax_data

Unnamed: 0,ZIPCODE,N1,A02650
0,0,2016530,115806207
1,35004,5210,290260
2,35005,3100,121690
3,35006,1200,57418
4,35007,11940,697882
...,...,...,...
29969,83126,170,10687
29970,83127,1470,109088
29971,83128,920,88463
29972,83414,200,44409


In [None]:
# Read the semicolon-separated zip code -> latitude/longitude table.
ziplation_data = pd.read_csv("ziplatlon.csv", sep=";")

# Keep the rows inside the bounding box 33.5 <= latitude <= 34.3 and
# -118.6 <= longitude <= -117.6 (both ends inclusive).
lat_in_box = ziplation_data['latitude'].between(33.5, 34.3)
lon_in_box = ziplation_data['longitude'].between(-118.6, -117.6)
parsed_ziplation_data = ziplation_data[lat_in_box & lon_in_box]
parsed_ziplation_data

Unnamed: 0,zip,city,state,latitude,longitude,timezone,dst,geopoint
98,90038,Los Angeles,CA,34.089459,-118.328500,-8,1,"34.089459,-118.3285"
99,90063,Los Angeles,CA,34.045161,-118.186500,-8,1,"34.045161,-118.1865"
100,92709,Irvine,CA,33.640302,-117.769442,-8,1,"33.640302,-117.769442"
252,90301,Inglewood,CA,33.955913,-118.358680,-8,1,"33.955913,-118.35868"
256,92837,Fullerton,CA,33.640302,-117.769442,-8,1,"33.640302,-117.769442"
...,...,...,...,...,...,...,...,...
42980,90397,Inglewood,CA,33.786594,-118.298662,-8,1,"33.786594,-118.298662"
43080,91716,City Of Industry,CA,33.786594,-118.298662,-8,1,"33.786594,-118.298662"
43135,92702,Santa Ana,CA,33.640302,-117.769442,-8,1,"33.640302,-117.769442"
43136,92834,Fullerton,CA,33.640302,-117.769442,-8,1,"33.640302,-117.769442"


In [None]:
import util as util
import webmercator as webmercator

In [None]:
# Build one row per (x, y) tile of the grid at zoom level z:
# tile indices, latitude, longitude and elevation.
z = 14
x_tiles = range(2794, 2839)
y_tiles = range(6528, 6572)

# One row per tile; columns are x, y, latitude, longitude, elevation.
pos = np.empty([len(x_tiles) * len(y_tiles), 5])

row = 0  # running row index: x-major, y-minor
for x in x_tiles:
  for y in y_tiles:
    pos[row, :4] = (x, y, webmercator.lat(y, z), webmercator.lon(x, z))
    # The elevation lookup is fed the latitude/longitude just stored in the row.
    pos[row, 4] = util.getElevation(pos[row, 2], pos[row, 3])
    row += 1

image_pos_data = pd.DataFrame({
    'x': pos[:, 0],
    'y': pos[:, 1],
    'z': z,
    'latitude': pos[:, 2],
    'longitude': pos[:, 3],
    'elevation': pos[:, 4],
})
image_pos_data

Unnamed: 0,x,y,z,latitude,longitude,elevation
0,2794.0,6528.0,14,34.307144,-118.608398,660.0
1,2794.0,6529.0,14,34.288992,-118.608398,501.0
2,2794.0,6530.0,14,34.270836,-118.608398,392.0
3,2794.0,6531.0,14,34.252676,-118.608398,294.0
4,2794.0,6532.0,14,34.234512,-118.608398,280.0
...,...,...,...,...,...,...
1975,2838.0,6567.0,14,33.596319,-117.641602,231.0
1976,2838.0,6568.0,14,33.578015,-117.641602,151.0
1977,2838.0,6569.0,14,33.559707,-117.641602,178.0
1978,2838.0,6570.0,14,33.541395,-117.641602,136.0


Eliminate ocean tiles: drop every tile whose elevation is exactly 0.

In [None]:
# Keep only the tiles whose elevation is non-zero (zero is treated as ocean).
has_land = image_pos_data['elevation'] != 0
image_pos_data = image_pos_data[has_land]
image_pos_data

Unnamed: 0,x,y,z,latitude,longitude,elevation
0,2794.0,6528.0,14,34.307144,-118.608398,660.0
1,2794.0,6529.0,14,34.288992,-118.608398,501.0
2,2794.0,6530.0,14,34.270836,-118.608398,392.0
3,2794.0,6531.0,14,34.252676,-118.608398,294.0
4,2794.0,6532.0,14,34.234512,-118.608398,280.0
...,...,...,...,...,...,...
1975,2838.0,6567.0,14,33.596319,-117.641602,231.0
1976,2838.0,6568.0,14,33.578015,-117.641602,151.0
1977,2838.0,6569.0,14,33.559707,-117.641602,178.0
1978,2838.0,6570.0,14,33.541395,-117.641602,136.0


# Load the imagery data


In [None]:
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

class TileImageryDataset(Dataset):
  """Tile imagery dataset.

  Every tile image is read from disk and preprocessed once, when the dataset
  is constructed; indexing then returns the stored tensor.

  Reference: https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
  """

  def __init__(self, images_zip_path):
    """Load all tiles into tensors.

    Args:
      images_zip_path: path of the imagery archive.
        NOTE(review): it is only stored for now -- load_images() reads from
        the relative "images/" directory. Confirm whether the archive should
        be unpacked here first.
    """
    self.images_zip_path = images_zip_path

    # Load into tensors
    self.tensors = self.load_images()

    # TODO: perform data augmentation

  def __len__(self):
    """Return the number of loaded tiles."""
    return len(self.tensors)

  def __getitem__(self, idx):
    """Return the preprocessed tensor of the idx-th tile."""
    return self.tensors[idx]

  @staticmethod
  def load_images(image_dir="images", zoom=14,
                  x_range=range(2794, 2839), y_range=range(6528, 6572)):
    """Read and preprocess every tile image of the grid.

    Files are looked up as "{image_dir}/{zoom}_{x}_{y}.jpg" for each x in
    x_range and y in y_range (x-major order). The defaults reproduce the
    original hard-coded directory, zoom level and tile ranges.

    Args:
      image_dir: directory that contains the tile JPEGs.
      zoom: zoom level used in the file names.
      x_range: iterable of tile x indices.
      y_range: iterable of tile y indices.

    Returns:
      A list with one preprocessed image tensor per tile.

    Raises:
      FileNotFoundError: if a tile image is missing.
    """
    # Built once: the pipeline is the same for every tile.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    tensors = []
    for x in x_range:
      for y in y_range:
        # convert() reads the pixel data into a new image, so the file can be
        # closed as soon as the with-block ends.
        with Image.open(f"{image_dir}/{zoom}_{x}_{y}.jpg") as tile:
          image = tile.convert("RGB")
        tensors.append(preprocess(image))
    return tensors

IndentationError: ignored