In [None]:
import requests
import geojson
import aiohttp
import asyncio
from geocodio import GeocodioClient

import pandas as pd
import matplotlib as plt
import rasterio
from rasterio import features
import numpy as np

from dotenv import load_dotenv
import os

CALCULATING CENTER FOR EACH CLUSTER

In [None]:
from shapely.geometry import Polygon, MultiPolygon
from shapely.ops import cascaded_union

def get_polygons_center(clustered_tif_path):
    """Compute one representative centre point per cluster of a clustered raster.

    Band 2 of the raster is read as the cluster-label layer. The label ``-1``
    is skipped (presumably the noise/unclustered label -- TODO confirm against
    the clustering step). For every remaining label, the pixels carrying that
    label are vectorised into polygons, the exterior-ring vertices of each
    polygon are averaged, and those per-polygon averages are averaged again.

    Side effect:
        The label band is stored in the module-level ``clusters`` variable,
        which later cells of the notebook read.

    Args:
        clustered_tif_path: Path of the raster, passed to ``rasterio.open``.

    Returns:
        A list with one ``(y, x)`` tuple per cluster label, in ascending label
        order, expressed in the coordinates produced by the raster's affine
        transform (i.e. ``(lat, lng)`` if the raster uses a geographic CRS --
        TODO confirm).
    """
    global clusters
    with rasterio.open(clustered_tif_path) as src:
        clusters = src.read(2)
        # Grab the transform while the dataset is still open instead of
        # touching `src` after the context manager has closed it.
        transform = src.transform

    # np.unique returns the labels sorted, which gives a deterministic order
    # that downstream cells can rely on when pairing results with labels.
    class_vals = [val for val in np.unique(clusters) if val != -1]

    manual_centers = []
    for class_val in class_vals:
        mask = clusters == class_val
        # `mask=` limits vectorisation to the cluster's own pixels. Without it
        # the complementary background regions are emitted as shapes as well
        # and would be averaged into the cluster centre.
        shapes = features.shapes(mask.astype('uint8'), mask=mask, transform=transform)
        local_mc = []
        for polygon, _value in shapes:
            ring = polygon['coordinates'][0]  # exterior ring only
            mean_x = sum(pt[0] for pt in ring) / len(ring)
            mean_y = sum(pt[1] for pt in ring) / len(ring)
            # Stored as (y, x) to match the order the geocoding cell expects.
            local_mc.append((mean_y, mean_x))
        manual_centers.append(tuple(sum(coords) / len(coords) for coords in zip(*local_mc)))
    return manual_centers


In [None]:
# TODO use relative path
CLUSTERED_TIF_PATH = "C:/Users/User/PycharmProjects/SafeRoute/clustered.tif"

# One centre tuple per cluster; the count is shown as a quick sanity check.
centers = get_polygons_center(CLUSTERED_TIF_PATH)
len(centers)

In [None]:
# Smallest centre tuple (tuples compare element by element, first element
# first) -- a quick look at the lower end of the coordinate range.
min(centers)

In [None]:
# Largest centre tuple (tuples compare element by element, first element
# first) -- a quick look at the upper end of the coordinate range.
max(centers)

In [None]:
# Load variables from a .env file into the process environment so that the
# os.getenv("GEOCODE_API_KEY") lookup further down can find the API key.
load_dotenv()

REQUESTING DATA FROM GEOCODIO

In [None]:
# Fail fast with a clear message when the key is missing, instead of handing
# None to the client and getting an opaque error from the request later on.
api_key = os.getenv("GEOCODE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GEOCODE_API_KEY is not set; define it in the environment or in the .env file."
    )

gr = GeocodioClient(api_key)
# Reverse-geocode every cluster centre in one call, asking for the
# "acs-economics" field so each match carries the income data used below.
address_sets = gr.reverse(centers, fields = ["acs-economics"])

In [None]:
# Inspect the raw response for the first centre to see the nested structure
# that the following cells index into.
address_sets[0]

In [None]:
# Walk the nested payload of one sample response (third centre, first match)
# step by step down to the median household income figure.
sample_match = address_sets[2]['results'][0]
sample_economics = sample_match['fields']['acs']['economics']
household_income = sample_economics['Median household income']['Total']['value']
household_income

In [None]:
# Compare the coordinates of every match returned for the first centre with
# the centre that was actually queried.
first_matches = address_sets[0]['results']
print([match['location']['lat'] for match in first_matches])
print([match['location']['lng'] for match in first_matches])
print(centers[0])

In [None]:
def _median_household_income(match):
    """Return the median household income of one geocoding match.

    Returns None when any level of the nested payload is missing or when the
    reported value is falsy, so callers can leave such matches out of a mean.
    """
    try:
        value = match['fields']['acs']['economics']['Median household income']['Total']['value']
    except (KeyError, TypeError):
        # Bad or partial data: one of the nested keys is absent.
        return None
    return value if value else None


# One average income per queried centre, in the same order as `address_sets`
# (the name `cords` is kept because a later cell reads it).
cords = []
for result in address_sets:
    matches = result['results'] or []
    incomes = [
        income
        for income in (_median_household_income(match) for match in matches)
        if income is not None
    ]
    # Average over the matches that actually report an income; dividing by the
    # number of all matches would drag the mean down for every missing value.
    # Centres without any usable match get 0.0 so that positions in `cords`
    # stay aligned with the centres they were computed for.
    cords.append(sum(incomes) / len(incomes) if incomes else 0.0)

GETTING CLUSTER SCORE FOR EACH CLUSTER FROM REQUESTED DATA

In [None]:
# Every label present in the raster starts at 0, so labels that never receive
# an income (such as -1) still show up in the result.
cluster_score = {c: 0 for c in np.unique(clusters)}

# `cords` holds one income per label other than -1, assumed to be in ascending
# label order. Pair incomes with the actual labels instead of with
# range(len - 1), which silently assumed that -1 is always present and that
# the labels are exactly 0..k-1.
scored_labels = [label for label in cluster_score if label != -1]
if len(cords) < len(scored_labels):
    raise ValueError(
        f"Expected an income for each of the {len(scored_labels)} clusters, got {len(cords)}."
    )
for label, income in zip(scored_labels, cords):
    cluster_score[label] = int(income)
cluster_score

SAVING NORMAL DATAFRAME

In [None]:
# Turn the {cluster: income} mapping into a two-column table, persist it as
# CSV without the index column, and display it.
income_rows = [(cluster_id, income) for cluster_id, income in cluster_score.items()]
df = pd.DataFrame(income_rows, columns=["cluster", "income"])
df.to_csv('average_income.csv', index=False)
df