In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import numpy as np
import math
mpl.rcParams['figure.dpi']= 300
sns.set(rc={'text.usetex' : True})
import sys
sys.path.append("..")
import src.constants as cnst
from typing import Tuple, List

In [None]:
# Locations come from the project constants module.
DATA_PATH = cnst.DATA_PATH
provided_data = cnst.provided_path
FIG_PATH = cnst.FIG_PATH

# Read the provided table and shorten the verbose column names used throughout the notebook.
df = pd.read_csv(DATA_PATH + provided_data).rename(
    columns={
        'PercentPopIncomeBelow2xPovertyLevel': 'PctPvty',
        'AvgReduxinNighttimeAnnualTemp_Celsius': 'TempRedux',
        'Percent_GreenSpace': 'PctGrSpc',
    }
)

In [None]:
# Second table read from the same data directory; later cells take the
# `area`, `population`, `latitude`, `longitude` and `perc_below_2pov` columns from it.
all_col =pd.read_csv(DATA_PATH + 'all_columns.csv')
# Bare expression so the frame is rendered as the cell output.
all_col

### Optimization Algorithm

In [None]:
# Per-row inputs for the optimizer.
green_pcts = df["PctGrSpc"].to_numpy()   # green-space percentage (renamed from 'Percent_GreenSpace')
poverty_pcts = df["PctPvty"].to_numpy()  # share of population below 2x the poverty level
area = all_col.area  # pandas Series of areas (units not stated in this notebook)
population = all_col.population  # pandas Series of populations

# Every row starts with utility 1. The length is taken from the data rather than
# a hard-coded 193 so the arrays stay aligned if the input table changes.
x = np.ones(len(green_pcts))

# Total budget handed to the optimizer (units not stated in this notebook).
funds = 2000

In [None]:
def geo_mean(x: np.ndarray) -> float:
    """
    Geometric mean of ``x``, evaluated in log space.

    The logs of the entries are averaged and the result is exponentiated,
    so the product of the entries is never formed explicitly.
    """
    log_values = np.log(x)
    return np.exp(log_values.mean())

In [None]:
def green_cost(green_percent: float):
    """
    Cost figure for a location that is already ``green_percent`` percent green.

    Falls linearly from 500 at 0 percent to 0 at 100 percent.
    """
    green_fraction = green_percent * 0.01
    return 500 * (1 - green_fraction)

In [None]:
def green_change(funds: int, prev_green: float, area: int) -> float:
    """
    Green value of a tract after ``funds`` have been spent on it.

    Despite the name, the result is the new level rather than a difference:
    algebraically it equals ``prev_green + funds / (green_cost(prev_green) * area)``.

    funds: amount spent on the tract
    prev_green: green value before spending (also passed to ``green_cost``)
    area: tract area; must be non-zero because it is used as a divisor
    """
    # Amount bought with the funds at the current cost.
    purchased = funds / green_cost(prev_green)
    existing = prev_green * area
    return (purchased + existing) / area

In [None]:
# Quick check of green_change on one hand-picked input.
green_change(funds=200, prev_green=0.405, area=0.5)

In [None]:
from sklearn.linear_model import LinearRegression

# Regress the temperature-reduction column on the green-space percentage.
# The final 20 rows are split off as a hold-out set and are not used for fitting.
X = df["PctGrSpc"].to_numpy().reshape(-1, 1)  # single feature as a column
y = df["TempRedux"].to_numpy()
X_train, y_train = X[:-20], y[:-20]
X_test, y_test = X[-20:], y[-20:]

reg = LinearRegression()
reg.fit(X_train, y_train)
# Show the fitted slope as the cell output.
reg.coef_

In [None]:
def temp_change(curr_green: float, prev_green: float, model: LinearRegression) -> float:
    """
    Predicted temperature difference between two green-coverage values.

    curr_green: green coverage after the change
    prev_green: green coverage before the change
    model: fitted single-feature regressor exposing ``predict``

    Returns ``model``'s prediction at ``curr_green`` minus its prediction at
    ``prev_green`` as a plain float, matching the annotation. Previously a
    one-element array was returned.
    """
    # One batched call; each row is a single-feature sample.
    new_temp, old_temp = model.predict([[curr_green], [prev_green]])
    return float(new_temp - old_temp)

In [None]:
# Predicted temperature difference between green values of 70 and 20 under the fitted model.
temp_change(curr_green=70, prev_green=20, model=reg)

In [None]:
def util(population: int, prev_green: float, curr_green: float, poverty_pct: float, model: LinearRegression) -> float:
    """
    Utility contributed by one tract when its green coverage moves from
    ``prev_green`` to ``curr_green``.

    Computed as ``population * log(1 + dT) * exp(0.01 * poverty_pct)``, where
    ``dT`` is the value returned by ``temp_change`` for the two coverages.
    """
    delta_temp = temp_change(curr_green, prev_green, model)
    log_term = np.log(1 + delta_temp)
    poverty_weight = np.exp(poverty_pct * 0.01)
    return population * log_term * poverty_weight

In [None]:
def opt(x: np.ndarray, green_pct: np.ndarray, ar: np.ndarray, pop: np.ndarray, pvty_pct: np.ndarray, funds: int, injection: float, reg: LinearRegression) -> Tuple[np.ndarray, float, List]:
    """
    Greedily allocate ``funds`` across tracts in steps of ``injection`` so as to
    raise the geometric mean of the tract utilities.

    On every step the injection is tried on each tract in turn; the tract whose
    utility gain increases the geometric mean the most receives it. Tracts whose
    green value would exceed 90 after the injection are skipped.

    x: starting utilities, one per tract (updated in place and returned)
    green_pct: current green value per tract (updated in place for funded tracts)
    ar: area per tract
    pop: population per tract
    pvty_pct: poverty percentage per tract
    funds: total budget; the loop runs while it is positive, so the last step
        may overshoot if ``funds`` is not a multiple of ``injection``
    injection: amount spent per step, must be positive
    reg: fitted regression model forwarded to ``util``

    Returns a tuple ``(utilities, geometric mean of utilities, per-step deltas)``.

    Raises ValueError if ``injection`` is not positive (the loop would never end).
    """
    if injection <= 0:
        raise ValueError("injection must be positive")

    util_delta = []
    util_array = x  # same object as x: the caller's array is modified
    while funds > 0:
        total_util = geo_mean(util_array)
        best_delta, best_idx = 0, -1
        best_gain, best_green = None, None
        for idx in range(len(util_array)):
            new_green = green_change(injection, green_pct[idx], ar[idx])
            if new_green > 90:
                continue
            gain = util(pop[idx], green_pct[idx], new_green, pvty_pct[idx], reg)
            trial = util_array.copy()
            trial[idx] += gain
            delta = geo_mean(trial) - total_util
            # ">=" keeps the original tie-break: the last of equally good tracts wins.
            if delta >= best_delta:
                best_delta, best_idx = delta, idx
                best_gain, best_green = gain, new_green
        if best_idx < 0:
            # No tract can take the injection without lowering the geometric mean
            # or exceeding the cap. Stop here instead of updating index -1
            # (the last tract), which is what happened before.
            break
        util_delta.append(best_delta)
        # Reuse the winner's values computed in the scan above.
        util_array[best_idx] += best_gain
        green_pct[best_idx] = best_green
        funds -= injection
    return util_array, geo_mean(util_array), util_delta

In [None]:
# Run the optimizer on copies of the inputs, spending `funds` in steps of 10.
test = opt(
    x=x.copy(),
    green_pct=green_pcts.copy(),
    ar=area.copy(),
    pop=population.copy(),
    pvty_pct=poverty_pcts.copy(),
    funds=funds,
    injection=10,
    reg=reg,
)

In [None]:
# test[2] is the list of per-step utility deltas returned by opt.
plt.plot(test[2])
plt.xlabel("Iteration")
# Raw string: "\D" is not a valid Python escape and triggers a SyntaxWarning on
# recent interpreters; the text passed to matplotlib is unchanged.
plt.ylabel(r"$\Delta$Utility")
#plt.xlim(5, 194)
plt.title("Utility Change Over Optimization Process")
# NOTE(review): cnst.sf presumably saves the current figure under this name; confirm in src.constants.
cnst.sf("viz_util_delta.png")

In [None]:
# Final per-tract utility (test[0]) against poverty percentage.
# The commented-out lines are alternative views against the other inputs.
#plt.plot(test[0])
plt.scatter(poverty_pcts, test[0])
# plt.plot(green_pcts, test[0])
# plt.plot(area, test[0])
# plt.plot(population, test[0])

In [None]:
# Pick the tracts whose final utility is at least two standard deviations above the mean.
mean = test[0].mean()
std = test[0].std()
dat = np.flatnonzero(test[0] >= mean + 2 * std).tolist()  # positional indices as plain ints
df_p = all_col.iloc[dat]

In [None]:
# Every row of df, positioned by its lat/long columns and coloured by poverty percentage.
g = sns.scatterplot(data=df, x='lat', y='long', hue='PctPvty', palette="vlag")
# Place the legend outside the axes, to the right.
g.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)
# Raw string: "\%" is not a valid Python escape (SyntaxWarning on recent interpreters).
# The backslash is still passed through, which LaTeX rendering (text.usetex) needs for a literal percent sign.
plt.title(r"Poverty (\%) vs location")
plt.xlim([35.8, 36.05])
plt.ylim([-79.1, -78.8])

In [None]:
# Same view restricted to df_p, the rows selected for high final utility.
g = sns.scatterplot(data=df_p, x='latitude', y='longitude', hue='perc_below_2pov', palette="vlag")
# Place the legend outside the axes, to the right.
g.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)
# Raw string: "\%" is not a valid Python escape (SyntaxWarning on recent interpreters).
# The backslash is still passed through, which LaTeX rendering (text.usetex) needs for a literal percent sign.
plt.title(r"Poverty (\%) vs location")
plt.xlim([35.8, 36.05])
plt.ylim([-79.1, -78.8])

In [None]:
# Summary statistics for the selected high-utility rows (df_p is all_col.iloc[dat]).
df_p.describe()

In [None]:
# Summary statistics over every row of all_col, for comparison with the selected subset.
all_col.describe()