In [None]:
from scipy.spatial import distance

# Lookup table of named regions. Each entry is a dict with a "name" and a
# "center" 2-tuple; the centers are compared against a row's ("lat", "long")
# pair when rows are labelled with their nearest region.
# NOTE(review): the coordinates look approximate / hand-entered (e.g. Shoreline
# is placed south of North Seattle) -- confirm them against a trusted source.
regions = [
    {"name": region_name, "center": (center_lat, center_long)}
    for region_name, center_lat, center_long in (
        ("Bellevue", 47.4419, -122.1577),
        ("Redmond", 47.6062, -122.0837),
        ("Kirkland", 47.6427, -122.2072),
        ("Issaquah", 47.5133, -122.1250),
        ("Bothell", 47.7000, -122.2250),
        ("North Seattle", 47.6250, -122.3500),
        ("Woodinville", 47.7500, -122.1750),
        ("Sammamish", 47.6875, -122.0000),
        ("Mercer Island", 47.5833, -122.2500),
        ("Shoreline", 47.5625, -122.3750),
        ("Auburn", 47.3750, -122.2500),
        ("Kent", 47.2500, -122.3250),
        ("Federal Way", 47.2167, -122.3000),
        ("Des Moines", 47.3750, -122.2000),
        ("SeaTac", 47.4167, -122.3167),
    )
]

def get_closest_region(x, y):
    """Return the name of the region whose center lies nearest to (x, y).

    Args:
        x: First coordinate of the query point; compared against element 0
            of every region's "center".
        y: Second coordinate of the query point; compared against element 1
            of every region's "center".

    Returns:
        The "name" of the entry in the module-level ``regions`` list with the
        smallest Euclidean distance to the query point. On a tie, the entry
        that appears first in ``regions`` wins.
    """
    # NOTE(review): if x/y are latitude/longitude in degrees, plain Euclidean
    # distance weights one degree of longitude the same as one degree of
    # latitude -- confirm that approximation is acceptable for this data.
    center_points = []
    for entry in regions:
        center = entry["center"]
        center_points.append((center[0], center[1]))

    # cdist yields a 1 x len(regions) matrix; row 0 holds the distance from
    # the single query point to each center, in ``regions`` order.
    to_each_center = distance.cdist([(x, y)], center_points, 'euclidean')[0]
    nearest = to_each_center.argmin()
    return regions[nearest]["name"]

# Label every row of the `housing` DataFrame with the name of the region whose
# center is nearest to the row's ("lat", "long") pair; the label is stored in
# the "region" column.
housing["region"] = housing.apply(
    lambda record: get_closest_region(record["lat"], record["long"]),
    axis="columns",
)

# Yearly price-adjustment rate per region, in percent. The functions below
# prorate it linearly by the number of whole days between a row's date and the
# 2014-05-01 baseline (rate / 365 per day).
adjustment_percentages = {
    "Bellevue": 18.2,
    "Redmond": 17.8,
    "Kirkland": 17.5,
    "Issaquah": 17.2,
    "Bothell": 16.9,
    "North Seattle": 16.7,
    "Woodinville": 16.6,
    "Sammamish": 16.5,
    "Mercer Island": 16.4,
    "Shoreline": 16.3,
    "Auburn": 12.3,
    "Kent": 12.2,
    "Federal Way": 12.1,
    "Des Moines": 12.4,
    "SeaTac": 12.5,
}


def _adjustment_fraction(row):
    """Return the fraction of the price that the adjustment removes for *row*.

    The region's yearly percentage is prorated by the number of whole days
    between ``row['unix_time']`` and 2014-05-01, then converted from percent
    to a fraction. The value is negative for dates before the baseline.

    Raises:
        KeyError: If ``row['region']`` has no entry in
            ``adjustment_percentages``.
    """
    region = row['region']
    try:
        yearly_percentage = adjustment_percentages[region]
    except KeyError:
        # Fail with the offending region instead of a later, opaque
        # "NoneType / int" TypeError.
        raise KeyError(
            f"no adjustment percentage defined for region {region!r}"
        ) from None

    # NOTE(review): despite the column name, 'unix_time' must hold a
    # datetime-like value that supports subtracting a pd.Timestamp -- confirm
    # how the column is built upstream.
    baseline = pd.Timestamp('2014-05-01')
    days_since_baseline = (row['unix_time'] - baseline).days

    return (yearly_percentage / 365) * days_since_baseline / 100


def calculate_adjusted_price(row):
    """Return ``row['price']`` adjusted back to the 2014-05-01 baseline.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing 'region',
            'price' and 'unix_time'.

    Returns:
        ``price * (1 - f)``, where ``f`` is the region's yearly percentage
        prorated over the days elapsed since 2014-05-01.

    Raises:
        KeyError: If the row's region has no configured percentage.
    """
    price = row['price']
    return price - (price * _adjustment_fraction(row))


def reverse_adjusted_price(adjusted_price, row):
    """Invert ``calculate_adjusted_price`` for a price on the adjusted scale.

    Args:
        adjusted_price: A price expressed on the adjusted (baseline) scale,
            e.g. a model prediction.
        row: Mapping-like record providing the 'region' and 'unix_time' that
            were used for the forward adjustment.

    Returns:
        The price ``p`` such that ``p * (1 - f) == adjusted_price``, where
        ``f`` is the same fraction the forward adjustment used.

    Raises:
        KeyError: If the row's region has no configured percentage.
        ValueError: If the adjustment is exactly 100%, in which case the
            forward adjustment maps every price to 0 and cannot be inverted.
    """
    retained_share = 1 - _adjustment_fraction(row)
    if retained_share == 0:
        raise ValueError(
            "adjustment of 100% cannot be reversed for region "
            f"{row['region']!r}"
        )

    # The forward step multiplies by (1 - f), so the exact inverse divides by
    # it. Multiplying by (1 + f) instead would leave an error of f**2.
    return adjusted_price / retained_share

# Once the test-set predictions exist, store them on X_test so each prediction
# sits in the same row as the fields reverse_adjusted_price reads from it.
X_test['predicted_price'] = test_predictions

# Run reverse_adjusted_price on every row's prediction, producing one value per
# row of X_test.
y_test_adjusted = X_test.apply(
    lambda record: reverse_adjusted_price(record['predicted_price'], record),
    axis="columns",
)
