In [1]:
import numpy as np
import matplotlib.pyplot as plt
import csv
from functools import reduce
import re 
from collections import defaultdict

import math

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

import pandas as pd

import geopandas as gpd

import folium
from folium.plugins import MarkerCluster

import branca
import branca.colormap as cm 

DIR = '/home/bryce/Projects/Data_Science/Apt_Prices/'



In [3]:
df = pd.read_csv("/home/bryce/Projects/Data_Science/Apt_Prices/updated_latlngs_2-15-23.csv", sep=";")
df.head()
ll = df.loc[0].latlng
print("original shape: ", df.shape)
df = df.loc[~df.address.isna()]
print("shape after dropping records without an address: ", df.shape)
print(len(df.address.value_counts()))

original shape:  (1877, 9)
shape after dropping records without an address:  (1876, 9)
265


In [4]:
import re

ll_re = r'^\((\-?[0-9]+\.[0-9]+), (\-?[0-9]+\.[0-9]+)\)$'

def str_to_latlng(ll):
    if not ll or pd.isna(ll):
        return None
    
    m = re.match(ll_re, ll)
    if not m:
        print("Could not extract latlng from input string: ", ll)
        return [np.nan, np.nan]
    (lat, lng) = m.groups()
    lat, lng = float(lat), float(lng)
    return [lat, lng]



In [56]:
###### RUN THIS TO REDEFINE MAP

chs_latlng = str_to_latlng(ll)
m = folium.Map(location=chs_latlng, min_zoom=10, zoom_start=12, max_zoom=14)


In [28]:
median_per_location = df.groupby(['latlng']).median()
addresses_and_latlong = df[['latlng', 'address']].drop_duplicates()



per_location = pd.merge(median_per_location, addresses_and_latlong, on='latlng')
print(per_location.head())



                                    latlng   price  beds  baths   sq_ft  \
0                  (32.671597, -79.948109)  2750.0   2.0    2.0   960.0   
1  (32.68642553722663, -79.95647298094252)  2120.0   3.0    2.0  1879.0   
2  (32.70042214285714, -79.96714714285714)  2200.0   1.0    1.0   853.0   
3  (32.70631857785454, -79.96473084704715)  1850.0   2.0    2.0  1016.0   
4   (32.7094083267698, -79.95943342125899)  3000.0   3.0    1.0  1000.0   

       zip  units_in_building                                   address  
0  29412.0                1.0       2262 Folly Rd, Charleston, SC 29412  
1  29412.0                1.0       2021 Covey Ln, Charleston, SC 29412  
2  29412.0               25.0       1674 Folly Rd, Charleston, SC 29412  
3  29412.0                3.0       1559 Folly Rd, Charleston, SC 29412  
4  29412.0                1.0  1517 Westridge Cir, Charleston, SC 29412  


  median_per_location = df.groupby(['latlng']).median()


In [31]:
# Why are there so few Mt. P apartments?

mtp = df.loc[df.address.str.find("Pleasant") != -1]
#print(mtp.count())
mtp_with_latlng = mtp.loc[~mtp.latlng.isna()]
mtp_addresses = mtp.address.unique()
mtp_addresses_with_latlng = mtp_with_latlng.address.unique()
#print(mtp_addresses.shape)
#print(mtp_addresses_with_latlng.shape)
#print(mtp_addresses_with_latlng)

#b6b010ff


In [52]:


# Add a MarkerCluser for each latlng, and put all the markers for those listings into the cluster
# example code: https://github.com/python-visualization/folium/blob/main/examples/MarkerCluster.ipynb



# Things to change: 

# why are there so few apartments in mount pleasant? is this a reflection of our data, or are mt p apartments not getting mapped to latlng?
''' 
- Instead of using marker clusters, whose color and size can't be controled, let's just take the average or median rent and size for apartments with the same location,
and display the number of apartments on the circle marker.

- Make the area of the circle propertional to the apartment's square footage

- add scale for the pricing color
'''




def calculate_size_map():
    small_size = df.sq_ft.quantile(0.05)
    large_size = df.sq_ft.quantile(0.95)
    size_range = large_size - small_size
    min_pixels = 10

    # This makes the area of the circle proportional to square footage
    #scalar = (1.0 * small_size) / (min_pixels ** 2)
    #size_to_pixels = lambda sqft : math.sqrt(sqft / scalar)
    
    # This makes the radius of the circle proportional to square footage
    scalar = (1.0 * small_size) / min_pixels
    size_to_pixels = lambda sqft : sqft / scalar
    
    return size_to_pixels
    

def calculate_color_map(df):
    green = "#32A431"
    yellow = "#f7b500"
    red = "#bb1e10"
    low_rent = df['price'].quantile(0.05)
    high_rent = df['price'].quantile(0.95)
    rent_colormap = cm.LinearColormap(colors=[green, yellow, red], vmin=low_rent, vmax=high_rent)
    return rent_colormap

def add_feature_group(dataframe, group_name, m):
    group = folium.FeatureGroup(name=group_name).add_to(m)

    # x can be either the group or the marker_cluster
    def add_marker_to_x(row, x, sqft_to_pixels, rent_to_color):
        # Note that we've grouped by latlng, so we use row.name instead of row.latlng
        ll_str = row.latlng
        if pd.isna(ll_str):
            return
        popup_str = '{address}\n{beds} beds, {baths} baths, {sq_ft} square feet\n${price}'.format(address=row.address, beds=row.beds, baths=row.baths, sq_ft=row.sq_ft, price=row.price)
        popup = folium.Popup(popup_str)
        [lat, lng] = str_to_latlng(ll_str)
        color = rent_to_color(row.price)
        folium.CircleMarker(location=(lat, lng), radius=sqft_to_pixels(row.sq_ft), popup=popup, fill_color=color, color=color).add_to(x)


  

    medians_per_location = dataframe.groupby(['latlng']).median()
    addresses_and_latlong = dataframe[['latlng', 'address']].drop_duplicates()
    info_per_location = pd.merge(medians_per_location, addresses_and_latlong, on='latlng')
    rent_to_color = calculate_color_map(info_per_location)
    sqft_to_pixels = calculate_size_map()
    info_per_location.apply(lambda row : add_marker_to_x(row, x=group, sqft_to_pixels=sqft_to_pixels, rent_to_color=rent_to_color), axis=1)

    ''' Marker clusters were our previous way of handling buildings with multiple units. Keeping the code around just in case we wanna use clusters again
    # First, we'll handle all the apartments in buildings with multiple units. These markers get put into a cluster.
    latlng_count = dataframe.latlng.value_counts().to_dict()
    for latlng, ct in latlng_count.items():
        units = dataframe.loc[dataframe.latlng == latlng]
        if ct > 1:
            marker_cluster = MarkerCluster()
            units.apply(lambda row : add_marker_to_x(row, marker_cluster), axis = 1)
            marker_cluster.add_to(group)
        else:
            continue
    
    # Next, we'll handle all the apartments that have only one unit in the building
    single_locations_apts = dataframe.loc[dataframe.units_in_building == 1]
    single_locations_apts.apply(lambda row : add_marker_to_x(row, group), axis = 1)
    '''

In [14]:
unique_ltlng = df.latlng.value_counts().to_dict()

for l, c in unique_ltlng.items():
    print(l, c)

(32.80182475, -79.93711353021902) 294
(32.9141815, -79.8890818) 52
(32.9519091, -80.0465518) 42
(32.88442842507038, -79.975813714169) 42
(32.7772747, -79.9735456) 35
(32.972680561967614, -80.07647864763047) 33
(32.7981342, -79.9970017) 31
(32.7868329, -79.9571693) 31
(32.80083440816326, -80.09239428571428) 29
(32.824576, -80.0944085) 26
(32.814971, -80.031764) 26
(32.81338231312452, -80.00576865428256) 26
(32.9344458, -80.0035647) 26
(32.80825667393569, -79.9446478017767) 25
(32.803167, -80.114221) 25
(32.70042214285714, -79.96714714285714) 25
(32.86400766415608, -79.90708766766515) 24
(32.7894758, -79.9583148) 24
(32.8350712, -80.0667824) 24
(32.840951, -80.092167) 23
(32.808224, -80.00699) 23
(32.87821159838819, -80.06216860923003) 23
(32.72143937236513, -79.97397405345468) 21
(32.7936134, -79.9390543) 21
(32.798816, -79.9018924) 21
(32.796923, -80.083666) 20
(32.84576686076789, -79.8805888252356) 20
(32.9802705884324, -80.08709435876804) 20
(32.79816, -80.02302722222223) 20
(32.8494

In [57]:
one_brs = df.loc[df.beds == 1]
two_brs = df.loc[df.beds == 2]
three_brs = df.loc[df.beds == 3]
four_brs = df.loc[df.beds == 4]


add_feature_group(one_brs, "1 Bed", m)
add_feature_group(two_brs, "2 Beds", m)
add_feature_group(three_brs, "3 Beds", m)
add_feature_group(four_brs, "4 Beds", m)


folium.LayerControl().add_to(m)

  medians_per_location = dataframe.groupby(['latlng']).median()
  medians_per_location = dataframe.groupby(['latlng']).median()
  medians_per_location = dataframe.groupby(['latlng']).median()
  medians_per_location = dataframe.groupby(['latlng']).median()


<folium.map.LayerControl at 0x7f37fa3f0f10>

In [58]:
m