# 11-23-06 Geographic Visualization 

#### 1. Import data and libraries
#### 2. Data wrangling
#### 3. Data cleaning
#### 4. Plotting a choropleth

### 1. Import data and libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

In [2]:
# This command propts matplotlib visuals to appear in the notebook 

%matplotlib inline

In [3]:
# Import ".json" file for Connecticut State 

country_geo = r'C:\Users\user\11-2023 Real Estate Sales Connecticut\Data\Original Data\ct-town-boundaries.geojson'

In [4]:
path = r'C:\Users\user\11-2023 Real Estate Sales Connecticut'

In [5]:
# Import the Connecticut Real Estate data

df = pd.read_csv(os.path.join(path,'Data', 'Prepared Data', 'realestate_sold.csv'), index_col= 0)

In [6]:
# Code to look at the JSON file contents:

f = open(country_geo)
  
# returns JSON object asa dictionary
data = json.load(f)
  
# Iterating through the json list
for i in data['features']:
    print(i)

{'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-73.405966, 41.428202], [-73.378953, 41.420531], [-73.347714, 41.350244], [-73.424331, 41.335297], [-73.435022, 41.372407], [-73.401177, 41.402862], [-73.405966, 41.428202]]]}, 'properties': {'name': 'Bethel'}}
{'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-73.244169, 41.2266], [-73.201416, 41.229474], [-73.187272, 41.220534], [-73.169546, 41.222772], [-73.157637, 41.228385], [-73.162828, 41.220233], [-73.154456, 41.194804], [-73.154232, 41.183038], [-73.165756, 41.154305], [-73.179856, 41.152605], [-73.221058, 41.139904], [-73.232636, 41.14253], [-73.233878, 41.157186], [-73.22227, 41.168936], [-73.21914, 41.197767], [-73.244169, 41.2266]]]}, 'properties': {'name': 'Bridgeport'}}
{'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-73.426943, 41.463842], [-73.437636, 41.465741], [-73.44441, 41.477569], [-73.441467, 41.491302], [-73.386965, 41.514802], [-73.375264, 41.489202]

In [7]:
df.shape

(995367, 12)

### 2. Data wrangling 

##### Choosing columns

In [8]:
# Selecting only the necessary columns
df_filtered=df[['Town','Sales Ratio']]

In [9]:
df_filtered.head()

Unnamed: 0,Town,Sales Ratio
0,Ansonia,0.463
1,Ashford,0.5883
2,Avon,0.7248
3,Avon,0.6958
4,Avon,0.5957


In [10]:
df1=df_filtered.rename(columns={'Town':'TOWN'})

In [11]:
df1

Unnamed: 0,TOWN,Sales Ratio
0,Ansonia,0.463000
1,Ashford,0.588300
2,Avon,0.724800
3,Avon,0.695800
4,Avon,0.595700
...,...,...
997208,New London,1.137665
997209,Waterbury,0.898400
997210,Windsor Locks,0.578300
997211,Middletown,0.726300


### 3. Conduct consistency checks

In [12]:
# Clean extreme values

df2 = df1[df1['Sales Ratio'] < 2] 
# 2 was picked because it is beyond belief any sale with price less than a half of Assessed Value belong to crises management actions and is out of market normal praxices !

In [13]:
df2.shape

(956893, 2)

### 4. Plotting a choropleth

In [14]:
# Create a data frame with just the states and the values for rating we want plotted

data_to_plot=df2

In [15]:
data_to_plot.head()  

Unnamed: 0,TOWN,Sales Ratio
0,Ansonia,0.463
1,Ashford,0.5883
2,Avon,0.7248
3,Avon,0.6958
4,Avon,0.5957


In [16]:
# Setup a folium map at a high-level zoom
map = folium.Map(location = [100, 0], zoom_start = 8)

# Choropleth maps bind Pandas Data Frames and json geometries. This allows us to visualize data combinations quickly
folium.Choropleth(
    geo_data = country_geo, 
    data = data_to_plot,
    columns = ['TOWN', 'Sales Ratio'],
    key_on = 'feature.properties.name', 
    fill_color = 'Purples', fill_opacity=0.9, line_opacity=0.1,
    legend_name = "C").add_to(map)
folium.LayerControl().add_to(map)

map

In [17]:
map.save('plot_data.html')

In [18]:
import folium
import pandas as pd
from folium.features import DivIcon
from branca.element import Template, MacroElement

# Create a map with adjusted zoom level
map = folium.Map(location=[41.6032, -73.0877], zoom_start=8)

# Add the choropleth layer with town data
folium.Choropleth(
    geo_data=country_geo,
    data=data_to_plot,
    columns=['TOWN', 'Sales Ratio'],
    key_on='feature.properties.name',
    fill_color='YlGn',  # Using ColorBrewer code for yellow-green color gradient
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Sales Ratio"
).add_to(map)

# Define town data as a pandas DataFrame with latitude, longitude, and town_name
town_data = pd.DataFrame([
    {'latitude': 41.68, 'longitude': -73.2628, 'town_name': 'Morris', 'font_color': 'blue'},
    {'latitude': 41.4342, 'longitude': -73.1163, 'town_name': 'Oxford', 'font_color': 'blue'},
    {'latitude': 41.5023, 'longitude': -72.9791, 'town_name': 'Prospect', 'font_color': 'blue'},
    {'latitude': 41.8314, 'longitude': -73.2197, 'town_name': 'Goshen', 'font_color': 'black'},
    {'latitude': 41.3948, 'longitude': -73.4540, 'town_name': 'Danbury', 'font_color': 'black'}
])

# Add town names as custom markers on the map with colored fonts and increased font size
for index, row in town_data.iterrows():
    folium.Marker(
        [row['latitude'], row['longitude']],
        icon=DivIcon(
            icon_size=(150, 36),
            icon_anchor=(0, 0),
            html='<div style="font-size: 12pt; color: '+row['font_color']+'; font-weight: bold;">'+row['town_name']+'</div>'
        )
    ).add_to(map)

# Save the map with the adjusted town names
map.save('Analytics', 'map_with_clear_town_names.html')
map