# Exploratory Data Analysis (EDA)

This is an exploratory data analysis of real estate market trends in Connecticut.


The raw data file was obtained from https://catalog.data.gov/dataset/real-estate-sales-2001-2018. On the website, the file is described as including

>town, property address, date of sale, property type (residential, apartment, commercial, industrial or vacant land), sales price, and property assessment. 

>Annual real estate sales are reported by grand list year (October 1 through September 30 each year). For instance, sales from 2018 GL are from 10/01/2018 through 9/30/2019 (Data.gov).



Frequently used libraries are imported:

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from bokeh.plotting import figure, show, output_notebook, output_file, reset_output
output_notebook()
from bokeh.layouts import gridplot
from bokeh.models import HoverTool
import osmnx as ox
import geopandas as gpd
import re
from geopandas.tools import geocode

from geopy.geocoders import Nominatim



The dataset is imported from the CSV file:

In [None]:
# Load the raw sales records from the CSV file into a DataFrame.
sales_csv_path = '../datasets/Real_Estate_Sales_2001-2020_GL.csv'
real_estate = pd.read_csv(sales_csv_path)


To get a basic idea of the dataset, 10 randomly sampled rows are displayed:

In [None]:
# Show 10 randomly chosen rows as a quick preview of the data.
real_estate.sample(n=10)

In [None]:
# Keep only the rows whose 'Property Type' equals "Condo", then print a
# summary of the remaining frame.
is_condo_property = real_estate['Property Type'] == "Condo"
real_estate = real_estate.loc[is_condo_property]
real_estate.info()

In [None]:
# Keep only the rows whose 'Residential Type' equals "Condo", then print a
# summary of the remaining frame.
is_condo_residence = real_estate['Residential Type'] == "Condo"
real_estate = real_estate.loc[is_condo_residence]
real_estate.info()

In [None]:
# Create a Nominatim geocoder client identified by a custom user agent.
# NOTE(review): `GeoLocator` is not referenced by any other cell shown
# here (later cells build their own `geolocator`) — confirm it is needed.
GeoLocator = Nominatim(user_agent="bootcamp student")

In [None]:
from geopy.geocoders import Nominatim

# Sanity check: geocode one known address before processing the dataset.
# The geocoder must be bound to `geolocator`, the name the call below uses;
# binding it to any other name makes this cell fail with NameError.
geolocator = Nominatim(user_agent="my_geocoder")

loc = '213, Oakland Street, Manchester'
location = geolocator.geocode(loc)

# `geocode` yields None when the service finds no match for the query.
if location is not None:
    print("Latitude:", location.latitude)
    print("Longitude:", location.longitude)
else:
    print("Geocoding failed for the provided address.")


In [None]:
import time

import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

# Geocode every row's "Full Address" and store the resulting coordinates in
# the "Latitude" / "Longitude" columns of `real_estate`.
# NOTE(review): "Full Address" is not created in any cell shown here —
# confirm the column exists before running this cell.

# Initialize the geocoder
geolocator = Nominatim(user_agent="my_geocoder")

# Work on an independent copy: `real_estate` is the result of boolean
# filtering, and writing into a filtered frame can trigger pandas'
# SettingWithCopyWarning.
real_estate = real_estate.copy()

# Make sure both output columns exist even if no address can be resolved.
# Existing values are kept so re-running the cell does not wipe results.
for column in ("Latitude", "Longitude"):
    if column not in real_estate.columns:
        real_estate[column] = float("nan")

max_retries = 3
# Pause before every request: the public Nominatim service permits at most
# one request per second.
request_delay_seconds = 1.0

for index, row in real_estate.iterrows():
    address = row["Full Address"]

    # A missing address cannot be geocoded; skip it without a request.
    if pd.isna(address):
        print("Geocoding failed for:", address)
        continue

    retries = 0
    while retries < max_retries:
        time.sleep(request_delay_seconds)
        try:
            location = geolocator.geocode(address, timeout=10)
        except GeocoderTimedOut:
            retries += 1
            print("Geocoding timed out for:", address)
            print("Retrying... (Attempt", retries, "of", max_retries, ")")
            continue

        if location is None:
            # The service answered but found no match; retrying won't help.
            print("Geocoding failed for:", address)
            break

        real_estate.at[index, "Latitude"] = location.latitude
        real_estate.at[index, "Longitude"] = location.longitude
        print("Address:", address)
        print("Latitude:", location.latitude)
        print("Longitude:", location.longitude)
        break  # Successful geocode, break out of the loop
    else:
        # Reached only when the loop ended without `break`, i.e. every
        # attempt timed out.
        real_estate.at[index, "Latitude"] = None
        real_estate.at[index, "Longitude"] = None
        print("Max retries reached for:", address)

print(real_estate)


In [None]:
# Write the geocoded frame to disk. The file name must be one properly
# quoted string literal; mismatched or typographic quotes are a SyntaxError.
real_estate.to_csv('updated_real_estate.csv')