In [None]:
### REAL ESTATE DATA ANALYSIS 
# Built from the RealEstate.mdf file compiled from *.CSV files downloaded from https://www.zillow.com/research/data/ .
# Additional tables built from within SQL.

# Current Tables (in use) within the database for this program (others are available, these are more streamlined): 
# -------------------------------
# AllThreeUnPivSalesPriceMo    *** Top, Mid & Bottom Tier SFR sales prices 
# ForecastSalesAvg2022         *** Projection of Top, Mid & Bottom Tier SFR sales prices by May of 2022
# Window_TMB                   ***Combination of Top, Mid & Bottom Tier Windowing tables ($$ Difference month over month) 
#   
# 
#          
#  ***Add rental rate table to database 


# *** First, in Anaconda promp run: "conda install -c anaconda pyodbc" to add pyodbc library to Jupyter Notebooks

# options to program for data isolation::: 
# ask for user input to guide through cmdLine menu for information library.
# 1) Question "What would you like to know?"
#    :::prompt user to type in a number index of their choice of the following options:
#        (1)=Avg Sales price change over time / (2)=Avg Sales price prediction 2022  

#  ***Add later      (3)=Avg Rental rate change over time / (4)=Avg Rental rate prediction 2022
# input_1 = input("What would you like to know? Type the number of the option that best fits your question and press enter.\n\n 1 = Avg Sales price change over time  2 = Avg Sales price prediction 2022 \n  3 = Avg Rental rate change over time \n 4 = Avg Rental rate prediction 2022 \n\n") 

# 2) Question: "What State would you like to know about?"
#    :::prompt user to type in state abbreviation or "all"

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pyodbc

In [2]:
pyodbc.drivers()

['SQL Server',
 'SQL Server Native Client 11.0',
 'SQL Server Native Client RDA 11.0',
 'ODBC Driver 17 for SQL Server']

In [5]:
cnxn_str = (
    r'DRIVER=SQL Server;'
    r'SERVER=(local)\MSSQLSERVER01;'
    r'Trusted_Connection=yes;'
    r'Database=RealEstate'
    #r'AttachedDbFileName=D:\SQL2019\MSSQLSERVER01\MSSQL\DATA\RealEstate.mdf;'
)
cnxn = pyodbc.connect(cnxn_str)
#df = pd.read_sql("SELECT * FROM sys.databases", cnxn)
#df

In [None]:
# Input number labels:::
input1_op1 = 'Avg Sales price change over time'
input1_op2 = 'Avg Sales price prediction 2022'
input1_op3 = 'Custom SQL Query'
input1_op4 = 'Avg Rental Rates change over time'
input1_op5 = 'Oracle Prediction'

In [4]:
# Programming for user input data navigation prompt:
def mainmenu():
    input_1 = input("""What would you like to know? \n\n
    Type the number of the option that best fits your question and press enter.\n\n 
    1 = Avg Sales price change over time  2 = Avg Sales price prediction 2022  3 = Custom SQL Query \n
    4 = Avg Rental Rates change over time  5 = Proprietary Oracle Prediction
    """)
    if input_1 == "3":
        print('You chose {} \n\n'.format(input1_op3))
        input_3 = input("""
        Type your SQL query from any of the Tables and corresponding schemas shown below \n\n 
        Use normal SQL syntax \n\n 
        Table: AllThreeUnPivSalesPriceMo \n 
        Schema: StateName | State | Date | Bottom Tier | Middle Tier | Top Tier \n\n 
        Table: ForecastSalesAvg2022 \n 
        Schema: Lstate | BottomTierProjection2022 | MiddleTierProjection2022 | TopTierProjection2022 \n\n 
        Table: Window_TMB \n 
        Schema: StateName | Lstate | Date | TopValue | TopPriorMonth | TopDiffPrevMonth | MiddleValue | MidPriorMonth \n| MidDiffPrevMonth | BottomValue | BottomPriorMonth | BottomDiffPrevMonth \n\n
        Table: RentalRatesUnPiv \n
        Schema: Lstate | CityName | Value | Date \n\n
        Table: RentalRates_Window \n
        Schema: Lstate | CityName | Date | Value | PriorMonth | DiffPriorMonth \n\n
        """)
        for i in input_3:
            try:
                print(pd.read_sql("{}".format(input_3),cnxn))
                mainmenu()
            except ProblemExecutingQuery:
                print("Error executing query...Check your syntax and try again \n\n")
                input_3 = input("Type your SQL query from any of the Tables and corresponding schemas shown below \n\n Use normal SQL syntax \n\n Table: AllThreeUnPivSalesPriceMo \n Schema: StateName | State | Date | Bottom Tier | Middle Tier | Top Tier \n\n Table: ForecastSalesAvg2022 \n Schema: Lstate | BottomTierProjection2022 | MiddleTierProjection2022 | TopTierProjection2022 \n\n Table: Window_TMB \n Schema: StateName | Lstate | Date | TopValue | TopPriorMonth | TopDiffPrevMonth | MiddleValue | MidPriorMonth | \n MidDiffPrevMonth | BottomValue | BottomPriorMonth | BottomDiffPrevMonth \n\n")
                for i in input_3:
                    try:
                        print(pd.read_sql("{}".format(input_3),cnxn))
                        mainmenu()
                    except ProblemExecutingQuery:
                        print("Error executing query...Check your syntax and try again \n\n")
                        mainmenu()
    elif input_1 == "5":
        print('You chose {} \n\n'.format(input1_op4))
        oracleInput_market = input("Type \"rental\" or \"sales\" to pick a market to calculate a prediction for \n\n")
        oracleInput_state = input("Type the two letter abbreviation of the state you want a prediction for (not case sensitive) \n\n")
        oracleInput_date = input("Type the numerical date you want a prediction for in the format: mm-dd-yyyy (with dashes) \n\n")
        print('You chose to predict the {} market in the state of {} around the time of {} \n\n'.format(oracleInput_market,oracleInput_state,oracleInput_date))                    
        mainmenu()
    input_2 = input("""
    What state would you like to know about? \n\n 
    Type the two letter state abbreviation for state isolated metrics \n\n 
    or \"all\" for national metrics.\n\n 
    example: tx (not case sensitive) \n\n""")
    if input_1 == "1" and input_2.lower() == "all":
        print('You chose {} with a filter of {}\n\n'.format(input1_op1, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM Window_TMB", cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()
    elif input_1 == "1":
        print('You chose {} with a filter of {}\n\n'.format(input1_op1, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM Window_TMB WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()   
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")   
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()
    elif input_1 == "2" and input_2.lower() == "all":
        print('You chose {} with a filter of {}\n\n'.format(input1_op2, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM ForecastSalesAvg2022", cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()
    elif input_1 == "2":
        print('You chose {} with a filter of {}\n\n'.format(input1_op2, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM ForecastSalesAvg2022 WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()   
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")   
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()
    elif input_1 == "4" and input_2.lower() == "all":
        print('You chose {} with a filter of {}\n\n'.format(input1_op4, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM RentalRates_Window", cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()   
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")   
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()  
    elif input_1 == "4":
        print('You chose {} with a filter of {}\n\n'.format(input1_op4, input_2.upper()))
        print( (pd.read_sql("SELECT * FROM RentalRates_Window WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)) )
        input_0 = input("Type the number 0 and press enter to return to the main menu \n")
        if input_0 == "0":
            mainmenu()   
        elif input_0 != "0":
            input("Invalid Input Error: Type the number 0 and press enter to return to the main menu \n")   
            if input_0 == "0":
                mainmenu()
            elif input_0 != "0": 
                input("You're killing me. Type the number 0 and press enter to return to the main menu \n")
                if input_0 == "0":
                    mainmenu()
                elif input_0 != "0":
                    print("You killed me. (x__x) Just restart me already then.")
                    cnxn.close()                                       

                    
mainmenu() 

What would you like to know? Type the number of the option that best fits your question and press enter.

 1 = Avg Sales price change over time  2 = Avg Sales price prediction 2022  3 = Custom SQL Query 

3

        Type your SQL query from any of the Tables and corresponding schemas shown below 

 
        Use normal SQL syntax 

 
        Table: AllThreeUnPivSalesPriceMo 
 
        Schema: StateName | State | Date | Bottom Tier | Middle Tier | Top Tier 

 
        Table: ForecastSalesAvg2022 
 
        Schema: Lstate | BottomTierProjection2022 | MiddleTierProjection2022 | TopTierProjection2022 

 
        Table: Window_TMB 
 
        Schema: StateName | Lstate | Date | TopValue | TopPriorMonth | TopDiffPrevMonth | MiddleValue | MidPriorMonth 
| MidDiffPrevMonth | BottomValue | BottomPriorMonth | BottomDiffPrevMonth 


        Table: RentalRatesUnPiv 

        Schema: Lstate | CityName | Value | Date 


        Table: RentalRates_Window 

        Schema: Lstate | CityName | Date | Val

NameError: name 'ProblemExecutingQuery' is not defined

In [None]:
# SELECT Lstate FROM Window_TMB WHERE Lstate LIKE 'TX'

In [7]:
# Converting each table in the database into individual pandas data frames:
# *** Excluding 'middle tier' 
input_2 = input("What state would you like to know about? \n\n Type the two letter state abbreviation for state isolated metrics \n\n or \"all\" for national metrics.\n\n example: tx (not case sensitive) \n\n")


df_WindowTMB_all = pd.read_sql("SELECT * FROM Window_TMB", cnxn)
df_WindowTMB_state = pd.read_sql("SELECT * FROM Window_TMB WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)

df_ForecastSalesAvg2022_all = pd.read_sql("SELECT * FROM ForecastSalesAvg2022", cnxn)
df_ForecastSalesAvg2022_state = pd.read_sql("SELECT * FROM ForecastSalesAvg2022 WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)

df_AllThreeUnPivSalesPriceMo_all = pd.read_sql("SELECT * FROM AllThreeUnPivSalesPriceMo", cnxn)
df_AllThreeUnPivSalesPriceMo_state = pd.read_sql("SELECT * FROM AllThreeUnPivSalesPriceMo WHERE State LIKE \'{}\'".format(input_2.upper()), cnxn)

df_RentalRates_Window_all = pd.read_sql("SELECT * FROM RentalRates_Window", cnxn)
df_RentalRates_Window_state = pd.read_sql("SELECT * FROM RentalRates_Window WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)

df_RentalRatesUnPiv_all = pd.read_sql("SELECT * FROM RentalRatesUnPiv", cnxn)
df_RentalRatesUnPiv_state = pd.read_sql("SELECT * FROM RentalRatesUnPiv WHERE Lstate LIKE \'{}\'".format(input_2.upper()), cnxn)


What state would you like to know about? 

 Type the two letter state abbreviation for state isolated metrics 

 or "all" for national metrics.

 example: tx (not case sensitive) 

tx


In [9]:
df_RentalRatesUnPiv_all.describe()

Unnamed: 0,Value
count,9419.0
mean,1313.997983
std,408.159519
min,581.0
25%,1055.0
50%,1205.0
75%,1443.0
max,3096.0


In [10]:
# state and value definitions for Sales price Forecast Scatterplot
# *** "ALL" OPTION
fvalue1 = np.array(df_ForecastSalesAvg2022_all.BottomTierProjection2022)
fvalue2 = np.array(df_ForecastSalesAvg2022_all.MidTierProjection2022)
fvalue3 = np.array(df_ForecastSalesAvg2022_all.TopTierProjection2022)
fstate1 = np.array(df_ForecastSalesAvg2022_all.Lstate)
fcity1 = np.array(df_ForecastSalesAvg2022_all.CityName)

# *** "STATE" OPTION
fvalue4 = np.array(df_ForecastSalesAvg2022_state.BottomTierProjection2022)
fvalue5 = np.array(df_ForecastSalesAvg2022_state.MidTierProjection2022)
fvalue6 = np.array(df_ForecastSalesAvg2022_state.TopTierProjection2022)
fstate2 = np.array(df_ForecastSalesAvg2022_state.Lstate)
fcity2 = np.array(df_ForecastSalesAvg2022_state.CityName)

In [11]:
fvalue1

array([196621.02437303, 195883.23423635, 194960.99678537, ...,
       195883.23423635, 199203.28941168, 193669.8647058 ])

In [12]:
type(fvalue1)

numpy.ndarray

In [13]:
fstate1

array(['AK', 'AK', 'AK', ..., 'WY', 'WY', 'WY'], dtype=object)

In [14]:
type(fstate1)

numpy.ndarray

In [None]:
# *** "ALL" OPTION
# move below plt construct code under respective "elif" statement in the mainmenu() function:
# this scatterplot is for the 2022 price projection, X-axis value is State
# from df_ForecastSalesAvg2022_all

# Bottom Tier
plt.figure(figsize=(10,6))
plt.title("Bottom Tier Projection May 2022",fontsize=20)
plt.xlabel("State",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(100000,300000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fstate1 , y=fvalue1 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()


# Middle Tier
plt.figure(figsize=(10,6))
plt.title("Middle Tier Projection May 2022",fontsize=20)
plt.xlabel("State",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(200000,500000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fstate1 , y=fvalue2 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()


# Top Tier
plt.figure(figsize=(10,6))
plt.title("Top Tier Projection May 2022",fontsize=20)
plt.xlabel("State",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(350000,700000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fstate1 , y=fvalue3 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()

In [None]:
# *** "STATE" OPTION

# this scatterplot is for the 2022 price projection, X-axis value is State
# from df_ForecastSalesAvg2022_all

input_2 = input("What state would you like to know about? \n\n Type the two letter state abbreviation for state isolated metrics \n\n or \"all\" for national metrics.\n\n example: tx (not case sensitive) \n\n")

# Bottom Tier
plt.figure(figsize=(10,6))
plt.title("Bottom Tier Projection May 2022 for {}".format(input_2.upper()),fontsize=20)
plt.xlabel("City",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(100000,300000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fcity2 , y=fvalue4 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()


# Middle Tier
plt.figure(figsize=(10,6))
plt.title("Middle Tier Projection May 2022 for {}".format(input_2.upper()),fontsize=20)
plt.xlabel("City",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(200000,500000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fcity2 , y=fvalue5 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()


# Top Tier
plt.figure(figsize=(10,6))
plt.title("Top Tier Projection May 2022 for {}".format(input_2.upper()),fontsize=20)
plt.xlabel("City",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(350000,700000)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=fcity2 , y=fvalue6 , c='red' , s=20 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=40,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()

In [None]:
# state and date definitions for Change over Time (Window_TMB) Scatterplot



In [None]:
# move below plt construct code under respective "elif" statement in the mainmenu() function:

# this one is for the sales prices over time, X-axis value is Date
# from the Window_TMB table
date = 
value = 

plt.figure(figsize=(10,6))
plt.title("Change in price over time",fontsize=20)
plt.xlabel("Date",fontsize=16)
plt.ylabel("Value",fontsize=16)
plt.grid (False)
plt.ylim(10,100)
plt.xticks([i*6 for i in range(10)],fontsize=15)
plt.yticks(fontsize=15)
plt.scatter(x=date , y=value , c='red' , s=150 , edgecolors='m')
#plt.text(x=30,y=45,s="Weights are more or less similar \nafter 18-20 years of age",fontsize=15)
plt.vlines(x=20,ymin=0,ymax=100,linestyles='dashed',color='k',lw=3)
plt.legend(['Home Values'],loc=2,fontsize=12)
plt.show()

In [None]:
cnxn.close()