In [4]:
#Imports
import pandas as pd
import pyodbc
import sqlalchemy
from sqlalchemy import create_engine
import urllib
import numpy as np

In [5]:
# Watermark
print('Nathan Young\nJunior Data Analyst\nCenter for the Study of Free Enterprise')
%load_ext watermark
%watermark -a "Western Carolina University" -u -d -p pandas

Nathan Young
Junior Data Analyst
Center for the Study of Free Enterprise
The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Western Carolina University 
last updated: 2020-02-22 

pandas 1.0.1


In [6]:
# Create Backups
#df_backup = pd.read_csv('./Updates/STG_ZLLW_County_MedianListingPrice_AllHomes.txt')
#df_backup.to_csv('./Backups/STG_ZLLW_County_MedianListingPrice_AllHomes_BACKUP.txt')

In [7]:
#Load Land data
df_mlp = pd.read_csv('http://files.zillowstatic.com/research/public/County/Sale_Prices_County.csv',
                     encoding='ISO-8859-1')

#Display table to ensure data loaded correctly
df_mlp.head()

Unnamed: 0,RegionID,RegionName,StateName,SizeRank,2008-03,2008-04,2008-05,2008-06,2008-07,2008-08,...,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01
0,3101,Los Angeles County,California,1,458300.0,448000.0,431700.0,419900.0,403800.0,388700.0,...,605500.0,604800.0,603500.0,606200.0,608700.0,612900.0,611200.0,615700.0,624600.0,633000.0
1,139,Cook County,Illinois,2,285300.0,280300.0,255900.0,242800.0,235800.0,234900.0,...,256200.0,245300.0,236200.0,232100.0,232700.0,234200.0,235400.0,237300.0,241400.0,244700.0
2,1090,Harris County,Texas,3,137600.0,135500.0,132000.0,132000.0,132400.0,134300.0,...,207700.0,208500.0,210000.0,209600.0,211300.0,213000.0,218700.0,220600.0,222700.0,222600.0
3,2402,Maricopa County,Arizona,4,231000.0,221400.0,212100.0,207500.0,201500.0,196100.0,...,249500.0,249500.0,251200.0,254800.0,255900.0,257900.0,260700.0,265700.0,269100.0,275000.0
4,2841,San Diego County,California,5,401900.0,395400.0,380800.0,369700.0,353500.0,341600.0,...,561200.0,559800.0,563100.0,561500.0,565800.0,562800.0,569400.0,576700.0,583600.0,587700.0


In [8]:
#Filter data to NC

filter1 = df_mlp['State'] == "NC"
df_mlp_nc = df_mlp[filter1]

#Check to ensure filter worked
df_mlp_nc.head(5)

KeyError: 'State'

In [None]:
#View data types of dataframe
df_mlp_nc.dtypes

In [None]:
#Change MunicipalCodeFIPS dtype to add leading 0's
df_mlp_nc.loc[ :, 'MunicipalCodeFIPS'] = df_mlp_nc['MunicipalCodeFIPS'].astype(str)
df_mlp_nc.dtypes

In [None]:
#Add leading 0's and check to ensure they were added
df_mlp_nc.loc[ :, 'MunicipalCodeFIPS'] = df_mlp_nc['MunicipalCodeFIPS'].str.zfill(3)
df_mlp_nc.head(5)

In [None]:
# Set Index to Region Name
df_mlp_nc.set_index(df_mlp_nc['RegionName'], inplace = True)
df_mlp_nc

In [None]:
# Drop Region Name column
df_mlp_nc.drop('RegionName', axis = 1, inplace = True)
df_mlp_nc

In [None]:
#Save to csv file for export in Excel
#df_mlp_nc.to_csv('./Updates/STG_ZLLW_County_MedianListingPrice_AllHomes.txt', sep ='\t')

In [None]:
#Reset Index for upload to database
df_mlp_nc = df_mlp_nc.reset_index()    

In [None]:
#Fill NaN values for upload to database
df_mlp_nc['Metro'] = df_mlp_nc['Metro'].replace(np.nan,'', regex=True)

column_list = df_mlp_nc.columns.values
for i in column_list:
    df_mlp_nc.loc[df_mlp_nc[i].isnull(),i]=0

In [None]:
#Connect to database and create cursor
con = pyodbc.connect('Driver={SQL Server};'
                      'Server=TITANIUM-BOOK;'
                      'Database=DataDashboard;'
                      'Trusted_Connection=yes;',
                    autocommit=True)

c = con.cursor()

In [None]:
#Drop old backup table
c.execute('drop table STG_ZLLW_County_MedianListingPrice_AllHomes_BACKUP')

In [None]:
#Create new backup
c.execute('''sp_rename 'dbo.STG_ZLLW_County_MedianListingPrice_AllHomes','STG_ZLLW_County_MedianListingPrice_AllHomes_BACKUP';''')

In [None]:
c.execute('''USE [DataDashboard]

SET ANSI_NULLS ON

SET QUOTED_IDENTIFIER ON

CREATE TABLE [dbo].[STG_ZLLW_County_MedianListingPrice_AllHomes](
	[RegionName] [varchar](40) NULL,
	[State] [varchar](2) NULL,
	[Metro] [varchar](40) NULL,
	[StateCodeFIPS] [varchar](2) NULL,
	[MunicipalCodeFIPS] [varchar](3) NULL,
	[SizeRank] [smallint] NULL,
	[2010-01] [float] NULL,
	[2010-02] [float] NULL,
	[2010-03] [float] NULL,
	[2010-04] [float] NULL,
	[2010-05] [float] NULL,
	[2010-06] [float] NULL,
	[2010-07] [float] NULL,
	[2010-08] [float] NULL,
	[2010-09] [float] NULL,
	[2010-10] [float] NULL,
	[2010-11] [float] NULL,
	[2010-12] [float] NULL,
	[2011-01] [float] NULL,
	[2011-02] [float] NULL,
	[2011-03] [float] NULL,
	[2011-04] [float] NULL,
	[2011-05] [float] NULL,
	[2011-06] [float] NULL,
	[2011-07] [float] NULL,
	[2011-08] [float] NULL,
	[2011-09] [float] NULL,
	[2011-10] [float] NULL,
	[2011-11] [float] NULL,
	[2011-12] [float] NULL,
	[2012-01] [float] NULL,
	[2012-02] [float] NULL,
	[2012-03] [float] NULL,
	[2012-04] [float] NULL,
	[2012-05] [float] NULL,
	[2012-06] [float] NULL,
	[2012-07] [float] NULL,
	[2012-08] [float] NULL,
	[2012-09] [float] NULL,
	[2012-10] [float] NULL,
	[2012-11] [float] NULL,
	[2012-12] [float] NULL,
	[2013-01] [float] NULL,
	[2013-02] [float] NULL,
	[2013-03] [float] NULL,
	[2013-04] [float] NULL,
	[2013-05] [float] NULL,
	[2013-06] [float] NULL,
	[2013-07] [float] NULL,
	[2013-08] [float] NULL,
	[2013-09] [float] NULL,
	[2013-10] [float] NULL,
	[2013-11] [float] NULL,
	[2013-12] [float] NULL,
	[2014-01] [float] NULL,
	[2014-02] [float] NULL,
	[2014-03] [float] NULL,
	[2014-04] [float] NULL,
	[2014-05] [float] NULL,
	[2014-06] [float] NULL,
	[2014-07] [float] NULL,
	[2014-08] [float] NULL,
	[2014-09] [float] NULL,
	[2014-10] [float] NULL,
	[2014-11] [float] NULL,
	[2014-12] [float] NULL,
	[2015-01] [float] NULL,
	[2015-02] [float] NULL,
	[2015-03] [float] NULL,
	[2015-04] [float] NULL,
	[2015-05] [float] NULL,
	[2015-06] [float] NULL,
	[2015-07] [float] NULL,
	[2015-08] [float] NULL,
	[2015-09] [float] NULL,
	[2015-10] [float] NULL,
	[2015-11] [float] NULL,
	[2015-12] [float] NULL,
	[2016-01] [float] NULL,
	[2016-02] [float] NULL,
	[2016-03] [float] NULL,
	[2016-04] [float] NULL,
	[2016-05] [float] NULL,
	[2016-06] [float] NULL,
	[2016-07] [float] NULL,
	[2016-08] [float] NULL,
	[2016-09] [float] NULL,
	[2016-10] [float] NULL,
	[2016-11] [float] NULL,
	[2016-12] [float] NULL,
	[2017-01] [float] NULL,
	[2017-02] [float] NULL,
	[2017-03] [float] NULL,
	[2017-04] [float] NULL,
	[2017-05] [float] NULL,
	[2017-06] [float] NULL,
	[2017-07] [float] NULL,
	[2017-08] [float] NULL,
	[2017-09] [float] NULL,
	[2017-10] [float] NULL,
	[2017-11] [float] NULL,
	[2017-12] [float] NULL,
	[2018-01] [float] NULL,
	[2018-02] [float] NULL,
	[2018-03] [float] NULL,
	[2018-04] [float] NULL,
	[2018-05] [float] NULL,
	[2018-06] [float] NULL,
	[2018-07] [float] NULL,
	[2018-08] [float] NULL,
	[2018-09] [float] NULL,
	[2018-10] [float] NULL,
	[2018-11] [float] NULL,
	[2018-12] [float] NULL,
	[2019-01] [float] NULL,
	[2019-02] [float] NULL,
	[2019-03] [float] NULL,
	[2019-04] [float] NULL,
	[2019-05] [float] NULL,
	[2019-06] [float] NULL,
	[2019-07] [float] NULL,
	[2019-08] [float] NULL,
	[2019-09] [float] NULL,
	[2019-10] [float] NULL,
	[2019-11] [float] NULL,
	[2019-12] [float] NULL,
    [2020-01] [float] NULL,
    [2020-02] [float] NULL,
    [2020-03] [float] NULL,
    [2020-04] [float] NULL,
    [2020-05] [float] NULL,
    [2020-06] [float] NULL,
    [2020-07] [float] NULL,
    [2020-08] [float] NULL,
    [2020-09] [float] NULL,
    [2020-10] [float] NULL,
    [2020-11] [float] NULL,
    [2020-12] [float] NULL,
    [2021-01] [float] NULL,
    [2021-02] [float] NULL,
    [2021-03] [float] NULL,
    [2021-04] [float] NULL,
    [2021-05] [float] NULL,
    [2021-06] [float] NULL
    [2021-07] [float] NULL,
    [2021-08] [float] NULL,
    [2021-09] [float] NULL,
    [2021-10] [float] NULL,
    [2021-11] [float] NULL,
    [2021-12] [float] NULL,
    [2022-01] [float] NULL,
    [2022-02] [float] NULL,
    [2022-03] [float] NULL,
    [2022-04] [float] NULL,
    [2022-05] [float] NULL,
    [2022-06] [float] NULL,
    [2022-07] [float] NULL,
    [2022-08] [float] NULL,
    [2022-09] [float] NULL,
    [2022-10] [float] NULL,
    [2022-11] [float] NULL,
    [2022-12] [float] NULL
) ON [PRIMARY]''')

In [None]:
params = urllib.parse.quote_plus(r'Driver={SQL Server};' 
                                 r'Server=TITANIUM-BOOK;'
                                 r'Database=DataDashboard;'
                                 r'Trusted_Connection=yes;')

engine = create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

#warning: discard old table if exists
df_mlp_nc.to_sql('STG_ZLLW_County_MedianListingPrice_AllHomes', con=engine, if_exists='replace', index=False)