## Methods Checkpoint

In [2]:
import pandas as pd
import altair as alt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

# Switch Altair's active data transformer to 'data_server' for every chart below.
# NOTE(review): presumably chosen so the dwellings frames are not embedded in each
# chart spec; newer Altair releases may need the separate `altair_data_server`
# package for this transformer — confirm it is installed in this environment.
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [3]:
# Read both dwellings CSVs from the working directory in one pass:
# first the Denver table, then the ML table.
dwellings_denver, dwellings_ml = (
    pd.read_csv(csv_name)
    for csv_name in ("dwellings_denver.csv", "dwellings_ml.csv")
)

In [None]:
# Show the column labels of the frame loaded from dwellings_denver.csv.
dwellings_denver.columns

Index(['parcel', 'nbhd', 'abstrprd', 'livearea', 'finbsmnt', 'basement',
       'yrbuilt', 'condition', 'quality', 'totunits', 'stories', 'gartype',
       'nocars', 'xtraffic', 'floorlvl', 'numbdrm', 'numbaths', 'arcstyle',
       'sprice', 'deduct', 'netprice', 'tasp', 'smonth', 'syear', 'qualified',
       'status'],
      dtype='object')

In [None]:
# Show the column labels of the frame loaded from dwellings_ml.csv
# (this is the frame the model and the condition charts are built from).
dwellings_ml.columns

Index(['parcel', 'abstrprd', 'livearea', 'finbsmnt', 'basement', 'yrbuilt',
       'totunits', 'stories', 'nocars', 'numbdrm', 'numbaths', 'sprice',
       'deduct', 'netprice', 'tasp', 'smonth', 'syear', 'condition_AVG',
       'condition_Excel', 'condition_Fair', 'condition_Good',
       'condition_VGood', 'quality_A', 'quality_B', 'quality_C', 'quality_D',
       'quality_X', 'gartype_Att', 'gartype_Att/Det', 'gartype_CP',
       'gartype_Det', 'gartype_None', 'gartype_att/CP', 'gartype_det/CP',
       'arcstyle_BI-LEVEL', 'arcstyle_CONVERSIONS', 'arcstyle_END UNIT',
       'arcstyle_MIDDLE UNIT', 'arcstyle_ONE AND HALF-STORY',
       'arcstyle_ONE-STORY', 'arcstyle_SPLIT LEVEL', 'arcstyle_THREE-STORY',
       'arcstyle_TRI-LEVEL', 'arcstyle_TRI-LEVEL WITH BASEMENT',
       'arcstyle_TWO AND HALF-STORY', 'arcstyle_TWO-STORY', 'qualified_Q',
       'qualified_U', 'status_I', 'status_V', 'before1980'],
      dtype='object')

In [None]:
# Label to predict. dwellings_ml has no "dwelling_conditions" column (indexing it
# raises KeyError); `before1980` is the label column present in its schema.
# NOTE(review): confirm before1980 is the intended classification target.
targets = dwellings_ml["before1980"]

# Model inputs: every column except the label itself, `yrbuilt` and `parcel`.
# NOTE(review): presumably yrbuilt is excluded because before1980 is derived from
# it, and parcel because it is an identifier — confirm.
features = dwellings_ml.drop(columns=["yrbuilt", "before1980", "parcel"])

In [None]:
# Hold out 34% of the rows for testing; the fixed random_state keeps the split
# reproducible across runs.
(
    features_train,
    features_test,
    targets_train,
    targets_test,
) = train_test_split(
    features,
    targets,
    test_size=0.34,
    random_state=76,
)

In [None]:
# Spot check on the split: mean of the first ten test labels.
targets_test.head(10).mean()

0.4

In [None]:
# Spot check on the split: mean `sprice` over the first ten training rows.
features_train["sprice"].head(10).mean()

2131970.0

In [None]:
# Build a Gaussian naive Bayes classifier and fit it on the training split
# (`fit` returns the fitted estimator, so construction and training are chained).
classifier = GaussianNB().fit(features_train, targets_train)

# Predict labels for the held-out rows.
targets_predicted = classifier.predict(features_test)

# Fraction of held-out rows whose predicted label matches the true label.
metrics.accuracy_score(y_true=targets_test, y_pred=targets_predicted)

0.6696187909125915

In [6]:
# Per build year, total each condition_* column of dwellings_ml.
# NOTE(review): assumes the condition_* columns are 0/1 indicators, so each total
# is the number of dwellings with that condition rating — confirm.
#
# The aggregation is named by string ("sum") rather than by passing Python's
# builtin `sum`: pandas deprecates builtin callables in `.agg` (FutureWarning) and
# will stop mapping them to the optimized groupby reduction.
dwelling_conditions = (
    dwellings_ml
    .groupby("yrbuilt")
    .agg(
        total_condition_AVG=("condition_AVG", "sum"),
        total_condition_Excel=("condition_Excel", "sum"),
        total_condition_Fair=("condition_Fair", "sum"),
        total_condition_Good=("condition_Good", "sum"),
        total_condition_VGood=("condition_VGood", "sum"),
    )
    # Turn the `yrbuilt` group key back into a regular column for charting.
    .reset_index()
)

dwelling_conditions


Unnamed: 0,yrbuilt,total_condition_AVG,total_condition_Excel,total_condition_Fair,total_condition_Good,total_condition_VGood
0,1873,1,0,0,0,0
1,1879,1,0,0,0,0
2,1880,12,0,0,3,0
3,1881,1,0,0,2,0
4,1882,4,0,0,3,0
...,...,...,...,...,...,...
131,2009,343,2,0,5,1
132,2010,604,0,0,21,6
133,2011,1073,6,0,29,8
134,2012,794,1,0,34,2


In [5]:
# Line chart of the per-year AVG-condition totals (default mark colour).
AVG_condition_chart = alt.Chart(dwelling_conditions).mark_line().encode(
    x="yrbuilt",
    y="total_condition_AVG",
)

AVG_condition_chart

In [7]:
# Red line chart of the per-year Excel-condition totals.
Excel_condition_chart = alt.Chart(dwelling_conditions).mark_line(color="red").encode(
    x="yrbuilt",
    y="total_condition_Excel",
)

Excel_condition_chart

In [11]:
# Green bar chart of the per-year Fair-condition totals.
Fair_condition_chart = alt.Chart(dwelling_conditions).mark_bar(color="green").encode(
    x="yrbuilt",
    y="total_condition_Fair",
)

Fair_condition_chart

In [10]:
# Orange bar chart of the per-year Good-condition totals.
Good_condition_chart = alt.Chart(dwelling_conditions).mark_bar(color="orange").encode(
    x="yrbuilt",
    y="total_condition_Good",
)

Good_condition_chart

In [12]:
# Purple bar chart of the per-year VGood-condition totals.
# Uses `mark_bar`, matching the Fair and Good charts; `mark_b` is not a Chart
# method and raised AttributeError before the chart could be built.
VGood_condition_chart = (alt.Chart(dwelling_conditions)
            .encode(y="total_condition_VGood", x="yrbuilt")
            .mark_bar(color="purple"))

VGood_condition_chart

In [None]:
# Layer the five per-condition charts into a single chart (`+` is Altair's
# layering operator). The charts all read from `dwelling_conditions` and all use
# `yrbuilt` on the x axis.
final_condition_chart = (
    AVG_condition_chart
    + Excel_condition_chart
    + Fair_condition_chart
    + Good_condition_chart
    + VGood_condition_chart
)

final_condition_chart