In [155]:
import pandas as pd
import Levenshtein
import math

In [156]:
pd.options.display.max_columns = 22
pd.options.display.max_rows = 400

### Read in all taxes data + prelim cleaning

In [157]:
df_taxes_list = []
path = "~/Dropbox (Amherst College)/CDS-2019-AlbanyHub/Raw-Data/"
for i in range(2009, 2020):
    df_taxes_list.append(pd.read_csv(path+"AY"+str(i)+ " Real Property_Personal Property.csv"))
df_full_taxes = pd.concat(df_taxes_list, ignore_index=True)
df_full_taxes.head();

In [158]:
df_full_taxes.shape

(421915, 21)

In [159]:
df_home = pd.read_csv('~/Dropbox (Amherst College)/CDS-2019-AlbanyHub/Raw-Data/fixed_home_v02.csv')
df_cdbg = pd.read_csv('~/Dropbox (Amherst College)/CDS-2019-AlbanyHub/Raw-Data/fixed_cdbg.csv')

In [160]:
def genStrip():
    return lambda addr: addr.strip()

In [161]:
def genSpacing():
    def test(word):
        if word == "  ":
            return ""
        elif type(word) == float:
            if math.isnan(word):
                return ""
        else:
            return word
    return test

In [162]:
df_full_taxes['FULL_ADDRESS'] = df_full_taxes['HOUSE_NO'].astype(str).apply(genStrip()) + \
' ' + df_full_taxes['STDIRECT'].apply(genSpacing()) + df_full_taxes["STREET_NAM"].apply(genStrip()) + ' ' + df_full_taxes["STREET_TYPE"].astype(str).apply(genStrip())

In [163]:
df_full_taxes.head()

Unnamed: 0,OWNER_NAME,HOUSE_NO,STDIRECT,STREET_NAM,STREET_TYPE,UNIT,PROP_ZIP,ZONE_CODE,SUBDIVISION,PARCEL_NO,PREV_VAL,CURR_VAL,VALCHGDATE,TAXDISTRIC,HOMEEXEMPT,TOTALACRES,PROPERTY_CLASS,DIG_STRAT,BUSI_ID,NAICS,PERSKEY,FULL_ADDRESS
0,NICHOLS DORRIS WILDER,0,,A C L RAILROAD,,,,C3,COUNTY LINE - ACREE AREA,00104/00001/014,13100,13100,11/12/2008,2,S0,0.0,RESIDENTIAL,3,,,,0 A C L RAILROAD nan
1,WESTON MONTY,0,,A C L RAILROAD,,,,C3,COUNTY LINE - ACREE AREA,00104/00001/015,26300,26300,11/12/2008,2,S0,0.39,COMMERCIAL,3,,,,0 A C L RAILROAD nan
2,NICHOLS JOE,0,,A C L RAILROAD,,,,C3,COUNTY LINE - ACREE AREA,00104/00001/016,990,990,11/12/2008,2,S0,0.25,COMMERCIAL,3,,,,0 A C L RAILROAD nan
3,ISRAEL CAROLYN R ETAL,0,,A C L RAILROAD,,,,AG,MASSEY,00155/00004/013,21400,21400,4/16/2007,1,S0,17.19,RESIDENTIAL,4,,,,0 A C L RAILROAD nan
4,GEER RET,0,,A C L RAILROAD,,,,AG,LIBERTY EXPRESSWAY INDUSTRIAL AREA,00186/00001/006,28600,28600,6/20/2007,1,S0,14.3,RESIDENTIAL,4,,,,0 A C L RAILROAD nan


In [164]:
df_house = pd.concat([df_home, df_cdbg], ignore_index = True)
df_house.drop('Unnamed: 0', axis = 1, inplace = True)
s_house = pd.Series(df_house['Address'].unique()) # get unique values for housing proj addresses

### Read in addr junction table list and compare

In [165]:
df_addr = pd.read_csv('~/Dropbox (Amherst College)/CDS-2019-AlbanyHub/ToDatabase/addr_junct_table.csv')

In [166]:
f_addresses = df_addr['Address']
t_addresses = pd.Series(df_full_taxes['FULL_ADDRESS'].unique()) # to compare

In [167]:
t_addresses[~t_addresses.isin(f_addresses)].shape

(16224,)

In [168]:
no_tax = f_addresses[~f_addresses.isin(t_addresses)] # addresses that we "don't" have tax data for

In [169]:
no_tax.shape

(9029,)

In [170]:
def lev_list(address_list, address, i): # will help us find near matches
    temp_list = []
    temp_list_u = []
    for u_address in address_list:
        if Levenshtein.distance(address, u_address.upper()) <= i:
            temp_list.append(address)
            temp_list_u.append(u_address.upper())
    temp_list_series = pd.Series(temp_list)
    temp_list_u_series = pd.Series(temp_list_u)
    return temp_list_u_series

In [171]:
test = lev_list(t_addresses, "610 W GORDON AVE", 3) # some exploratory checking

In [172]:
f_addresses.head()

0    501 S MADISON ST
1     525 LINCOLN AVE
2    609 S JACKSON ST
3     602 S MONROE ST
4    509 W GORDON AVE
Name: Address, dtype: object

In [173]:
def gen_list_none(address_list, mismatched, i): # just to find how many addresses are so far off from anything in full
    tmp_list = []
    for address in mismatched:
        if address == "UNKNOWN ADDRESS" or address == "SUPPRESSED ADDRESS":
            continue
        if lev_list(address_list, address, i).shape[0] == 0:
            tmp_list.append(address)
    return pd.Series(tmp_list)

In [174]:
def contains_cardinal(addr):
    directions = [' E ', ' W ', ' N ', ' S ', ' NE ', ' NW ', ' SW ', ' SE ']
    count = 0
    for car in directions:
        if car in addr:
            if "JOHNNY W WILLIAMS" in addr:
                return None
            return car # tested earlier, only one instance of each in every single address
    return None

In [175]:
#def test_contains_cardinal():    
#    lis = []
#    for i in range(len(no_tax)):
#        addr = no_tax.iloc[i]
#        res = contains_cardinal(addr)
#        lis.append(res)
#    tmp = pd.Series(lis)
#    return tmp
#test_contains_cardinal().value_counts()

In [176]:
f_addresses[f_addresses.str.contains("2430 ROSEBRIER")] #POTENTIALLY A BIT OF AN ISSUE

2725     2430 ROSEBRIER DR
5288     2430 ROSEBRIER RD
5428    2430 ROSEBRIER RD.
Name: Address, dtype: object

In [177]:
df_full_taxes[df_full_taxes['FULL_ADDRESS'].str.contains("2430 ROSEBRIER AVE")]

Unnamed: 0,OWNER_NAME,HOUSE_NO,STDIRECT,STREET_NAM,STREET_TYPE,UNIT,PROP_ZIP,ZONE_CODE,SUBDIVISION,PARCEL_NO,PREV_VAL,CURR_VAL,VALCHGDATE,TAXDISTRIC,HOMEEXEMPT,TOTALACRES,PROPERTY_CLASS,DIG_STRAT,BUSI_ID,NAICS,PERSKEY,FULL_ADDRESS
28599,ALBANY ADVOCACY RESOURCE CTR,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,12/20/2007,1,S0,0.41,,3,,,,2430 ROSEBRIER AVE
66486,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
104579,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
142671,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
180752,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
218867,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,10000,,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
256928,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,,R6,HOLLY BERRY SUBD,000RR/00004/14F,10000,7000,4/20/2015,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
294856,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,31705.0,R6,HOLLY BERRY SUBD,000RR/00004/14F,7000,7000,4/20/2015,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
333499,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,31705.0,R6,HOLLY BERRY SUBD,000RR/00004/14F,7000,7000,4/20/2015,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE
373009,ALBANY HOUSING VII INC,2430,,ROSEBRIER,AVE,,31705.0,R6,HOLLY BERRY SUBD,000RR/00004/14F,7000,7000,4/20/2015,1,S0,0.41,,1,,,,2430 ROSEBRIER AVE


In [178]:
no_tax[no_tax.str.contains("ROSEBRIER")]

1363     2101 ROSEBRIER AVE
2725      2430 ROSEBRIER DR
5163     2444 ROSEBRIER AVE
5288      2430 ROSEBRIER RD
5428     2430 ROSEBRIER RD.
28601    2600 ROSEBRIER AVE
Name: Address, dtype: object

In [179]:
no_tax.iloc[2249] = '1006 S PHILEMA RD'# switcherooni
no_tax.iloc[148] = '508 SE LIBERTY EXPRESS WAY'

In [180]:
t_addresses_set = set(t_addresses)

In [181]:
def construct_st_name(address):
    split = address.split()
    end = split[-1]
    first = "NA"
    start_index = 0
    end_index = len(split) - 1
    if split[0].isnumeric():
        start_index += 1
    if contains_cardinal(address) != None:
        start_index += 1
    first = split[start_index:end_index]
    word = ""
    for a in first:
        word += a + " "
    word = word.strip()
    return word

In [182]:
def construct_dictionary():
    endings = {'AVE', 'DR', 'RD', 'ST', 'LN', 'CT', 'BLVD', 'CIR', 
               'WAY', 'PL', 'EXPY', 'ALY', 'TRL', 'PKWY', 'TER'}
    dictionary = {}
    counter = 0
    for i in range(len(f_addresses)):
        split = f_addresses[i].split()
        end = split[-1]
        if end in endings:
            st_name = construct_st_name(f_addresses[i])
            if st_name in dictionary.keys():
                existing_endings = dictionary[st_name]
                if end not in existing_endings:
                    counter += 1
                    existing_endings.append(end)
            else:
                dictionary[st_name] = [end]
    print(counter)
    return dictionary

In [183]:
def test():
    lis = []
    newly_found = []
    words = []
    dictionary = construct_dictionary()
    counter = 0
    for i in range(len(no_tax)):
        split = no_tax.iloc[i].split()
        word = split[:-1]
        wordo = ""
        for a in word:
            wordo += a + " "
        wordo = wordo.strip()
        st_name = construct_st_name(no_tax.iloc[i])
        found = False
        if st_name in dictionary.keys():
            for ending in dictionary[st_name]: #test a full address
                test = wordo + " " + ending
                words.append(test)
                if test in t_addresses_set:
                    newly_found.append(test)
                    found = True
                    continue
            if not found:
                newly_found.append(no_tax.iloc[i])
        else:
            newly_found.append(no_tax.iloc[i])
            
    print(counter)
    return pd.Series(newly_found)

In [184]:
newly_found = test()

86
0


In [185]:
no_tax.shape

(9029,)

In [186]:
newly_found.shape

(9029,)

In [187]:
lis = []
for i in range(len(newly_found)):
    word = newly_found[i].split()
    word = word[1:-1]
    lis.append(word)
tmp = pd.Series(lis)

In [188]:
newly_found[newly_found.str.contains("ROSEBRIER")]

556     2101 ROSEBRIER AVE
1008    2430 ROSEBRIER AVE
1626    2444 ROSEBRIER AVE
1653    2430 ROSEBRIER AVE
1694    2430 ROSEBRIER AVE
8462    2600 ROSEBRIER AVE
dtype: object

In [189]:
lis.value_counts()

AttributeError: 'list' object has no attribute 'value_counts'

In [190]:
def fix_addresses():
    lis = []
    newly_found_2 = []
    dictionary = construct_dictionary()
        
    for i in range(len(newly_found)): # loop through values in main that do not have tax data
        addr = newly_found.iloc[i]
        car = contains_cardinal(addr)
        ind = "NA"
        if car != None:
            ind = addr.find(car)
            addr = addr[:ind] + " " + addr[ind+3:]
            if addr in t_addresses_set: # check if we can find the addr if we take out the cardinal direction
                newly_found_2.append(addr) # if we can, we ultimately want to update the tax data with the correct direction
                continue
            else:
                newly_found_2.append(addr)
        else:
            newly_found_2.append(addr)
        lis.append(addr)
    tmp = pd.Series(lis)
    newly_found_2 = pd.Series(newly_found_2)
    return newly_found_2

In [191]:
newly_found_2 = fix_addresses()

86


In [192]:
newly_found_2.shape

(9029,)

In [193]:
newly_found.head()

0    509 W GORDON AVE
1    610 W GORDON AVE
2    700 W GORDON AVE
3    708 W GORDON AVE
4       2632 ERICA CT
dtype: object

In [200]:
no_tax.index = range(len(no_tax))

(3817,)

In [204]:
newly_found.head(15)

0           509 W GORDON AVE
1           610 W GORDON AVE
2           700 W GORDON AVE
3           708 W GORDON AVE
4              2632 ERICA CT
5         235 BONNY VIEW AVE
6               1200 AUGUSTA
7           205 S COLLINS ST
8           1705 E BROAD AVE
9     1001 RADIUM SPRINGS RD
10           109 W BROAD AVE
11             110 SHELBY LN
12             112 SHELBY LN
13        252 BONNY VIEW AVE
14        243 BONNY VIEW AVE
dtype: object

In [208]:
t_addresses.head()

0       0 A C L RAILROAD nan
1    4408 A C L RAILROAD nan
2              2700 ABBEY LN
3              2701 ABBEY LN
4              2702 ABBEY LN
dtype: object

In [223]:
no_tax[~(no_tax != newly_found_2)].head(30)

4                 2632 ERICA CT
5            235 BONNY VIEW AVE
6                  1200 AUGUSTA
9        1001 RADIUM SPRINGS RD
11                110 SHELBY LN
12                112 SHELBY LN
13           252 BONNY VIEW AVE
14           243 BONNY VIEW AVE
15           247 BONNY VIEW AVE
16           248 BONNY VIEW AVE
17           251 BONNY VIEW AVE
18              2530 STUART AVE
19               2726 BETTYS DR
21             2448 BROADWAY CT
22              248 BROADWAY CT
23              713 SHAMROCK DR
24              715 SHAMROCK DR
28           3353 TWINFLOWER RD
29              405 THORNTON DR
30              100 BROADWAY CT
31               2729 BETTYS DR
32       4108 RADIUM SPRINGS RD
35                 501 15TH AVE
37              2220 PALMYRA RD
39           3213 WAR EAGLE AVE
40               1507 DAWSON RD
41    2507 COOLEEWAHEE COVES CT
44    515 ABIGAIL PLANTATION RD
46               105 SPANISH CT
47                  1541 US 19S
Name: Address, dtype: object

In [226]:
no_tax[no_tax.str.contains("TWINFLOWER")].shape

(47,)

In [227]:
lev_list(t_addresses, '3353 TWINFLOWER RD', 4)

0     3203 TWIN FLOWER RD
1     3302 TWIN FLOWER RD
2     3304 TWIN FLOWER RD
3     3305 TWIN FLOWER RD
4     3306 TWIN FLOWER RD
5     3307 TWIN FLOWER RD
6     3308 TWIN FLOWER RD
7     3309 TWIN FLOWER RD
8     3310 TWIN FLOWER RD
9     3311 TWIN FLOWER RD
10    3312 TWIN FLOWER RD
11    3313 TWIN FLOWER RD
12    3314 TWIN FLOWER RD
13    3315 TWIN FLOWER RD
14    3316 TWIN FLOWER RD
15    3317 TWIN FLOWER RD
16    3318 TWIN FLOWER RD
17    3319 TWIN FLOWER RD
18    3320 TWIN FLOWER RD
19    3321 TWIN FLOWER RD
20    3322 TWIN FLOWER RD
21    3323 TWIN FLOWER RD
22    3324 TWIN FLOWER RD
23    3325 TWIN FLOWER RD
24    3327 TWIN FLOWER RD
25    3329 TWIN FLOWER RD
26    3330 TWIN FLOWER RD
27    3331 TWIN FLOWER RD
28    3333 TWIN FLOWER RD
29    3335 TWIN FLOWER RD
30    3336 TWIN FLOWER RD
31    3337 TWIN FLOWER RD
32    3338 TWIN FLOWER RD
33    3339 TWIN FLOWER RD
34    3340 TWIN FLOWER RD
35    3341 TWIN FLOWER RD
36    3342 TWIN FLOWER RD
37    3343 TWIN FLOWER RD
38    3344 T

In [202]:
Actually need to implement changes now

SyntaxError: invalid syntax (<ipython-input-202-d95afe180930>, line 1)

#### CHECK IF EACH HAS AN ENDING.
CHECK ENDINGS

In [39]:
tmp.head(20)

0             509 GORDON AVE
1              2632 ERICA CT
2         235 BONNY VIEW AVE
3               1200 AUGUSTA
4             205 COLLINS ST
5             1705 BROAD AVE
6     1001 RADIUM SPRINGS RD
7              109 BROAD AVE
8              110 SHELBY LN
9              112 SHELBY LN
10        252 BONNY VIEW AVE
11        243 BONNY VIEW AVE
12        247 BONNY VIEW AVE
13        248 BONNY VIEW AVE
14        251 BONNY VIEW AVE
15           2530 STUART AVE
16            2726 BETTYS DR
17            4805 HILL ROAD
18          2448 BROADWAY CT
19           248 BROADWAY CT
dtype: object

In [45]:
lev_list(t_addresses, '110 SHELBY LN', 3)

0       0 SHELBY LN
1     110 SHELBY DR
2     111 SHELBY DR
3     112 SHELBY DR
4     113 SHELBY DR
5     114 SHELBY LN
6     115 SHELBY LN
7     117 SHELBY LN
8     119 SHELBY LN
9     121 SHELBY LN
10    122 SHELBY LN
11    123 SHELBY LN
12    125 SHELBY LN
13    127 SHELBY LN
14    204 SHELBY LN
15    205 SHELBY LN
16    206 SHELBY LN
17    207 SHELBY LN
18    208 SHELBY LN
19    209 SHELBY LN
20    210 SHELBY LN
21    211 SHELBY LN
22    212 SHELBY LN
23    213 SHELBY LN
24    214 SHELBY LN
25    215 SHELBY LN
26    216 SHELBY LN
27    217 SHELBY LN
28    218 SHELBY LN
29    219 SHELBY LN
30    220 SHELBY LN
31    221 SHELBY LN
32    222 SHELBY LN
33    223 SHELBY LN
34    224 SHELBY LN
dtype: object

In [109]:
no_tax.iloc[148]

'508 LIBERTY EXPRESS SE WAY'

### Need to think about types of joins

#### We're trying to modify our tax data so that any conflicts or typos in tax are overwritten by full

#### But if it is not a typo (valid address), then we want to add it to our address full list (actually, not super necessary)

#### might be worth to look into an address standardizer (didn't really work)

###### side note--might be good to look into how many years we have for a an address, on average

#### What sorts of cases are there?

Questions:
how do we deal with apt buildings? need some standard way of doing that.
how can we connect properties with 0 for the number? Tax assessor's data is incomplete.
ultimately it would be unreasonable to get 1 on 1 match

-- Given an housing address, we have tax data.
-- Given an housing address, we do not have tax data.
---- If we do not have tax data, why not?
---- Typo in housing data or tax data
-------- The address in housing data is either 'correct' or 'incorrect' but we update tax data
-------- Missing street direction (somewhat easier)
-------- Wrong street ending (somewhat easier)
-------- Different spellings (somewhat harder)
-------- Wrong number or 0 for number (very hard)
---- Missing address in tax data
-------- add the housing address to the tax data and give it a value of "Not Found" (keep track of all of these)
-------- Potentially some manual fixing, but that is worst case


In [33]:
no_tax[]

2430 CLARK AVE                   1
2301 BETTYS DR                   1
1403 EAGER DR                    1
1206 6TH AVE                     1
1007 W SOCIETY AVE               1
504 5TH AVE                      1
1 EASY DR                        1
503 E BROAD AVE                  1
1232 PAUL LN                     1
1507 TAFT ST                     1
1411 SHARON DR                   1
1608 DORCHESTER                  1
1216 E 1ST AVE                   1
1110 LIBERTY EXPY                1
121 WHITTLESEY CT                1
3307 TWINFLOWER RD               1
2406 BETTYS DR                   1
5508 RANDALL DR                  1
1017 N SLAPPEY BLVD              1
800 RADIUM SPRINGS RD            1
908 W RESIDENCE AVE              1
2200 CLARK AVE                   1
128 BELMONT DR                   1
15 SUTTON PL                     1
510 10TH AVE                     1
1209 10TH AVE                    1
2001 COVES CT                    1
3624 QUAIL HOLLOW RD             1
212 HONEYSUCKLE DR  

In [20]:
df_full_taxes[df_full_taxes['STREET_NAM'].str.contains("GORDON")]

Unnamed: 0,OWNER_NAME,HOUSE_NO,STDIRECT,STREET_NAM,STREET_TYPE,UNIT,PROP_ZIP,ZONE_CODE,SUBDIVISION,PARCEL_NO,PREV_VAL,CURR_VAL,VALCHGDATE,TAXDISTRIC,HOMEEXEMPT,TOTALACRES,PROPERTY_CLASS,DIG_STRAT,BUSI_ID,NAICS,PERSKEY,FULL_ADDRESS
12778,MILLER GREG A SR & CHERYL A,0,,GORDON,AVE,,,M1,GORDON AREA WEST,00240/00015/001,14800,14800,9/6/2007,1,S0,7.40,COMMERCIAL,4,,,,0 GORDON AVE
12779,METHODIST CITY BOARD INC,0,,GORDON,AVE,,,C1,GORDON AREA WEST,00240/00002/001,29200,29200,12/26/2007,1,S0,2.92,,2,,,,0 GORDON AVE
12780,D & D WOOD PRESERVING INC,0,,GORDON,AVE,,,M1,GORDON AREA WEST,00240/00014/001,536400,536400,10/31/2008,1,S0,5.28,COMMERCIAL,4,,,,0 GORDON AVE
12781,SOUTH GEORGIA BRICK CO INC,0,,GORDON,AVE,,,M1,GORDON AREA WEST,00240/00014/006,125500,125500,3/11/2009,1,S0,3.05,COMMERCIAL,3,,,,0 GORDON AVE
12782,TRINITY METROPOLITAN BAPTIST CHURCH INC,0,,GORDON,AVE,,,M1,GORDON COMMERCIAL,00240/00009/001,59400,59400,6/16/2009,1,S0,11.89,,2,,,,0 GORDON AVE
12783,HH&A'S LLC,0,,GORDON,AVE,,,R1B,AVONDALE,0000Q/00048/01B,12600,12600,3/14/2007,1,S0,0.52,COMMERCIAL,3,,,,0 GORDON AVE
12784,STARTER HOME BUILDERS LLC &,0,,GORDON,AVE,,,R6,GORDON AREA WEST,00240/00001/009,20600,20600,12/26/2007,1,S0,3.44,RESIDENTIAL,3,,,,0 GORDON AVE
12785,CITY OF ALBANY,307,,GORDON,AVE,,,R6,CITY OF ALBANY/LOTS,0000E/00026/009,2000,2000,7/13/2007,1,S0,0.08,,1,,,,307 GORDON AVE
12786,CITY OF ALBANY,309,,GORDON,AVE,,,R6,CITY OF ALBANY/LOTS,0000E/00026/008,1900,1900,7/13/2007,1,S0,0.07,,1,,,,309 GORDON AVE
12787,JORDAN JOSEPH L MD PC,310,,GORDON,AVE,,,C1,HOLLYWOOD,0000I/00004/11A,347600,347600,10/31/2008,1,S0,1.69,COMMERCIAL,3,,,,310 GORDON AVE


In [21]:
t_addresses[t_addresses.str.contains("GORDON")]

12207         0 GORDON AVE
12208       307 GORDON AVE
12209       309 GORDON AVE
12210       310 GORDON AVE
12211       311 GORDON AVE
12212       315 GORDON AVE
12213       323 GORDON AVE
12214       325 GORDON AVE
12215       327 GORDON AVE
12216       329 GORDON AVE
12217       403 GORDON AVE
12218       405 GORDON AVE
12219       407 GORDON AVE
12220       409 GORDON AVE
12221       411 GORDON AVE
12222       413 GORDON AVE
12223       415 GORDON AVE
12224       419 GORDON AVE
12225       421 GORDON AVE
12226       423 GORDON AVE
12227       425 GORDON AVE
12228       427 GORDON AVE
12229       502 GORDON AVE
12230       506 GORDON AVE
12231       507 GORDON AVE
12232       511 GORDON AVE
12233       512 GORDON AVE
12234       513 GORDON AVE
12235       515 GORDON AVE
12236       518 GORDON AVE
12237       519 GORDON AVE
12238       520 GORDON AVE
12239       522 GORDON AVE
12240       524 GORDON AVE
12241       526 GORDON AVE
12242       528 GORDON AVE
12243       532 GORDON AVE
1

In [23]:
df_full_taxes[df_full_taxes['STDIRECT']=='R ']

Unnamed: 0,OWNER_NAME,HOUSE_NO,STDIRECT,STREET_NAM,STREET_TYPE,UNIT,PROP_ZIP,ZONE_CODE,SUBDIVISION,PARCEL_NO,PREV_VAL,CURR_VAL,VALCHGDATE,TAXDISTRIC,HOMEEXEMPT,TOTALACRES,PROPERTY_CLASS,DIG_STRAT,BUSI_ID,NAICS,PERSKEY,FULL_ADDRESS
37236,KEARCE WOODROW,0,R,WILLIAMSBURG,RD,,,AG,DIXIE PECAN ORCH,00180/00001/01J,122400,122400,8/18/2008,2,S0,5.0,,3,,,,0 R WILLIAMSBURG RD
75127,KEARCE WOODROW,0,R,WILLIAMSBURG,RD,,,AG,DIXIE PECAN ORCH,00180/00001/01J,122400,122400,8/18/2008,2,S0,5.0,,3,,,,0 R WILLIAMSBURG RD
113231,KEARCE WOODROW,0,R,WILLIAMSBURG,RD,,,AG,DIXIE PECAN ORCH,00180/00001/01J,122400,122400,7/13/2011,2,S0,5.0,,3,,,,0 R WILLIAMSBURG RD
151295,SB&T BANK,0,R,WILLIAMSBURG,RD,,,AG,DIXIE PECAN ORCH,00180/00001/01J,122400,122400,11/23/2011,2,S0,5.0,,3,,,,0 R WILLIAMSBURG RD
189392,THOMPSON P RYAN,0,R,WILLIAMSBURG,RD,,,AG,DIXIE PECAN ORCH,00180/00001/01J,122400,122400,,2,S0,5.0,,3,,,,0 R WILLIAMSBURG RD


In [24]:
df_full_taxes['STDIRECT'][0]

nan

In [25]:
df_full_taxes[df_full_taxes['STREET_NAM'].str.contains("MADISON")]

Unnamed: 0,OWNER_NAME,HOUSE_NO,STDIRECT,STREET_NAM,STREET_TYPE,UNIT,PROP_ZIP,ZONE_CODE,SUBDIVISION,PARCEL_NO,PREV_VAL,CURR_VAL,VALCHGDATE,TAXDISTRIC,HOMEEXEMPT,TOTALACRES,PROPERTY_CLASS,DIG_STRAT,BUSI_ID,NAICS,PERSKEY,FULL_ADDRESS
19617,MADISON PARK DEVELOPMENT INC,0,S,MADISON,ST,,,R1,MADISON PARK,00012/00003/21E,3500,3500,10/16/2007,1,S0,0.07,RESIDENTIAL,3,,,,0 S MADISON ST
19618,MADISON PARK DEVELOPMENT INC,0,S,MADISON,ST,,,R1,MADISON PARK,00012/00003/21F,3500,3500,10/16/2007,1,S0,0.07,RESIDENTIAL,3,,,,0 S MADISON ST
19619,MADISON PARK DEVELOPMENT INC,0,S,MADISON,ST,,,R1,MADISON PARK,00012/00003/21A,3500,3500,10/16/2007,1,S0,0.07,RESIDENTIAL,3,,,,0 S MADISON ST
19620,MADISON PARK DEVELOPMENT INC,0,S,MADISON,ST,,,R1,MADISON PARK,00012/00003/21D,7500,7500,10/1/2008,1,S0,0.16,RESIDENTIAL,3,,,,0 S MADISON ST
19621,ALBANY GA CITY OF DCED,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00003/20B,730,730,10/16/2007,1,S0,0.23,,1,,,,0 S MADISON ST
19622,THOMAS ROBERT D,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00002/52C,1700,1700,3/14/2007,1,S0,0.42,RESIDENTIAL,3,,,,0 S MADISON ST
19623,ALBANY GA CITY OF DCED,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00003/20E,730,730,10/16/2007,1,S0,0.23,,1,,,,0 S MADISON ST
19624,ALBANY GA CITY OF DCED,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00003/20C,730,730,10/16/2007,1,S0,0.23,,1,,,,0 S MADISON ST
19625,ALBANY GA CITY OF DCED,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00003/20D,830,830,10/16/2007,1,S0,0.32,,1,,,,0 S MADISON ST
19626,THOMAS JOSEPHINE,0,S,MADISON,ST,,,R3,RIVER ROAD,00012/00003/15C,1000,1000,10/16/2007,1,S0,0.13,RESIDENTIAL,3,,,,0 S MADISON ST


In [26]:
f_addresses.head(100)

0           501 S MADISON ST
1            525 LINCOLN AVE
2           609 S JACKSON ST
3            602 S MONROE ST
4           509 W GORDON AVE
5           610 W GORDON AVE
6           700 W GORDON AVE
7           708 W GORDON AVE
8            504 WHITNEY AVE
9            510 WHITNEY AVE
10           423 WHITNEY AVE
11            402 MERCER AVE
12            404 MERCER AVE
13            408 MERCER AVE
14            522 MERCER AVE
15         414 S MCKINLEY ST
16          411 S MADISON ST
17            525 MERCER AVE
18            511 MERCER AVE
19            507 MERCER AVE
20           118 BLAYLOCK ST
21          1221 E BROAD AVE
22         1218 MULBERRY AVE
23             2632 ERICA CT
24          202 N CARROLL ST
25       1706 S JEFFERSON ST
26    2703 POINTE NORTH BLVD
27            2728 MCCALL CT
28       1000 N SLAPPEY BLVD
29           2703 MICHAEL RD
30        235 BONNY VIEW AVE
31            5101 SHILOH CT
32        2312 WILLINGHAM DR
33              1200 AUGUSTA
34        2531

In [228]:
df_full_taxes.sort_values(by=['FULL_ADDRESS', 'YEAR'])

KeyError: 'YEAR'