In [1]:
from flood_tool.tool import Tool
import pandas as pd
import numpy as np
from flood_tool import geo

In [2]:
# Input CSV files, named once so they are easy to swap for another data set.
POSTCODE_FILE = 'postcodes_unlabelled.csv'
SAMPLE_LABELS_FILE = 'postcodes_sampled.csv'
HOUSEHOLD_FILE = 'households_per_sector.csv'

# Build the Tool instance that every later cell calls into.
tool = Tool(
    postcode_file=POSTCODE_FILE,
    sample_labels=SAMPLE_LABELS_FILE,
    household_file=HOUSEHOLD_FILE,
)

In [3]:
# Preview the household table held by the Tool: one row per postcode sector,
# with its household count and number of postcode units.
tool.household_data.head()

Unnamed: 0,postcode sector,households,number of postcode units
0,AL1 1,2546,311
1,AL1 2,2642,236
2,AL1 3,2272,388
3,AL1 4,3707,339
4,AL1 5,4406,347


In [4]:
# Preview the labelled sample: postcode attributes plus the riskLabel and
# medianPrice columns.
tool.postcode_sampled.head()

Unnamed: 0,postcode,sector,easting,northing,localAuthority,altitude,soilType,riskLabel,medianPrice
0,PO7 8PR,PO7 8,469395.0,108803.0,Havant,30,Planosols,1,233500.0
1,SO17 1NS,SO17 1,442771.0,114321.0,Southampton,20,Unsurveyed/Urban,1,291800.0
2,TN28 8XN,TN28 8,606861.0,124689.0,Folkestone and Hythe,10,Cambisols,1,326500.0
3,KT3 4JW,KT3 4,521649.0,168848.0,Kingston upon Thames,20,Unsurveyed/Urban,1,875200.0
4,CT2 8AA,CT2 8,614532.0,158074.0,Canterbury,10,Unsurveyed/Urban,10,303500.0


In [5]:
# Preview the unlabelled postcode table: the same postcode attributes as the
# labelled sample, but without riskLabel and medianPrice.
tool.postcodedb.head()

Unnamed: 0,postcode,sector,easting,northing,localAuthority,altitude,soilType
0,BN1 5PF,BN1 5,530401.0,105619.0,Brighton and Hove,60,Unsurveyed/Urban
1,BN7 2HP,BN7 2,541934.0,110957.0,Lewes,10,Unsurveyed/Urban
2,TN6 3AW,TN6 3,552132.0,129270.0,Wealden,160,Unsurveyed/Urban
3,BN3 7LP,BN3 7,527448.0,106738.0,Brighton and Hove,60,Unsurveyed/Urban
4,DA16 2DU,DA16 2,546021.0,175403.0,Bexley,60,Unsurveyed/Urban


##### Optional: when new sampled data is passed in, it may be worth retraining the flood class model using randomized search; however, this can take some time (up to 15-20 minutes). Comment out one of the two lines below and uncomment the other, depending on which training method you want to use. Doing so may require restarting the kernel.

In [3]:
# Default training run. The recorded output shows four stages: flood class,
# local authority, median price and flood class from location.
tool.train()
# Alternative (presumably the slower randomized-search retraining of the flood
# class model): comment out the call above and uncomment the line below.
# tool.train_flood_class_cv_optimization()

Training flood class model
==== Converting easting northing to lat long
==== Fitting the data using random forest regressor
Training local auth model
Training median price model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["latitude"] = lat
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["longitude"] = lon


Training flood class from loc model
====> fitting the flood class from location model


##### Example test data

In [6]:
# Postcode units and the sectors they belong to.
postcodes, sectors = ["BN1 5PF", "BN7 2HP"], ["BN1 5", "BN7 2"]

# OSGB36 coordinates, paired by position.
# NOTE(review): in the postcodedb preview these pairs are the coordinates of
# BN7 2HP and TN6 3AW, not of the two postcodes listed above.
eastings, northings = [541934.0, 552132.0], [110957.0, 129270.0]

# WGS84 coordinates, paired by position. These are round-number points and do
# not correspond to the postcodes above.
latitudes, longitudes = [50, 51], [1, 2]


##### Get flood class from easting/northing locations

In [5]:
# Predict the flood class for each (easting, northing) pair. The recorded
# result is an int64 Series indexed by the coordinate tuple.
tool.get_flood_class_from_OSGB36_locations(eastings, northings)

====> generating predictions for flood class from location


(541934.0, 110957.0)    1
(552132.0, 129270.0)    1
dtype: int64

In [7]:
# Predict the flood class for each (latitude, longitude) pair.
# NOTE(review): the recorded output for this cell prints a "fitting" message
# and a feature-name mismatch warning (latitude/longitude unseen at fit time,
# easting/northing missing) - worth checking inside flood_tool.
tool.get_flood_class_from_WGS84_locations(latitudes=latitudes, longitudes=longitudes)

====> fitting the flood class from location model
====> generating predictions for flood class from location


Feature names unseen at fit time:
- latitude
- longitude
Feature names seen at fit time, yet now missing:
- easting
- northing



(50, 1)    3
(51, 2)    3
dtype: int64

##### Get flood class from postcodes

In [9]:
# Look up the flood class for each postcode unit; the result is indexed by postcode.
# NOTE(review): the recorded values are float64 (0.010000, 1.478956), whereas
# the location-based calls return int64 classes - confirm this is intended.
tool.get_flood_class_from_postcodes(postcodes)

BN1 5PF    0.010000
BN7 2HP    1.478956
dtype: float64

In [10]:
# Estimate the median house price for each postcode unit; the result is a
# float64 Series indexed by postcode.
tool.get_median_house_price_estimate(postcodes)

BN1 5PF    567665.440159
BN7 2HP    614543.011162
dtype: float64

In [11]:
# Total property value per postcode unit. In the recorded output these values
# are identical to the median house price estimates for the same postcodes.
tool.get_total_value(postcodes)

BN1 5PF    567665.440159
BN7 2HP    614543.011162
dtype: float64

In [12]:
# The same call also accepts postcode sectors; the result is indexed by sector.
tool.get_total_value(sectors)

BN1 5    2.315115e+09
BN7 2    2.102573e+09
dtype: float64

In [13]:
# Annual flood risk per postcode unit.
# NOTE(review): in the recorded outputs each value is approximately the
# postcode's total value multiplied by its get_flood_class_from_postcodes
# result - confirm against the flood_tool implementation.
tool.get_annual_flood_risk(postcodes)

BN1 5PF      5676.654402
BN7 2HP    908882.272535
dtype: float64

#### Get predictions for full set of unlabelled data

In [14]:
# Start from a private copy of the unlabelled postcode table.
postcode_unlabelled = tool.postcodedb.copy()

# Extract the OSGB36 coordinate columns as standalone NumPy arrays.
easting, northing = (
    np.array(postcode_unlabelled[column]) for column in ('easting', 'northing')
)

# Convert the OSGB36 coordinates to GPS latitude/longitude.
lat, long = geo.get_gps_lat_long_from_easting_northing(easting, northing)

# Swap the easting/northing columns for latitude/longitude in one chained step.
postcode_unlabelled = (
    postcode_unlabelled
    .assign(latitude=lat, longitude=long)
    .drop(columns=['easting', 'northing'])
)

# The method name is spelled "predicitions" in this call; keep it as written.
# The recorded result adds riskLabel and medianPrice columns to the table.
tool.get_predicitions_for_unlabelled_data(postcode_unlabelled)

==== Generating prediction for flood class from postcodes


Unnamed: 0,postcode,sector,localAuthority,altitude,soilType,latitude,longitude,riskLabel,medianPrice
0,BN1 5PF,BN1 5,Brighton and Hove,60,Unsurveyed/Urban,50.834677,-0.148139,1,567665.440159
1,BN7 2HP,BN7 2,Lewes,10,Unsurveyed/Urban,50.879937,0.017610,3,614543.011162
2,TN6 3AW,TN6 3,Wealden,160,Unsurveyed/Urban,51.041906,0.170125,1,695300.000000
3,BN3 7LP,BN3 7,Brighton and Hove,60,Unsurveyed/Urban,50.845393,-0.189664,1,486309.406243
4,DA16 2DU,DA16 2,Bexley,60,Unsurveyed/Urban,51.458048,0.101839,1,434625.845110
...,...,...,...,...,...,...,...,...,...
4995,N17 9XJ,N17 9,Haringey,10,Unsurveyed/Urban,51.598652,-0.055595,6,528861.033504
4996,E9 6SG,E9 6,Hackney,10,Unsurveyed/Urban,51.543341,-0.046618,1,551708.081397
4997,CT6 8TB,CT6 8,Canterbury,10,Unsurveyed/Urban,51.370422,1.120780,5,301870.580824
4998,KT17 1XX,KT17 1,Epsom and Ewell,40,Unsurveyed/Urban,51.345942,-0.252416,1,555390.282128
