In [None]:
# `os` is imported here because this is the first cell of the notebook and
# nothing above it brings the name into scope on a fresh kernel.
import os

# Step the working directory up one level (presumably so the local `model_*`
# modules imported in the next cell can be found - confirm against repo layout).
# NOTE: this is not idempotent; re-running the cell moves up one more level.
os.chdir('..')

In [None]:

from model_config import *
from model_packages import *
from model_utils import *
import re

# Define functions

In [None]:
def preprocess_input_data(input_data):
    """Wrangle the raw site feature table into the frame used for prediction.

    Steps, in order:
      1. wrap ``input_data`` in a GeoDataFrame;
      2. drop the two counters excluded below;
      3. replace each geometry by its centroid and expose it as ``lon``/``lat``;
      4. add season indicator columns via ``create_season_columns``;
      5. merge the two urban land-type labels into ``'urban_settings'``.

    Parameters
    ----------
    input_data : DataFrame-like
        Must provide the columns ``counter``, ``Date``, ``land_type_labels``,
        ``land_habitat_labels`` and a geometry column.

    Returns
    -------
    tuple
        ``(df, lnd_ftrs, hbt_ftrs)`` where ``df`` is the processed
        GeoDataFrame and the two lists hold the ``land_type_labels_*`` and
        ``land_habitat_labels_*`` names built from the unique label values.
    """
    # raw data wrangling: create geo dataframe
    df = gpd.GeoDataFrame(input_data)

    # drop sites that create negative prediction values - to be investigated.
    # .copy() gives an independent frame, so the column assignments below never
    # write into a slice of the original data.
    excluded_counters = ['Vessey_Pastures', 'Trosley_CP']
    df = df[~df['counter'].isin(excluded_counters)].copy()

    # assign geometry as centre of buffer
    df['geometry'] = df.geometry.centroid
    # extract lat lon
    df['lon'] = df.geometry.x
    df['lat'] = df.geometry.y

    # create season columns
    df = create_season_columns(df)

    # combine minority classes in land_type features.
    # Assign the result back instead of calling replace(inplace=True) on the
    # column selection: the chained in-place form is deprecated and does not
    # update ``df`` when pandas Copy-on-Write is active.
    lnd_dict = {
        'major_urban_settings': 'urban_settings',
        'minor_urban_settings': 'urban_settings',
    }
    df['land_type_labels'] = df['land_type_labels'].replace(lnd_dict)

    # create land and habitat feature constants
    lnd_ftrs = ['land_type_labels_' + x for x in df['land_type_labels'].unique()]
    hbt_ftrs = ['land_habitat_labels_' + x for x in df['land_habitat_labels'].unique()]

    return df, lnd_ftrs, hbt_ftrs

def get_season(month):
    """Return the season name for a month number.

    Months 3-5 map to ``'spring'``, 6-8 to ``'summer'`` and 9-11 to
    ``'autumn'``. Every other value (including 12, 1 and 2) maps to
    ``'winter'``.
    """
    season_bounds = (
        (3, 5, 'spring'),
        (6, 8, 'summer'),
        (9, 11, 'autumn'),
    )
    for first_month, last_month, season in season_bounds:
        if first_month <= month <= last_month:
            return season
    # anything outside the 3..11 range falls through to winter
    return 'winter'
def create_season_columns(df):
    """Append one-hot season columns derived from the ``Date`` column.

    The month is taken as the second ``'-'``-separated field of ``Date`` and
    mapped to a season with ``get_season``. One indicator column is added per
    season present in the data, and ``winter`` is then dropped so that it acts
    as the baseline category.

    Parameters
    ----------
    df : DataFrame
        Must contain a string ``Date`` column whose second ``'-'``-separated
        field is the month number. The frame passed in is not modified.

    Returns
    -------
    DataFrame
        A new frame: the original columns followed by the season indicators
        (``winter`` excluded).
    """
    # Work on standalone Series so no scratch 'Month'/'Season' columns are
    # written onto the caller's frame.
    month = df['Date'].str.split('-', expand=True)[1].astype(int)
    season_columns = pd.get_dummies(month.apply(get_season))

    df = pd.concat([df, season_columns], axis=1)

    # 'winter' only exists when the data contains winter dates; tolerate its
    # absence rather than raising KeyError.
    return df.drop(columns=['winter'], errors='ignore')

# Load Model

In [None]:

# Load the saved model artefact named 'voting_regressor_model' from the data folder
model_path = data_folder + 'voting_regressor_model'
model = load_model(model_path)

# Format input data

In [None]:
# Read the pickled test-site feature table for the configured buffer zones.
# NOTE: pickle files should only be loaded from trusted sources.
ts_features_file = f'test_sites_static_and_dynamic_features_{bufr_zones_mrts}.pkl'
ts_input_data = pd.read_pickle(data_folder + ts_features_file)

# These amenity columns are set to 0 for every test site
zeroed_amenity_columns = [
    'amenity_bar',
    'amenity_bus_station',
    'amenity_food_court',
    'amenity_taxi_station',
    'amenity_coach_parking',
    'amenity_beer_garden',
]
ts_input_data[zeroed_amenity_columns] = 0

df, lnd_ftrs, hbt_ftrs = preprocess_input_data(ts_input_data)

# create dummy variables for both categorical label columns in one pass
df = pd.get_dummies(df, columns=['land_type_labels', 'land_habitat_labels'])
df['counter'] = df['site']

In [None]:
# The test locations may lack columns that exist in the training output, e.g.
# no 'amenity_pub', or no site labelled 'land_type_labels_rural_settings'.
# Any such column is added here and filled with 0.

# NOTE: pickle files should only be loaded from trusted sources.
original_input = pd.read_pickle(data_folder + 'training_predictions.pkl')
original_columns = original_input.columns

# every column label seen in either the training output or the test frame
cols = original_columns.union(df.columns)

df = df.reindex(columns=cols, fill_value=0)

# Generate Predictions

In [None]:
# Columns handed to the model, in the order they are selected
model_input_columns = [
    'counter',
    'Date',
    'Dog',
    'accessible_green_space_area',
    'PROW_Total_length_km',
    'waterside_length_km',
    'land_type_labels_mixed_settings',
    'land_habitat_labels_Grassland_woodland_wetland',
    'amenity_bar',
    'amenity_parking',
    'amenity_pub',
    'amenity_toilets',
    'amenity_beer_garden',
    'amenity_bus_station',
    'amenity_food_court',
    'amenity_taxi_station',
    'amenity_coach_parking',
    'Age group 0-25',
    'Age group 25-65',
    'Household is deprived in at least 1 dimension',
    'Unemployed_population',
    'Asian, Asian British or Asian Welsh',
    '2 or more cars or vans in household',
    'Population Density: Persons per square kilometre; measures: Value',
    'total_trip_count',
    'tavg',
]

# Some sites have missing POI features (expected for some locations),
# so missing values are replaced with 0 before predicting.
model_input = df[model_input_columns].fillna(0)
test_site_predictions = predict_model(model, data=model_input)