# Purpose: Identify building sites with high-frequency complaints

In [None]:
# Imports
import os
import pandas as pd
import warnings
import importlib
import noise_functions
importlib.reload(noise_functions)  # For local debugging

# Environment setup
# Suppress any warning whose message starts with "iteritems is deprecated" to keep cell output readable.
warnings.filterwarnings(action="ignore", message="iteritems is deprecated")
# pd.set_option('display.max_columns', None); pd.reset_option('display.max_columns') # Options for local debugging

# Folder handed to the helpers below as their output location; created if it does not exist yet.
out_dir = 'derivatives'
os.makedirs(name=out_dir, exist_ok=True)

# Read data

In [None]:
# Load the complaints data through the project helper. `df` is the working copy that the
# following cells filter and reassign step by step.
# rawD = setup.read_data()  # keep persistent copy for reference during development
# NOTE(review): the commented-out line above refers to `setup`, which the imports cell does not import.
df = noise_functions.read_data()

In [None]:
# Datetime preprocessing: run the helpers in this fixed order, feeding each one the
# result of the previous step (convert raw values -> drop rows without a date -> build joined datetime).
datetime_steps = (
    noise_functions.convert_raw_date_time,
    noise_functions.remove_missing_date_rows,
    noise_functions.create_joined_datetime,
)
for step in datetime_steps:
    df = step(df)

In [None]:
# Order the rows by the 'Received DateTime' column (ascending, which is the sort_values default)
df = df.sort_values(by='Received DateTime', ascending=True)

## Subset - Building Site Complaints

In [None]:
# Keep only rows whose 'Complaint Type' is a building-site complaint;
# both capitalisations listed below are accepted.
building_site_labels = ['Building Site', 'Building site']
is_building_site = df['Complaint Type'].isin(building_site_labels)
df = df[is_building_site]
print('Dropped non-building sites')
noise_functions.describe_data(df)

In [None]:
# Restrict the data to a fixed two-month window.
# The bounds are hard-coded: edit both dates to move the window.
first_date, last_date = '2023-02-12', '2023-04-12'
df = noise_functions.get_subset_via_dates(df, first_date, last_date)
print(f'Got subset of complaints within these dates (inclusive):\n{first_date} - {last_date}')
noise_functions.describe_data(df)

## Display unique values

Let's display all unique values across these complaints to get an overview of the data.

In [None]:
# Run the unique-value helper on the filtered complaints and keep whatever it returns.
# NOTE(review): judging by its name the helper also prints the values; the structure of
# `unique_values_dict` is defined in noise_functions — confirm there.
unique_values_dict = noise_functions.extra_print_unique_data(df)

In [None]:
# Count codes for 'Noise Complaint Index'; only element [0] of the helper's return value is kept.
# NOTE(review): `out_dir` is passed through, presumably so the helper can write its output there — confirm.
unique_counts_dict_reference = noise_functions.count_unique_codes(df, 'Noise Complaint Index', out_dir)[0]  # Reference in Uniform

## Write complaint frequencies (.json) by geographical location

In [None]:
# Count codes for 'WardCode'; only element [0] of the helper's return value is kept.
# NOTE(review): presumably the helper also writes its result under `out_dir` — confirm in noise_functions.
unique_counts_dict_ward = noise_functions.count_unique_codes(df, 'WardCode', out_dir)[0]

In [None]:
# Count codes for 'LSOACode'; only element [0] of the helper's return value is kept.
# NOTE(review): presumably the helper also writes its result under `out_dir` — confirm in noise_functions.
unique_counts_dict_LSOA = noise_functions.count_unique_codes(df, 'LSOACode', out_dir)[0]

In [None]:
# Count codes for 'MSOACode'; only element [0] of the helper's return value is kept.
# NOTE(review): presumably the helper also writes its result under `out_dir` — confirm in noise_functions.
unique_counts_dict_MSOA = noise_functions.count_unique_codes(df, 'MSOACode', out_dir)[0]

In [None]:
# Count codes for 'OutputArea'; only element [0] of the helper's return value is kept.
# NOTE(review): presumably the helper also writes its result under `out_dir` — confirm in noise_functions.
unique_counts_dict_OutA = noise_functions.count_unique_codes(df, 'OutputArea', out_dir)[0]

## Manually Identify the Top 5 LSOAs in a GIS Web Application

In [None]:
# Print the full LSOA mapping, then the five LSOAs with the highest counts.
print('All LSOAs\n')
print(unique_counts_dict_LSOA)

# Rank explicitly by count rather than relying on the dict's insertion order: the ordering
# produced by count_unique_codes is not visible from this notebook, and slicing the first
# five keys is only a "top 5" if the dict happens to be sorted by descending count.
# sorted() is stable (also with reverse=True), so ties keep the dict's order and the output
# is unchanged when the dict is already in descending-count order.
# NOTE(review): assumes the dict values are mutually comparable counts — confirm in noise_functions.
top_lsoas = sorted(
    unique_counts_dict_LSOA.items(),
    key=lambda item: item[1],
    reverse=True,
)[:5]

print('\nTop 5 LSOAs\n')
for lsoa_code, complaint_count in top_lsoas:
    print(lsoa_code, complaint_count)