# Purpose: Identify building sites with high-frequency complaints

In [None]:
# Imports
import os
import pandas as pd
import warnings
import importlib
import noise_functions
import noise_plot_functions
from matplotlib import pyplot as plt
# Re-import the two local helper modules so that edits made to them on disk
# are picked up without restarting the kernel.
importlib.reload(noise_functions)  # For local debugging
importlib.reload(noise_plot_functions) # For local debugging


# Environment setup: silence the known pandas deprecation message and make
# sure the output directory for derived artefacts exists.
warnings.filterwarnings(action="ignore", message="iteritems is deprecated")
# Local-debugging toggles (enable / restore full column display):
# pd.set_option('display.max_columns', None); pd.reset_option('display.max_columns')
out_dir = 'derivatives'
os.makedirs(name=out_dir, exist_ok=True)

# Read data

In [None]:
# Load the raw complaints export from the local data directory into `df`
raw_csv_name = 'raw-all-noise-complaints_range-1997-12-Apr-2023' + '.csv'
raw_csv_path = os.path.join('data', raw_csv_name)
df = pd.read_csv(raw_csv_path)
print('Loaded data as: df')
noise_functions.describe_data(df)

In [None]:
# Preprocess datetime columns by running the three noise_functions helpers
# in sequence, each receiving the frame returned by the previous one.
df = (
    df.pipe(noise_functions.convert_raw_date_time)
    .pipe(noise_functions.remove_missing_date_rows)
    .pipe(noise_functions.create_joined_datetime)
)

In [None]:
# Order the rows chronologically by the 'Received DateTime' column
df = df.sort_values(by=['Received DateTime'], ascending=True)
print('sorted df')
noise_functions.describe_data(df)

In [None]:
# Drop complaints received before the cutoff date
cutoffDate = '2000-01-01'

print(f'dropping dates before {cutoffDate}')
# The filter is written as "not earlier than cutoff" rather than ">= cutoff".
# NOTE(review): assuming 'Received DateTime' is a datetime64 column, this form
# also keeps rows whose timestamp is NaT (comparisons with NaT are False) —
# confirm that is the intended behaviour.
df = df[~(df['Received DateTime'] < cutoffDate)]
noise_functions.describe_data(df)

## All time - nComplaints

In [None]:
# nComplaints (Daily)
# Number of complaints per calendar day, drawn on the figure configured here.
fig = plt.figure(figsize=(10, 8))  # plt.style.use('bmh')
fig.patch.set_facecolor('white')
ax = fig.gca()

# Count a single column so that exactly one line is drawn. Plotting the whole
# grouped DataFrame would draw one line per column and would not reuse the
# figure configured above; passing `ax` explicitly pins the target axes.
daily_counts = df.groupby('Received Date')['Complaint Type'].count()
daily_counts.plot(kind='line', ax=ax)

plt.title('nComplaints (Daily Binned)', fontsize=18)
plt.xlabel("Received Date", fontsize=14)
plt.ylabel('nComplaints', fontsize=14)
plt.tick_params(axis='both', labelsize=12, length=0)
plt.grid(False)

In [None]:
# nComplaints (Monthly)
# Number of complaints per calendar month.
plt.figure(figsize=(10, 8))  # plt.style.use('bmh')
fig = plt.gcf()
fig.patch.set_facecolor('white')

# Bin by month ('M') so the data matches the title; the previous 'D' period
# produced daily bins under a "Monthly" heading.
monthly_counts = df.groupby(df['Received DateTime'].dt.to_period('M'))['Complaint Type'].count()
monthly_counts.plot(kind='line')

plt.title('nComplaints (Monthly Binned)', fontsize=18)
plt.xlabel("Received Date", fontsize=14)
plt.ylabel('nComplaints', fontsize=14)
plt.tick_params(axis='both', labelsize=12, length=0)
plt.grid(False)


In [None]:
# nComplaints (Yearly)
# Number of complaints per calendar year.
plt.figure(figsize=(10, 8))  # plt.style.use('bmh')
fig = plt.gcf()
fig.patch.set_facecolor('white')

yearly_counts = df.groupby(df['Received DateTime'].dt.to_period('Y'))['Complaint Type'].count()
yearly_counts.plot(kind='line')

# Title now matches the yearly binning (it previously read "Monthly Binned",
# carried over from the monthly cell).
plt.title('nComplaints (Yearly Binned)', fontsize=18)
plt.xlabel("Received Date", fontsize=14)
plt.ylabel('nComplaints', fontsize=14)
plt.tick_params(axis='both', labelsize=12, length=0)
plt.grid(False)