# Purpose: Explore April 2022-2023 Complaints

In [None]:
# Imports
import os
import pandas as pd
import warnings
import importlib
import noise_functions
import noise_plot_functions
import matplotlib.pyplot as plt
import numpy as np
# Re-execute the already-imported local helper modules so that edits made to
# their source files are picked up in this session.
importlib.reload(noise_functions)  # For local debugging
importlib.reload(noise_plot_functions) # For local debugging


# Init environment
# Output directory for this notebook; later cells save plot images here and
# also pass it to noise_functions.count_unique_codes.
out_dir = 'derivatives'
# exist_ok=True: do not raise if the directory is already present.
os.makedirs(out_dir, exist_ok=True)
# pd.set_option('display.max_columns', None); pd.reset_option('display.max_columns') # Options for local debugging
# Silence warnings whose text begins with "iteritems is deprecated".
# `message` is a regular expression matched (case-insensitively) against the
# start of the warning message.
warnings.filterwarnings("ignore", message="iteritems is deprecated")

# Read data

In [None]:
# Read
# Load the complaints data via the project helper and bind it to `df`, the
# working object that every later cell reads and reassigns.
# NOTE(review): the commented-out line below refers to a `setup` module that is
# not among this notebook's imports; it would need updating before being re-enabled.
# rawD = setup.read_data()  # keep persistent copy for reference during development
df = noise_functions.read_data()

In [None]:
# Preprocess datetime columns
# Three project helpers are applied in sequence, each taking and returning `df`.
# Their exact behavior is defined in noise_functions; going by their names they
# presumably (1) parse the raw date/time columns, (2) drop rows with no date, and
# (3) build a combined datetime column -- TODO confirm against noise_functions.
# The next cell sorts by 'Received DateTime', so that column is expected to
# exist after this step.
df = noise_functions.convert_raw_date_time(df)
df = noise_functions.remove_missing_date_rows(df)
df = noise_functions.create_joined_datetime(df)

In [None]:
# Sort by datetime
# Assuming `df` is a pandas DataFrame: sort_values returns a new frame ordered
# ascending by 'Received DateTime' (the default order); the existing index
# labels are kept rather than reset.
df = df.sort_values('Received DateTime')

## Subset - 2022-2023 April

In [None]:
# Subset to the one-year window 2022-04-12 through 2023-04-12
# (the bounds below span twelve months, matching the "April 2022-2023" title).
first_date = '2022-04-12'
last_date = '2023-04-12'
# Replace `df` with the rows selected by the project helper for these bounds.
df = noise_functions.get_subset_via_dates(df, first_date, last_date)
# NOTE(review): the message below says the bounds are inclusive; that depends on
# how get_subset_via_dates compares dates -- confirm in noise_functions.
print(f'Got subset of complaints within these dates (inclusive):\n{first_date} - {last_date}')
# Summarize the subset using the project helper.
noise_functions.describe_data(df)

## Display unique values

Let's display all unique values across these complaints to get the gist of the data.

In [None]:
# Run the project helper on the subset and keep whatever it returns in
# `unique_values_dict`. Going by its name the helper presumably also prints the
# unique values it finds -- confirm in noise_functions.
unique_values_dict = noise_functions.extra_print_unique_data(df)

In [None]:
# Bare expression as the last line of the cell: the notebook renders `df` as the cell output.
df

In [None]:
# Complaint Type: plot each unique category against its number of complaints
# and save the figure as a PNG in out_dir.
descript_str = 'Frequency_Complaint-Type'
out_name = os.path.join(out_dir, f'{descript_str}.png')

# Only the first element of the helper's result is used here; it is consumed
# as a mapping of category -> count.
unique_counts_dict_complaint_type = noise_functions.count_unique_codes(df, 'Complaint Type', out_dir)[0]
categories = [*unique_counts_dict_complaint_type.keys()]
values = [*unique_counts_dict_complaint_type.values()]

fig = noise_plot_functions.plot_nComplaint_barh(categories, values, descript_str)

# plt.savefig writes the *current* pyplot figure; this assumes the plotting
# helper leaves its figure as the current one -- confirm in noise_plot_functions.
print(f"writing {out_name}")
plt.savefig(out_name, bbox_inches='tight', dpi=300)

In [None]:
# Complaint Sub Type: same plot as for Complaint Type -- each unique category
# against its number of complaints, saved as a PNG in out_dir.
descript_str = 'Frequency_Complaint-Sub-Type'
out_name = os.path.join(out_dir, f'{descript_str}.png')

# NOTE(review): this rebinds `unique_counts_dict_complaint_type` to the
# 'Complaint Sub Type' counts, replacing the Complaint Type counts held under
# the same name. The name is kept because other cells may read it.
unique_counts_dict_complaint_type = noise_functions.count_unique_codes(df, 'Complaint Sub Type', out_dir)[0]
categories = [*unique_counts_dict_complaint_type.keys()]
values = [*unique_counts_dict_complaint_type.values()]

fig = noise_plot_functions.plot_nComplaint_barh(categories, values, descript_str)

# plt.savefig writes the *current* pyplot figure; this assumes the plotting
# helper leaves its figure as the current one -- confirm in noise_plot_functions.
print(f"writing {out_name}")
plt.savefig(out_name, bbox_inches='tight', dpi=300)
