<h1>Week 7 Summative Report</h1>
<h3>Data Analysis Tool - Y3871883</h3><br>
There are three sections to the Jupyter Notebook:
<ol>
<li>Static Methods and Import Statements</li>
<li>GUI Classes</li>
<li>Instantiate Tkinter GUI and Run Main Loop</li>
</ol>

<H2>1. Static Methods and Import Statements</H2>

In [1]:
import tkinter as tk
from tkinter import ttk, filedialog
from PIL import ImageTk, Image
from pymongo import MongoClient
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import seaborn as sns
import tkcalendar
from datetime import datetime
# import threading

# Static Methods
def get_file_input():
    """Ask the user to pick a file and return the chosen path.

    Opens the Tk "open file" dialog starting at the filesystem root and
    offering a csv filter plus an all-files filter. No file is read here.

    Returns:
        Whatever ``askopenfilename`` returns; callers in this file treat a
        falsy value as "nothing selected".
    """
    file_type_choices = (("csv files", "*.csv"), ("all files", "*.*"))
    chosen_path = filedialog.askopenfilename(
        initialdir="/",
        title="Select file",
        filetypes=file_type_choices,
    )
    return chosen_path


def owner_id_check(string_in):
    """Validate an owner ID of the form ``OW`` followed by seven digits.

    Args:
        string_in: Candidate value, normally a string read from the csv.

    Returns:
        ``string_in`` unchanged when it is a valid owner ID, otherwise
        ``np.nan``.
    """
    # Empty csv cells reach this function as float NaN (or None); ``re``
    # raises TypeError on non-strings, so those are reported as invalid.
    # ``fullmatch`` also rejects a trailing newline that ``$`` would accept.
    if isinstance(string_in, str) and re.fullmatch(r'OW\d{7}', string_in):
        return string_in
    return np.nan


def facility_id_check(string_in):
    """Validate a facility ID of the form ``FA`` followed by seven digits.

    Args:
        string_in: Candidate value, normally a string read from the csv.

    Returns:
        ``string_in`` unchanged when it is a valid facility ID, otherwise
        ``np.nan``.
    """
    # Empty csv cells reach this function as float NaN (or None); ``re``
    # raises TypeError on non-strings, so those are reported as invalid.
    # ``fullmatch`` also rejects a trailing newline that ``$`` would accept.
    if isinstance(string_in, str) and re.fullmatch(r'FA\d{7}', string_in):
        return string_in
    return np.nan


def record_id_check(string_in):
    """Validate a record ID of the form ``PR`` followed by seven digits.

    Args:
        string_in: Candidate value, normally a string read from the csv.

    Returns:
        ``string_in`` unchanged when it is a valid record ID, otherwise
        ``np.nan``.
    """
    # Empty csv cells reach this function as float NaN (or None); ``re``
    # raises TypeError on non-strings, so those are reported as invalid.
    # ``fullmatch`` also rejects a trailing newline that ``$`` would accept.
    if isinstance(string_in, str) and re.fullmatch(r'PR\d{7}', string_in):
        return string_in
    return np.nan


def facility_zip_check(string_in):
    """Validate a US ZIP code (``12345`` or ``12345-6789``).

    Args:
        string_in: Candidate value. Usually a string, but an integer is
            accepted too because pandas parses an all-numeric ZIP column
            as integers.

    Returns:
        ``string_in`` unchanged (original type preserved) when it is a
        valid ZIP code, otherwise ``np.nan``.
    """
    if isinstance(string_in, str):
        candidate = string_in
    elif isinstance(string_in, (int, np.integer)) and not isinstance(string_in, bool):
        candidate = str(string_in)
    else:
        # NaN / None / anything else cannot be a ZIP code; ``re`` would
        # raise TypeError if handed these values directly.
        return np.nan

    # ``fullmatch`` rejects a trailing newline that ``$`` would accept.
    if re.fullmatch(r'[0-9]{5}(?:-[0-9]{4})?', candidate):
        return string_in
    return np.nan


def compute_stats(df, columns, value):
    """Compute mean, median and mode of one column per index group.

    Args:
        df: DataFrame whose index contains the level(s) named in
            ``columns``.
        columns: Index level name, or list of index level names, to group
            by.
        value: Name of the column to summarise.

    Returns:
        dict with keys ``'mean'`` (rounded to 2 decimals), ``'median'`` and
        ``'mode'``, each a Series indexed by the grouping level(s).
    """
    # ``Series.mean(level=...)`` / ``median(level=...)`` no longer exist in
    # pandas 2.x; an explicit groupby on the index levels is the supported
    # equivalent. ``value`` is used throughout instead of a hard-coded SCORE.
    grouped = df.groupby(level=columns)[value]

    def most_common(series):
        # value_counts() is ordered by frequency, so the first label is the
        # mode; an all-NaN group has no mode.
        frequencies = series.value_counts()
        return frequencies.index[0] if len(frequencies) else np.nan

    return {
        'mean': grouped.mean().round(2),
        'median': grouped.median(),
        'mode': grouped.agg(most_common),
    }

<H2>2. GUI Classes</H2>

In [2]:
class DataAnalysisApp:  # controller
    def __init__(self, master):
        """Build the main window, its views and the database connection.

        Arguments:
        master -- the Tk root window that hosts the application
        """
        self.master = master
        master.title("Dept. of Health - Inspection Analysis")
        master.wm_iconbitmap('DOH.ico')
        master.configure(bg='white')

        # The model must exist before the views: DataImportView binds a
        # label to a variable held by the model.
        self.model = Model()
        self.logo_view = LogoView(master)
        self.main_view = DataImportView(master, self)
        self.history_view = HistoryView(master, self)
        # Result windows only exist after an analysis has been run
        self.violation_view = self.correlation_view = self.stats_view = None

        # Three stacked rows (logo, controls, message pane); the middle
        # row takes twice the spare space of the other two.
        for row_index, row_weight in enumerate((1, 2, 1)):
            master.grid_rowconfigure(row_index, minsize=100, weight=row_weight)
        master.grid_columnconfigure(0, weight=1)

        self.set_up_database()
        self.initialise_dates()
        self.set_inspections_doc_count_string()

        # Analysis needs more than MIN_DOC_COUNT documents in range
        if self.doc_count_db() <= self.model.MIN_DOC_COUNT:
            self.main_view.run_button["state"] = "disabled"

    def get_inspections_input(self):
        """Let the user pick the inspections csv, then load and clean it.

        On success the cleaned frame is stored in the model, the
        "load violations" button is enabled, and the run button is enabled
        when the database holds enough documents in the selected range.
        Nothing changes when the dialog is cancelled or the file cannot be
        loaded.
        """
        filename = get_file_input()
        if not filename:
            return

        loaded = self.load_file(filename)
        # load_file returns False instead of a DataFrame when it fails;
        # cleaning that value would crash, so stop here.
        if not isinstance(loaded, pd.DataFrame):
            self.error_message("Inspections file was not loaded.")
            return

        self.model.inspections_filename = filename
        self.model.inspections_df = loaded
        self.clean_inspections_data()
        self.main_view.violations_file_button["state"] = "normal"
        if self.doc_count_db() > self.model.MIN_DOC_COUNT:
            self.main_view.run_button["state"] = "normal"

    def get_violations_input(self):
        """Let the user pick the violations csv and store both datasets.

        On success inactive programs are filtered out, inspections and
        violations are written to the database, the document-count label
        is refreshed and the run button is enabled when enough documents
        fall in the selected date range. Nothing changes when the dialog
        is cancelled or the file cannot be loaded.
        """
        filename = get_file_input()
        if not filename:
            return

        loaded = self.load_file(filename)
        # load_file returns False instead of a DataFrame when it fails
        if not isinstance(loaded, pd.DataFrame):
            self.error_message("Violations file was not loaded.")
            return

        self.model.violations_filename = filename
        self.model.violations_df = loaded
        self.remove_inactive_data()
        self.convert_to_dict("inspections_violations")
        self.load_data("inspections_violations")

        # The database content just changed, so the count shown next to
        # the date filters would otherwise be stale until a date is edited.
        self.set_inspections_doc_count_string()
        if self.doc_count_db() > self.model.MIN_DOC_COUNT:
            self.main_view.run_button["state"] = "normal"

    def get_inventory_input(self):
        """Let the user pick the inventory csv and store it in the database.

        Nothing changes when the dialog is cancelled or the file cannot be
        loaded.
        """
        filename = get_file_input()
        if not filename:
            return

        loaded = self.load_file(filename)
        # load_file returns False instead of a DataFrame when it fails
        if not isinstance(loaded, pd.DataFrame):
            self.error_message("Inventory file was not loaded.")
            return

        self.model.inventory_filename = filename
        self.model.inventory_df = loaded
        self.convert_to_dict("inventory")
        self.load_data("inventory")

    def load_file(self, filename):
        """Read data from csv file

        """
        if filename.endswith('.csv'):
            try:
                with open(filename, encoding='utf-8-sig') as file:
                    self.message(f"Loading data from {filename}")
                    df = pd.read_csv(file)
                return df
            except FileNotFoundError:
                return False
        else:
            self.error_message("Error - file must be .csv")
            return False

    def clean_inspections_data(self):
        """Run the full cleaning pipeline on the loaded inspections data.

        Steps, in order: validate identifier columns, clean the rows,
        correct the column datatypes, then derive the RISK column. The
        frame is read from and written back to ``self.model.inspections_df``.
        """
        # The row cleaning used to live in a second method that was also
        # named clean_inspections_data; it replaced this one, so none of the
        # other steps ever ran. It is now the private helper below.
        self.message("Cleaning data...")
        self.inspection_data_checks()
        self._clean_inspection_rows()
        self.correct_inspections_datatypes()
        self.create_risk_column()

    def inspection_data_checks(self):
        """Blank out invalid identifiers so later cleaning drops those rows.

        OWNER ID, FACILITY ID and FACILITY ZIP are validated with the
        module-level check functions; PROGRAM ELEMENT (PE) must lie in
        1000-9999. Invalid values become NaN.
        """
        df = self.model.inspections_df
        # na_action='ignore' leaves missing cells as NaN without passing
        # them to the regex-based checks.
        df['OWNER ID'] = df['OWNER ID'].map(owner_id_check, na_action='ignore')
        df['FACILITY ID'] = df['FACILITY ID'].map(facility_id_check, na_action='ignore')
        df['FACILITY ZIP'] = df['FACILITY ZIP'].map(facility_zip_check, na_action='ignore')

        # where() keeps in-range values and turns the rest into NaN
        pe_column = df['PROGRAM ELEMENT (PE)']
        df['PROGRAM ELEMENT (PE)'] = pe_column.where(pe_column.between(1000, 9999))
        self.model.inspections_df = df

    def _clean_inspection_rows(self):
        """Fill, drop and normalise rows of the inspections dataframe."""
        df = self.model.inspections_df

        # Scores below 70 do not have a grade,
        # fill with sentinel to avoid dropping later
        df.loc[df['SCORE'] < 70, 'GRADE'] = 'No Grade'

        # Remove all rows without data in any column, modify in future if
        # the customer is unconcerned about missing data from certain columns
        df = df.dropna(how='any').drop_duplicates().reset_index(drop=True)

        # Seven-Eleven stores can be incorrectly interpreted as a date.
        # Assign the result back rather than replacing in place on a
        # column selection.
        for column in ('FACILITY NAME', 'PROGRAM NAME'):
            df[column] = df[column].replace('07-Nov', 'SEVEN-11')

        # The HTML entity for a non-breaking space must be removed;
        # this will need to be widened out to cover all text columns
        for column in ('FACILITY CITY', 'FACILITY ADDRESS'):
            df[column] = df[column].str.replace("&#160;", " ", regex=False)

        # All results are from California
        # We could drop the column but will maintain it for posterity
        df['FACILITY STATE'] = 'CA'

        self.model.inspections_df = df

    def correct_inspections_datatypes(self):
        df = self.model.inspections_df
        float_columns = df.select_dtypes(include=['float64']).columns
        df[float_columns] = df[float_columns].astype('int64')

        df['ACTIVITY DATE'] = pd.to_datetime(df['ACTIVITY DATE'], infer_datetime_format=True)

        max_date = df['ACTIVITY DATE'].dt.date.max()
        min_date = df['ACTIVITY DATE'].dt.date.min()

        todays_date = pd.to_datetime('today').date()
        if max_date > todays_date:
            df['ACTIVITY DATE'][df['ACTIVITY DATE'] > todays_date] = todays_date
        elif min_date < pd.datetime(1990, 1, 1).date():
            df['ACTIVITY DATE'][df['ACTIVITY DATE'] < pd.datetime(1990, 1, 1).date()] = todays_date
        else:
            pass
        self.model.inspections_df = df

    def create_risk_column(self):
        """ Create new column in inspections df based on risk
        documented in PE DESCRIPTION
        """
        # Take risk data and create a new column with it under the assumption
        # that the format will not change, the last two words are the risk level
        df = self.model.inspections_df
        df['RISK'] = df['PE DESCRIPTION'].str.split(' ').str[-2]
        # Remove mention of risk from the original column
        df['PE DESCRIPTION'] = df['PE DESCRIPTION'].str.split(' ').str[0:-2].str.join(' ')
        self.model.inspections_df = df

    def remove_inactive_data(self):
        """ Two datasets are joined to perform later analysis on violations
        """
        inspections = self.model.inspections_df
        violations = self.model.violations_df

        # Remove inactive programs
        inspections_serial = inspections['SERIAL NUMBER']
        inactive_mask = inspections['PROGRAM STATUS'] == 'INACTIVE'
        inactive_inspections_serial = inspections_serial[inactive_mask]

        violations_mask = ~violations['SERIAL NUMBER'].isin(inactive_inspections_serial)
        violations = violations[violations_mask]

        active_inspections_mask = inspections['PROGRAM STATUS'] == 'ACTIVE'
        inspections = inspections[active_inspections_mask]

        self.model.violations_df = violations
        self.model.inspections_df = inspections

    def convert_to_dict(self, collection):
        """Convert csv input to dict

        """
        self.message(f"Converting data...")
        if collection == "inspections_violations":
            self.model.inspections_dict = self.model.inspections_df.to_dict('records')
            self.model.violations_dict = self.model.violations_df.to_dict('records')
        else:
            self.model.inventory_dict = self.model.inventory_df.to_dict('records')

    def set_up_database(self):
        """Connect to the local MongoDB server and bind the collections.

        Stores the database handle and the "inspections", "violations" and
        "inventory" collection handles on the model.
        """
        db_name = self.model.db_name
        assert type(db_name) is str, "Error, database name must be a string"
        self.message("Connecting to local database...")

        connection = MongoClient("mongodb://localhost:27017/")
        database = connection[db_name]
        self.model.db = database

        # model attribute -> MongoDB collection name
        bindings = (
            ("inspections_collection", "inspections"),
            ("violations_collection", "violations"),
            ("inventory_collection", "inventory"),
        )
        for attribute, collection_name in bindings:
            setattr(self.model, attribute, database[collection_name])

        self.message(f"Connected to {db_name} database")

    def load_data(self, collection):
        """Insert data in MongoDB collection.

        """
        self.message(f"Loading data to {self.model.db_name} database...")
        if collection == "inspections_violations":
            self.model.inspections_collection.insert_many(self.model.inspections_dict)
            self.model.violations_collection.insert_many(self.model.violations_dict)
        elif collection == "inventory":
            self.model.inventory_collection.insert_many(self.model.inventory_dict)
        self.message(f"Data loaded to {self.model.db_name} database.")

    def message(self, text):
        """Send message to the message pane

        """
        self.history_view.update(text, False)
        self.master.update_idletasks()
        
    def error_message(self, text):
        """Send message to the message pane

        """
        self.history_view.update(text, True)
        self.master.update_idletasks()

    def run_analysis(self):
        """Filter data and perform statistics and generate plots

        """
        self.close_windows()
        
        inspections_dict = list(self.model.inspections_collection.find(
            {"ACTIVITY DATE": {"$gte": self.model.date_from, '$lte': self.model.date_to}},
            {'_id': 0, 'SERIAL NUMBER': 1, 'RECORD ID': 1, 'ACTIVITY DATE': 1, 'SCORE': 1, 'FACILITY CITY': 1}))
        violations_dict = list(
            self.model.violations_collection.find({}, {'_id': 0, 'SERIAL NUMBER': 1, 'VIOLATION CODE': 1}))

        if len(inspections_dict) <= 3:
            self.error_message(f"Only {len(inspections_dict)} records from the selected date range.")
            self.error_message("Tool requires more than 3 samples to analyse data.")
            self.error_message("Please choose a wider date range or load more data.")
        else:
            # Convert to pandas df
            inspections = pd.DataFrame(inspections_dict)
            self.model.inspections_df = inspections

            violations = pd.DataFrame(violations_dict)

            # Join the datasets
            violations = violations.set_index('SERIAL NUMBER')
            inspections = inspections.set_index('SERIAL NUMBER')
            df = inspections.join(violations).reset_index()

            # Drop any duplicates
            df.drop_duplicates(inplace=True)
            self.model.inspections_violations_df = df

            # Begin analysis
            self.message("Beginning analysis")

            counts = self.violation_info()
            self.violation_view = ViolationView(self.master, counts)

            violation_by_facility = self.violation_correlation()
            self.correlation_view = ViolationCorrView(self.master, violation_by_facility)

            all_stats = self.get_stats()
            self.stats_view = StatsView(self.master, all_stats, self.model.corr_value)

            self.message("All complete")

    def doc_count_db(self):
        document_db_count = self.model.inspections_collection.count_documents(
            {"ACTIVITY DATE": {"$gte": self.model.date_from, '$lte': self.model.date_to}})
        return document_db_count

    def close_windows(self):
        try:
            self.violation_view.close_window()
        except AttributeError:
            pass

        try:
            self.correlation_view.close_window()
        except AttributeError:
            pass

        try:
            self.stats_view.close_window()
        except AttributeError:
            pass

    def violation_info(self):
        """ Violations are counted and plotted
        """
        df = self.model.inspections_violations_df

        unique_facilities_by_violation = df.groupby('VIOLATION CODE')['RECORD ID'].unique()
        stacked = unique_facilities_by_violation.apply(pd.Series).stack()
        counts = stacked.groupby('VIOLATION CODE').count().sort_values(ascending=False)

        counts_top = counts[0:25]
        other = pd.Series(counts[26:].sum(), index=['Others'])
        counts = counts_top.append(other)
        counts = counts / 1000
        return counts

    def violation_correlation(self):
        """ Correlation of the violations and latest inspection score
        is determined and plotted
        """
        df = self.model.inspections_violations_df
        latest_scores_group = df.groupby('RECORD ID')['ACTIVITY DATE', 'SCORE']
        latest_scores = latest_scores_group.apply(lambda g: g.sort_values(by='ACTIVITY DATE')[-1:])
        latest_scores = latest_scores['SCORE']
        latest_scores = latest_scores.droplevel(1)

        violation_count = df.groupby('RECORD ID')['VIOLATION CODE'].count()
        violation_by_facility = pd.concat([violation_count, latest_scores], axis=1)
        violation_by_facility.columns = ['Violation Count', 'Latest Score']
        self.model.corr_value = violation_by_facility.corr().iloc[0, 1]

        return violation_by_facility

    def get_stats(self):
        """Summarise inspection scores by year and by year and city.

        Adds a 'Year' column to ``self.model.inspections_df`` and re-indexes
        that frame in place by ('Year', 'FACILITY CITY').

        Returns a dict with the keys 'by_year' and 'by_facility_year', each
        holding the result of compute_stats for SCORE.
        """
        frame = self.model.inspections_df
        frame['Year'] = frame['ACTIVITY DATE'].dt.year
        frame.set_index(['Year', 'FACILITY CITY'], inplace=True)

        # result key -> index level(s) to group by
        groupings = {
            'by_year': 'Year',
            'by_facility_year': ['Year', 'FACILITY CITY'],
        }
        return {label: compute_stats(frame, levels, 'SCORE')
                for label, levels in groupings.items()}

    def initialise_dates(self):
        date_from = self.main_view.date_filter_from.get_date()
        date_to = self.main_view.date_filter_to.get_date()

        # Pymongo cannot handle date type so we need to convert to datetime
        self.model.date_from = datetime.combine(date_from, datetime.min.time())
        self.model.date_to = datetime.combine(date_to, datetime.min.time())

    def date_changed(self, event):
        """Store the latest date from the calendar in the model

        """
        date_from = self.main_view.date_filter_from.get_date()
        date_to = self.main_view.date_filter_to.get_date()

        # Ensure that date order is never invalid
        if date_from > date_to and (date_from is not self.model.date_from):
            date_to = date_from
            self.main_view.date_filter_to.set_date(date_to)
        elif date_from > date_to and (date_to is not self.model.date_to):
            date_from = date_to
            self.main_view.date_filter_from.set_date(date_from)

        # Pymongo cannot handle date type so we need to convert to datetime
        self.model.date_from = datetime.combine(date_from, datetime.min.time())
        self.model.date_to = datetime.combine(date_to, datetime.min.time())

        self.set_inspections_doc_count_string()

    def set_inspections_doc_count_string(self):
        doc_count = self.doc_count_db()
        self.model.inspections_document_count.set(f"{doc_count} inspections in selected date range.")


class Model:
    """Shared application state read and written by the controller.

    """

    def __init__(self):
        # Paths of the csv files chosen by the user
        self.inspections_filename = None
        self.violations_filename = None
        self.inventory_filename = None

        # Tk variable behind the "N inspections in selected date range" label
        self.inspections_document_count = tk.StringVar()

        # Name of the MongoDB database
        self.db_name = "dept_of_health"

        # Data as pandas frames ...
        self.inspections_df = None
        self.violations_df = None
        self.inventory_df = None
        self.inspections_violations_df = None
        # ... and as converted for database insertion
        self.inspections_dict = {}
        self.violations_dict = {}
        self.inventory_dict = {}

        # MongoDB collection handles, assigned once connected
        self.inspections_collection = None
        self.violations_collection = None
        self.inventory_collection = None

        # Correlation of violation count and latest score, set by analysis
        self.corr_value = None

        # Date filter; both ends start at "now" until the controller
        # copies in the date pickers' values
        self.date_from = self.date_to = datetime.now()

        # An analysis needs more than this many inspections
        self.MIN_DOC_COUNT = 3


class LogoView:
    """Banner shown in the top row of the window.

    """

    def __init__(self, master):
        self.master = master

        ttk.Style().configure("TFrame", background='white')

        banner_frame = ttk.Frame(master, style='TFrame')
        banner_frame.grid(row=0, sticky="ew")
        self.frame = banner_frame

        self.canvas = tk.Canvas(banner_frame, width=300, height=100,
                                highlightthickness=0)

        # The image is stored on self as well as drawn on the canvas
        logo_image = Image.open("logo.png")
        self.img = ImageTk.PhotoImage(logo_image)
        self.canvas.create_image(0, 0, anchor=tk.NW, image=self.img)
        self.canvas.grid(row=0, column=0, sticky='NSEW')


class DataImportView:
    """Main control panel: file-loading buttons, date filters, run button.

    """

    def __init__(self, master, controller):
        self.master = master
        self.controller = controller

        # Padding shared by every gridded widget in this view
        cell_padding = {'padx': 5, 'pady': 5}

        self.frame = ttk.Frame(self.master, style='TFrame')
        self.frame.grid(row=1, column=0, sticky='NSEW', **cell_padding)
        for row in (0, 1):
            self.frame.grid_rowconfigure(row, weight=1)
        self.frame.grid_columnconfigure(0, weight=1)

        ttk.Style().configure("TLabelframe",
                              background="white", foreground='red')
        ttk.Style().configure("TLabelframe.Label",
                              background="white",
                              foreground='blue')

        # ---- File selection ------------------------------------------
        load_frame = ttk.Labelframe(self.frame,
                                    text='Load Files to Database',
                                    height=100,
                                    style="TLabelframe")
        load_frame.grid(row=0, column=0, columnspan=2, sticky='NSEW')
        self.fileload_label_frame = load_frame

        # Inspections
        self.inspections_file_button = tk.Button(
            load_frame,
            text='1. Load Inspections File',
            width=30,
            command=controller.get_inspections_input,
            bg='white')
        self.inspections_file_button.grid(row=0, column=0, columnspan=2, **cell_padding)

        # Violations - stays disabled until the controller enables it
        self.violations_file_button = tk.Button(
            load_frame,
            text='2. Load Violations File',
            width=30,
            command=controller.get_violations_input,
            bg='white',
            state='disabled')
        self.violations_file_button.grid(row=1, column=0, columnspan=2, **cell_padding)

        # Inventory
        self.inventory_file_button = tk.Button(
            load_frame,
            text='3. Load Inventory File (Optional)',
            width=30,
            command=controller.get_inventory_input,
            bg='white')
        self.inventory_file_button.grid(row=2, column=0, columnspan=2, **cell_padding)

        for row in range(3):
            load_frame.grid_rowconfigure(row, weight=1)
        load_frame.grid_columnconfigure(0, weight=1)

        # ---- Filters --------------------------------------------------
        filter_frame = ttk.Labelframe(self.frame,
                                      text='Analyse Data',
                                      height=100,
                                      style="TLabelframe")
        filter_frame.grid(row=1, column=0, columnspan=2, sticky='NSEW')
        self.filters_label_frame = filter_frame

        # Date - From
        text_date_filter_from = tk.Label(filter_frame, text='From: ', bg='white')
        text_date_filter_from.grid(row=0, column=0, **cell_padding)

        self.date_filter_from = tkcalendar.DateEntry(filter_frame)
        self.date_filter_from.set_date("1/1/18")
        self.date_filter_from.bind('<<DateEntrySelected>>', self.controller.date_changed)
        self.date_filter_from.grid(row=0, column=1, **cell_padding)

        # Date - To
        text_date_filter_to = tk.Label(filter_frame, text='To: ', bg="white")
        text_date_filter_to.grid(row=1, column=0, **cell_padding)

        self.date_filter_to = tkcalendar.DateEntry(filter_frame)
        self.date_filter_to.bind('<<DateEntrySelected>>', self.controller.date_changed)
        self.date_filter_to.grid(row=1, column=1, **cell_padding)

        # Label text comes from a Tk variable held by the model
        self.label_document_count = tk.Label(
            filter_frame,
            bg="white",
            textvariable=self.controller.model.inspections_document_count)
        self.label_document_count.grid(row=2, column=0, columnspan=2, **cell_padding)

        # ---- Run ------------------------------------------------------
        self.run_button = tk.Button(filter_frame,
                                    text="Run Analysis",
                                    width=20,
                                    command=controller.run_analysis,
                                    bg='#003366',
                                    fg='white')
        self.run_button.grid(row=3, column=0, columnspan=2, **cell_padding)

        for row in range(4):
            filter_frame.grid_rowconfigure(row, weight=1)
        for column in range(2):
            filter_frame.grid_columnconfigure(column, weight=1)


class HistoryView:
    """Scrollable message pane in the bottom row of the window.

    """

    def __init__(self, master, controller):
        self.master = master
        self.controller = controller

        ttk.Style().configure("TFrame", background='white')

        pane = ttk.Frame(self.master, style='TFrame')
        pane.grid(row=2, sticky="NSEW")
        self.frame = pane

        self.scroller = tk.Scrollbar(pane)

        self.history_box = tk.Text(pane, wrap=tk.WORD, width=34, height=5)
        # Lines inserted with the 'error' tag are drawn in red
        self.history_box.tag_config('error', foreground="red")

        # Link the scrollbar and the text box in both directions
        self.scroller.config(command=self.history_box.yview)
        self.history_box.config(yscrollcommand=self.scroller.set)

        self.history_box.grid(row=0, column=0, sticky='NSEW', padx=5, pady=5)
        self.scroller.grid(row=0, column=1, sticky='NSW')

        pane.grid_rowconfigure(0, weight=1)
        for column in (0, 1):
            pane.grid_columnconfigure(column, weight=1)

    def update(self, text, error):
        """Append ``text`` as a new line and scroll it into view.

        Arguments:
        text -- the message to show
        error -- truthy to apply the 'error' tag to the line
        """
        line = '- ' + text + '\n'
        tags = ('error',) if error else ()
        self.history_box.insert(tk.END, line, *tags)
        self.history_box.see(tk.END)


class ViolationView:
    """Window showing a bar chart of facility counts per violation code."""

    def __init__(self, master, counts):
        """Create the window and draw the chart.

        Arguments:
        master -- parent Tk widget for the new top-level window
        counts -- pandas Series of values to plot, indexed by violation code
        """
        self.window = tk.Toplevel(master)
        # plt.Figure() builds a figure that pyplot does not manage; it is
        # displayed through the Tk canvas created below.
        self.figure = plt.Figure()
        self.ax = self.figure.add_subplot(111)
        self.plot = counts.plot(kind='bar', ax=self.ax)
        # Title spelling corrected ("Facilities")
        self.ax.set_title('Number of Unique Facilities per Violation')
        self.ax.set_xlabel('Violation ID')
        self.ax.set_ylabel('Number of Facilities (1,000s)')
        self.figure.set_tight_layout(True)
        canvas = FigureCanvasTkAgg(self.figure, self.window)
        canvas.get_tk_widget().pack(side="top", fill='both', expand=True)

    def close_window(self):
        """Destroy the chart window."""
        try:
            self.window.destroy()
        except AttributeError:
            pass


class ViolationCorrView:
    """Window showing violation count against latest score with a fit line."""

    def __init__(self, master, violation_by_facility):
        """Create the window and draw the regression scatter plot.

        Arguments:
        master -- parent Tk widget for the new top-level window
        violation_by_facility -- DataFrame with the columns
                                 'Violation Count' and 'Latest Score'
        """
        self.window = tk.Toplevel(master)
        self.figure = plt.Figure()
        self.ax = self.figure.add_subplot(111)

        self.regplot = sns.regplot(x="Violation Count", y="Latest Score",
                                   data=violation_by_facility, ax=self.ax)

        # Set the limits on THIS figure's axes. plt.ylim() acts on pyplot's
        # "current" axes, and self.figure is not a pyplot-managed figure,
        # so the limit never reached the displayed plot.
        lowest_score = violation_by_facility['Latest Score'].min()
        if pd.notna(lowest_score) and lowest_score < 100:
            self.ax.set_ylim(lowest_score, 100)
        self.ax.set_title('Violation Count v Latest Inspection Score with Regression')

        self.figure.set_tight_layout(True)
        canvas = FigureCanvasTkAgg(self.figure, self.window)
        canvas.get_tk_widget().pack(side="top", fill='both', expand=True)
        # An improvement would be to consider the average or max violations per inspection per facility
        # as some facilities may have more violations solely due to the length of time trading

    def close_window(self):
        """Destroy the plot window."""
        try:
            self.window.destroy()
        except AttributeError:
            pass


class StatsView:
    """Window listing the correlation value and the score statistics."""

    def __init__(self, master, all_stats, corr_value):
        """Create the window and fill it with the formatted statistics.

        Arguments:
        master -- parent Tk widget for the new top-level window
        all_stats -- dict with the keys 'by_year' and 'by_facility_year',
                     each a dict holding 'mean', 'median' and 'mode'
        corr_value -- number shown to two decimal places
        """
        self.stats_window = tk.Toplevel(master)
        self.stats_text = tk.Text(self.stats_window, wrap=tk.WORD)
        self.stats_text.grid(row=0, column=0, sticky='NSEW', padx=5, pady=5)

        self.stats_text.tag_configure('h1', font=('Verdana', 15, 'bold'))
        self.stats_text.tag_configure('h2', font=('Verdana', 10, 'bold'))

        self.stats_scroller = tk.Scrollbar(self.stats_window)
        self.stats_scroller.grid(row=0, column=1, sticky='NSW')
        # Link scrollbar and text in BOTH directions (as HistoryView does);
        # without yscrollcommand the scrollbar does not follow the text.
        self.stats_scroller.config(command=self.stats_text.yview)
        self.stats_text.config(yscrollcommand=self.stats_scroller.set)

        self.stats_text.insert(tk.END, "Correlation of Latest Inspection Score and Violation Count per Facility:\n",
                               'h1')
        self.stats_text.insert(tk.END, f"{corr_value:.2f}")
        self.stats_text.insert(tk.END, "\nSee regression scatter plot for further info.\n")

        self._insert_stats("\n\nStatistics for Inspection Score By Year",
                           all_stats['by_year'])
        self._insert_stats("\n\nStatistics for Inspection Score by Facility City and Year",
                           all_stats['by_facility_year'])

        self.stats_window.grid_rowconfigure(0, weight=1)
        self.stats_window.grid_columnconfigure(0, weight=1)

    def _insert_stats(self, heading, stats):
        """Append one headed section with the mean, median and mode."""
        self.stats_text.insert(tk.END, heading, 'h1')
        sections = (
            ("\n\nThe mean inspection scores are:\n", 'mean'),
            ("\n\nThe median inspection scores are:\n", 'median'),
            ("\n\nThe mode of the inspection scores are:\n", 'mode'),
        )
        for caption, key in sections:
            self.stats_text.insert(tk.END, caption, 'h2')
            # Convert explicitly instead of handing a non-string to Tk
            self.stats_text.insert(tk.END, str(stats[key]))

    def close_window(self):
        """Destroy the statistics window."""
        try:
            self.stats_window.destroy()
        except AttributeError:
            pass


<H2>3. Instantiate Tkinter GUI and Run Main Loop</H2>

In [3]:
# Create the Tk root window, attach the application controller to it,
# then hand control to the Tk event loop.
root = tk.Tk()
gui = DataAnalysisApp(master=root)
root.mainloop()