# Workshop Data Visualization
> by [Jonathan Ferrari](https://jonathanferrari.com)

## Imports for Functions to Work Properly

In [1]:
import requests
from pathlib import Path
import time
#standard data analysis libraries
import numpy as np 
import pandas as pd 
#imports for displaying, rendering, and saving plots and visualizations
import plotly
import plotly.express as px
from IPython.display import *
import plotly.io as pio
import ipywidgets as widgets
from ipywidgets import *
import ast
import re
import datetime

## Visualization Functions

In [2]:
def download_data(data_url, 
                  file):
    """
    Download a URL and save the response body to a local file

    Parameters
    ––––––––––
    data_url : str
        the address fetched with ``requests.get``

    file : str or path-like
        where the downloaded bytes are written;
        an existing file is overwritten

    Returns
    –––––––
    pathlib.Path
        the path of the written file

    Raises
    ––––––
    requests.HTTPError
        if the server answers with an error status
    """
    file_path = Path(file)
    print('Downloading...', end=' ')
    resp = requests.get(data_url)
    # Fail loudly on 4xx/5xx instead of saving an error page as if it were data
    resp.raise_for_status()
    file_path.write_bytes(resp.content)
    print('Done!')
    return file_path

def show(*args, tags=None):
    """
    Display text or other data as Markdown using IPython

    Parameters
    ––––––––––
    *args : any
        the values to display, one Markdown output per value;
        non-string values are converted with ``str``. If the
        first value is a list, its elements are displayed
        instead and any further values are ignored. With no
        values, nothing is displayed

    tags : list of str | default ``None``
        uses each element of tags as an HTML
        tag; tags will be applied from left
        to right, so the last tag in the
        list will be the outermost applied.
        A tag may carry attributes, e.g.
        ``"pre style='font-size:15px'"``

    Returns
    –––––––
    None
    """
    # Guard the lookup so a call without values is a no-op, not an IndexError
    if args and isinstance(args[0], list):
        args = args[0]
    tags = [] if tags is None else tags
    assert all(isinstance(tag, str) for tag in tags), "tags must contain strings"
    for value in args:
        text = value if isinstance(value, str) else str(value)
        for tag in tags:
            # Only the element name belongs in the closing tag; attributes
            # there would give malformed HTML such as ``</pre style=...>``
            pieces = tag.split(None, 1)
            closing = pieces[0] if pieces else tag
            text = f"<{tag}>{text}</{closing}>"
        display(Markdown(text))
    
def showtable(self, 
         allrows: bool = False, 
         columns: list = ["all"], 
         rows: int = 20, 
         start: int = 0,
         title: str = None,
         desc: bool = True):
    """
    Display pandas.DataFrame using custom values 

    Parameters
    ––––––––––
    allrows : bool | default ``False``
        Whether or not to show all rows

    columns : list | default ``["all"]``
        Default shows all columns. Set to list of 
        column names to select those columns

    rows : int | default ``20``
        How many rows of the DataFrame to display.
        If rows < 0, displays the last 
        ``abs(rows)`` entries (``start`` is then ignored)

    start: int | default ``0``
        What index to start displaying the DataFrame at

    title: str | default ``None``
        A title for the DataFrame to be displayed using
        ``show()``. Any other non-``None`` value is indexed
        and its first two items are passed to ``show()``

    desc: bool | default ``True``
        Whether to display the DataFrame's size

    Returns
    –––––––
    None
    """
    if isinstance(title, str):
        show(title)
    elif title is not None:
        show(title[0], title[1])

    # The ``["all"]`` default is only compared, never mutated
    wants_all = isinstance(columns, list) and columns == ["all"]
    subset = self if wants_all else self[columns]

    settings = ['display.max_rows', 'display.max_columns',
                'display.width', 'display.max_colwidth']
    unlimited = [item for name in settings for item in (name, None)]
    # option_context restores the previous options even if display raises
    with pd.option_context(*unlimited):
        if allrows:
            display(subset)
        elif rows < 0:
            display(subset.tail(abs(rows)))
        else:
            display(subset.iloc[start:start + rows, :])

    if desc:
        nrow, ncol = self.shape
        show(f"{nrow} Rows x {ncol} Columns")
            
def visualize(data):
    """
    Interactively plot the columns of a DataFrame

    Shows a ``Kind`` dropdown; choosing ``Scatter Plot`` or
    ``Histogram`` reveals a second group of dropdowns for the
    axes, the color column and the marginal graph, and the
    matching plotly express figure is drawn once the required
    axes have been chosen

    Parameters
    ––––––––––
    data : pandas.DataFrame
        the table whose columns can be plotted

    Returns
    –––––––
    None
    """
    def scatter_controls():
        # Dropdowns and plotting callback for the scatter plot
        show(">***NOTE:*** If you chose `Color By` to be a column with numeric data, "
             "that will **disable the `Side Graph`** parameter")
        x_axis = widgets.Dropdown(options=data.columns, value=None,
                                  description="X-Axis")
        y_axis = widgets.Dropdown(options=data.columns, value=None,
                                  description="Y-Axis")
        color_by = widgets.Dropdown(options=[None] + list(data.columns), value=None,
                                    description="Color By")
        side_graph = widgets.Dropdown(options=[None, 'rug', 'box', 'violin', 'histogram'],
                                      value='histogram', description="Side Graph")

        @interact(x=x_axis, y=y_axis, color=color_by, marginal=side_graph)
        def scatter_helper(x, y, marginal, color):
            # A float color column switches the side graphs off
            if color is not None and data[color].dtype == float:
                marginal = None
            if x is None or y is None:
                return
            figure = px.scatter(data_frame=data,
                                x=x, y=y,
                                color=color,
                                color_continuous_scale='viridis',
                                template='seaborn',
                                marginal_x=marginal, marginal_y=marginal,
                                title=f"'{x}' vs. '{y}'")
            figure.show()

    def histogram_controls():
        # Dropdowns and plotting callback for the histogram
        show("Using the `Color By` variable here leads to some odd displays",
             "They aren't really usefull, but we've the option to se it in case you are curious",
             "The default `None` gives a solid color")
        x_axis = widgets.Dropdown(options=data.columns, value=None,
                                  description="X-Axis")
        color_by = widgets.Dropdown(options=[None] + list(data.columns), value=None,
                                    description="Color By")
        top_graph = widgets.Dropdown(options=[None, 'rug', 'box', 'violin', 'histogram'],
                                     value='box', description="Top Graph")

        @interact(x=x_axis, color=color_by, marginal=top_graph)
        def hist_helper(x, marginal, color):
            if x is None:
                return
            figure = px.histogram(data_frame=data,
                                  x=x,
                                  color=color, template="seaborn",
                                  marginal=marginal,
                                  title=f"Distribution of '{x}'")
            figure.show()

    @interact(Kind=widgets.Dropdown(options=["Scatter Plot", "Histogram"], value=None))
    def plot_kind(Kind):
        # Built on every call but never displayed or read afterwards
        unused_picker = widgets.Dropdown(options=data.columns)
        if Kind == "Scatter Plot":
            scatter_controls()
        elif Kind == "Histogram":
            histogram_controls()

def workshop_hist():
    """
    Interactively draw histograms of workshop feedback tables

    Offers a ``Workshop`` dropdown and a ``Show All Plots`` toggle.
    With the toggle on, every column of every table is plotted
    against every other column as the color, and any plotting
    errors are listed afterwards. Otherwise the chosen table gets
    its own axis / color / bar-mode dropdowns; choosing the
    ``Feedback`` column lists the written comments instead of
    drawing a plot.

    NOTE(review): reads a module-level mapping named ``tables``
    (workshop name -> DataFrame) that is not defined in the visible
    part of this notebook — confirm it exists before calling.

    Returns
    –––––––
    None
    """
    bar_modes = [("Stacked", 'relative'), ("Side-By-Side", 'group')]

    @interact(data = Dropdown(options = [None] + list(tables.keys()), description = "Workshop"),
              showall = ToggleButton(value=False, description='Show All Plots', icon = "eye", 
                                     button_style = "warning"))
    def step_1(showall, data):
        if showall:
            @interact(mode = Dropdown(options = bar_modes))
            def show_all(mode):
                errors = []
                for key, df in tables.items():
                    for x in df.columns:
                        for color in df.columns:
                            if x == color:
                                continue
                            try:
                                px.histogram(data_frame = df, 
                                             x = x,
                                             color = color, 
                                             template = "seaborn",
                                             title = f"Distribution of '{x}'",
                                             barmode = mode).show()
                            except Exception as e:
                                # Keep going; the failures are reported together below
                                errors.append(f"Encountered {e} when attempting to plot {x} from {key}, colored by {color}")
                for error in errors:
                    show(error, tags=["pre style='font-size:15px'"])

        elif data:
            key = data
            data = tables[data]
            color_options = [i for i in [None] + list(data.columns) if i != "Feedback"]

            @interact(x = widgets.Dropdown(options=data.columns, value = None,
                                           description = "X-Axis"),
                      color = widgets.Dropdown(options=color_options, value = None,
                                               description = "Color By"),
                      mode = Dropdown(options = bar_modes))
            def hist_helper(x, color, mode):
                if x == "Feedback":
                    # Free-text answers are listed rather than plotted
                    for comment in data["Feedback"]:
                        if pd.notna(comment):
                            show(f"*{comment}", tags=["pre style='font-size: 18px'"])
                            show("\n")
                elif x is not None:
                    try:
                        px.histogram(data_frame = data, 
                                     x = x,
                                     color = color, template = "seaborn",
                                     title = f"Distribution of '{x}'",
                                     barmode = mode).show()
                    except Exception as e:
                        # Report the failure to the user instead of discarding it
                        show(f"Encountered {e} when attempting to plot {x} from {key}, colored by {color}",
                             tags=["pre style='font-size:15px'"])

# Give every DataFrame a ``.show()`` method backed by ``showtable``
pd.DataFrame.show = showtable


def _display_widget(widget):
    """Render *widget* with IPython's ``display`` and return its result"""
    return display(widget)


# Give every ipywidgets Widget a ``.show()`` method as well
Widget.show = _display_widget

## Helper Functions

In [10]:
def outputData(output):
    """
    Recover the Python value shown in an output widget

    Parameters
    ––––––––––
    output : object with an ``outputs`` sequence
        the first entry's ``["data"]["text/plain"]`` text is
        evaluated as a Python expression

    Returns
    –––––––
    the evaluated value, which is also printed
    """
    first_output = output.outputs[0]
    plain_text = first_output["data"]['text/plain']
    # NOTE(review): eval runs whatever expression the text holds; only use
    # this on output produced by this notebook's own widgets
    found = eval(plain_text)
    print(found)
    return found

def select_workshops():
    """
    Show a multi-select list of the created workshops

    The list offers every entry of ``Presentation.dct()`` and starts
    with the ``spend`` and ``cards`` globals selected.

    Returns
    –––––––
    the ``interactive_output`` widget that echoes the current selection
    """
    instructions = ["Please select the workshops you'd like to look at below:",
                    "On a Mac, hold `command` (\u2318) while selecting the presentations you want to look at\n",
                    "On a PC, hold the `CRTL` button to do the same"]
    picker = SelectMultiple(options=Presentation.dct(), value=[spend, cards])

    def echo(x):
        return display(x)

    out = interactive_output(echo, {"x": picker})
    show(instructions)
    display(picker)
    return out

def outputWorkshop(widget):
    """
    Turn the workshop selection echoed by an output widget into objects

    The text of the widget's first output is stripped of its outer
    brackets and split on commas; each piece has the ``"Presentation: "``
    prefix removed and is looked up by title in ``Presentation.dct()``.

    Parameters
    ––––––––––
    widget : object with an ``outputs`` sequence
        normally the value returned by ``select_workshops()``

    Returns
    –––––––
    list or None
        the matching workshops (``None`` entries for unknown titles),
        or ``None`` if the selection could not be read
    """
    try:
        text = widget.outputs[0]["data"]["text/plain"]
        pieces = [piece for piece in text[1:-1].split(",") if piece != ""]
        registry = Presentation.dct()
        found = [registry.get(piece.strip().replace("Presentation: ", "").replace("\n ", ""))
                 for piece in pieces]
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        # Typical causes: the widget has not rendered any output yet, or the
        # argument is not the selection widget. Say so instead of failing silently
        print(f"Could not read the workshop selection ({type(e).__name__}); "
              "please run this cell again")
        return None
    print(found)
    return found

def fillRand(series):
    """
    Fill missing entries with random draws from the present ones

    Parameters
    ––––––––––
    series : pandas.Series
        modified in place; each missing entry is replaced by a
        value drawn (with replacement) from the non-missing
        entries of the same series

    Returns
    –––––––
    pandas.Series
        the same object that was passed in. It is returned
        unchanged when nothing is missing or when every entry
        is missing, since there is then nothing to draw from
    """
    missing = series.isna().to_numpy()
    n_missing = int(missing.sum())
    if n_missing == 0:
        return series
    possible = series[~missing].to_numpy()
    if possible.size == 0:
        # np.random.choice raises on an empty pool; leave the gaps as they are
        return series
    series.iloc[np.flatnonzero(missing)] = np.random.choice(possible, size=n_missing)
    return series

def select_date():
    """
    Show a date picker labelled "Earliest Date", preset to today

    Returns
    –––––––
    the ``interactive_output`` widget that echoes the picked date
    """
    instructions = ["Either enter the date in the format `mm/dd/yyyy`,",
                    "or click the icon to use an interface to select the date"]
    picker = DatePicker(description="Earliest Date", value=datetime.date.today())

    def echo(x):
        return display(x)

    out = interactive_output(echo, {"x": picker})
    show(instructions)
    display(picker)
    return out

def dateToDatetime(date):
    """Return the ``datetime.datetime`` for midnight at the start of *date*"""
    midnight = datetime.time.min
    return datetime.datetime.combine(date, midnight)

## Class to Represent Each Presentation and Creation of Objects to Represent Current Presentation

In [4]:
class Presentation:
    """
    One workshop and its feedback-form responses

    Creating an instance downloads the responses from
    ``https://tinyurl.com/bffs{name}data``, separates the free-text
    feedback column, cleans the table and registers the workshop in
    the class-wide registries.

    Attributes
    ––––––––––
    name : short name used to build the data link
    title : display title; also the key in ``workshop_dict``
    link : address the data is read from
    viewrl : ``link`` with ``"view"`` appended
    data : the cleaned responses
    feedback : the non-missing free-text feedback answers
    """
    # Registries shared by all instances; maintained by ``update``
    workshop_list = []
    workshop_dict = {}

    def __init__(self, name:str, title:str=""):
        self.name = name
        self.link = f"https://tinyurl.com/bffs{name}data"
        self.viewrl = self.link + "view"
        self.title = title
        self.readData()
        self.clean()
        self.update()

    def __str__(self):
        return self.title or self.name

    def __repr__(self):
        return "Presentation: " + (self.title or self.name)

    @staticmethod
    def create():
        """Create every known workshop and bind each one to a global named after it"""
        names = ["spend", "moving", "debt", "cards", "credit"]
        titles = ["Spending Plan", "Moving Out of the Dorms", 
                  "Dealing with Debt", "Credit Cards", "Credit"] 
        for name, title in zip(names, titles):
            globals()[name] = Presentation(name, title = title)
            show(f"<i>{title}</i> workshop has been created!", tags = ["b", "center"])

    def update(self):
        """
        Register this workshop in ``workshop_list`` and ``workshop_dict``

        An earlier workshop with the same ``name`` is replaced, so
        re-running ``create()`` does not leave stale duplicates behind.
        """
        registry = Presentation.workshop_list
        registry[:] = [shop for shop in registry if shop.name != self.name] + [self]
        lookup = Presentation.workshop_dict
        stale = [title for title, shop in lookup.items() if shop.name == self.name]
        for title in stale:
            del lookup[title]
        lookup[self.title] = self

    def show(self, height = 700):
        """Display a link to the viewable page and return an IFrame of it"""
        instruction = "This page is also viewable " \
                    + f"at [this link]({self.viewrl})"
        show(instruction)
        return IFrame(self.viewrl, width = "100%", height = height)

    def getData(self):
        """Return the cleaned responses"""
        return self.data

    def lst(_ = None):
        """Return the list of registered workshops (callable on class or instance)"""
        return Presentation.workshop_list

    def dct(_ = None):
        """Return the title -> workshop mapping (callable on class or instance)"""
        return Presentation.workshop_dict

    def after(_ = None, date = datetime.date.min):
        """
        Keep, for every registered workshop, only rows later than ``date``

        Parameters
        ––––––––––
        date : datetime.date or datetime.datetime | default ``datetime.date.min``
            rows whose ``Timestamp`` is not later than this are dropped.
            The default means "no cutoff" and leaves the data untouched
        """
        # ``Presentation.after(some_date)`` binds the date to the first slot
        if isinstance(_, datetime.date):
            date = _
        if isinstance(date, datetime.date) and not isinstance(date, datetime.datetime):
            if date == datetime.date.min:
                return
            # A datetime column cannot be ordered against a plain date
            date = datetime.datetime.combine(date, datetime.time.min)
        for shop in Presentation.lst():
            shop.data = shop.data[shop.data["Timestamp"] > date]

    def getFeedback(self):
        """Display the feedback answers as a Markdown bullet list"""
        show("".join(f" * {response}\n" for response in self.feedback))

    def readData(self):
        """
        Download the responses and do the first round of tidying

        The ``spend`` workshop is read as an Excel workbook whose sheets
        are stacked (first 11 columns of each); every other workshop is
        read as CSV.

        Returns
        –––––––
        pandas.DataFrame
            ``self.data`` after feedback extraction, column dropping
            and timestamp conversion
        """
        if self.name == "spend":
            sheets = pd.read_excel(self.link, sheet_name=None)
            stacked = pd.concat([sheet.iloc[:, :11] for sheet in sheets.values()])
            self.data = stacked.reset_index()
        else:
            self.data = pd.read_csv(self.link)
        self.readFeedback()
        self.dropCols()
        self.toDateTime()
        return self.data

    def dropCols(self):
        """Drop the bookkeeping columns listed below when they are present"""
        to_drop = ['index', 'Workshop Date (mm/dd/yy)', 'Presentation',
                   'Date: month/day/year', 'SID', 'Workshop Date',
                   'Event/Affiliated Student Group/Organization',
                   'If you would like to schedule a one-on-one ' \
                   + 'appointment to talk about your personal finances,' \
                   + ' please provide your email address:']
        present = [col for col in to_drop if col in self.data.columns]
        self.data = self.data.drop(columns = present)

    def getFiller(self):
        """
        Return the fill rule for each column position of this workshop

        Returns
        –––––––
        dict
            column position -> constant fill value, or ``fillRand`` to
            fill from the column's own values. Workshops without a
            specific rule set use ``fillRand`` for every column
        """
        noCard = "I don't have a credit card"
        fill_dict = {"debt" : {1 : "None", 2 : "No", 3 : "Don't know",
                               4 : "Medium Amount", 5 : fillRand, 7 : "Nslds.ed.gov"},
                     "cards" : {1 : "0", 2 : noCard, 3 : noCard, 4 : noCard,
                                5 : fillRand, 6 : fillRand},
                     "spend" : {i : "Not Sure" for i in range(7)}}
        default = {i : fillRand for i in range(self.data.shape[1])}
        return fill_dict.get(self.name, default)

    def clean(self):
        """Drop sparse rows, then fill the remaining gaps"""
        self.rmna()
        self.fill(self.getFiller())

    def rmna(self):
        """Drop rows with too few answers and strip whitespace from column names"""
        df = self.data.copy()
        # A row is kept only if it has at least this many non-missing values
        minNonNa = df.shape[1] - ((df.shape[1] - 1) // 2) + 1
        df = df.dropna(thresh = minNonNa)
        df.columns = df.columns.str.strip()
        self.data = df

    def fill(self, valuedict = None):
        """
        Fill missing values column by column

        Parameters
        ––––––––––
        valuedict : dict | default ``None``
            column position -> fill value; a callable is applied to a
            copy of the column and its result replaces the column.
            Nothing happens when this is empty or ``None``
        """
        if not valuedict:
            return
        df = self.data.copy()
        cols = df.columns
        constants = {}
        for col, val in valuedict.items():
            if callable(val):
                df.iloc[:, col] = val(df.iloc[:, col].copy())
            else:
                constants[cols[col]] = val
        self.data = df.fillna(constants)

    def toDateTime(self):
        """Convert the ``Timestamp`` column to pandas datetimes"""
        self.data["Timestamp"] = pd.to_datetime(self.data["Timestamp"])

    def readFeedback(self):
        """
        Move the free-text feedback column out of ``data``

        The first column whose name contains "feedback" or "suggest"
        (case-insensitive) is stored, without missing values, in
        ``self.feedback`` and dropped from ``self.data``. When no such
        column exists, ``self.feedback`` is an empty Series and the
        data is left unchanged.
        """
        data = self.data
        # na=False keeps non-string column labels from producing NaN in the mask
        match = data.columns.str.lower().str.contains(r"feedback|suggest", na=False)
        if not match.any():
            self.feedback = pd.Series(dtype=object)
            return
        feedback_col = data.columns[match][0]
        self.feedback = data.iloc[:, match].iloc[:, 0].dropna()
        self.data = data.drop(columns = feedback_col)

## Run This Cell to Reset the Data and All Edits if Needed

In [5]:
# Build every workshop object; each one downloads its data and is also
# stored as a global variable named after its short name (e.g. `spend`)
Presentation.create()

<center><b><i>Spending Plan</i> workshop has been created!</b></center>

<center><b><i>Moving Out of the Dorms</i> workshop has been created!</b></center>

<center><b><i>Dealing with Debt</i> workshop has been created!</b></center>

<center><b><i>Credit Cards</i> workshop has been created!</b></center>

<center><b><i>Credit</i> workshop has been created!</b></center>

## Introduction
<p style="font-size:20px">Welcome to this interactive Jupyter Notebook!</p>
Today, we will be visualizing data from workshop feedback forms from the time period you're trying to study!
We'll go through a few steps together. Don't worry: you won't need to write any code, and I'll walk you through the process! <br />
<b style="font-size:18px"> Now, let's get to it!</b>

<div class="alert alert-block alert-info">
<b style="font-size:20px">⚠️Disclaimer⚠️</b> <br>This notebook is currently set up with the ability to analyze the following workshops:
    <ul>
        <li>Spending Plan</li>
        <li>Moving Out of the Dorms</li>
        <li>Dealing with Debt</li>
        <li>Credit Cards</li>
        <li>Credit</li>
    </ul>
    If a new workshop is added, please contact <a href="https://jonathanferrari.com/contact">Jonathan Ferrari</a> to update the notebook.
</div>

## Your Data

In order to do this, the first thing we need is some data! We'll be using the data collected from the *Google Forms*; we can get the data right from the *Google Sheets* that it's collected in. To do this, we'll use the links from the workshop feedback folder. You can access the workshop feedback spreadsheets **[here](https://docs.google.com/spreadsheets/d/1HmZuYnTnKcpUJ7nKDn3Rx5M7c3qD3QDfbYbsfr_-Zf8/edit?usp=sharing)**; these are the sheets we'll use. They're living documents, so using them later should still work fine!

### Walkthrough

In [6]:
# Show the workshop picker; `selected` is the output widget that echoes
# the current selection, and the bare name below renders it in the cell
selected = select_workshops()
selected

Please select the workshops you'd like to look at below:

On a Mac, hold `command` (⌘) while selecting the presentations you want to look at


On a PC, hold the `CRTL` button to do the same

SelectMultiple(index=(0, 3), options={'Spending Plan': Presentation: Spending Plan, 'Moving Out of the Dorms':…

Output()

## Select What Timeframe of Data You'd Like to Look At

We'll need to determine what data you want to look at. We'll assume you want to look at current data, so we just need to decide on the earliest date you want data from. Use the cell below to choose that date! You can either enter it in number form, or click the calendar icon on the right to click a date.

In [7]:
# Show the date picker; `start` is the output widget that echoes the picked date
start = select_date()

Either enter the date in the format `mm/dd/yyyy`,

or click the icon to use an interface to select the date

DatePicker(value=datetime.date(2022, 5, 19), description='Earliest Date')

## Using Your Input

Now, we'll use the inputs you gave to customize the visualizations!

In [8]:
# Replace the two widget outputs with plain values: `selected` becomes the
# result of outputWorkshop and `start` a datetime at midnight of the picked date.
# NOTE(review): the commented-out lines below are a disabled "run again" prompt.
# try:
selected, start = outputWorkshop(selected), dateToDatetime(outputData(start))
# except Exception as e:
#     if type(selected) != list:
#         show("Please run this cell again", tags = ["h3 style='color:#c7513a'", "center"])


KeyboardInterrupt



## Aggregation

In [None]:
# For every created workshop, keep only responses whose Timestamp is later than `start`
Presentation.after(date = start)

In [None]:
# NOTE(review): looks like a leftover debugging cell. It expects `selected` to
# still be the output widget; once the "Using Your Input" cell has run,
# `selected` holds the value returned by outputWorkshop instead — confirm.
selected.outputs[0]

In [None]:
# NOTE(review): looks like a leftover debugging cell. outputWorkshop expects the
# output widget from select_workshops(), not an already converted selection — confirm.
outputWorkshop(selected)