In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [9]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
# from plotly_wordcloud import plotly_wordcloud
from geopy.geocoders import Nominatim
import folium
from wordcloud import WordCloud, STOPWORDS
# from jupyter_dash import JupyterDash

# Stop-word set taken from the wordcloud package's STOPWORDS; it is passed to
# WordCloud in generate_word_map.
stopwords = set(STOPWORDS)
# This ensures Plotly output works in multiple places:
# In VSCode and also nbconvert from jupyter notebook to HTML
# See https://plotly.com/python/renderers/#multiple-renderers
pio.renderers.default = "notebook_connected"


In [10]:
# Customize Plot
# Define a custom Plotly layout template ("bnw"): boxed black axes without
# axis titles, custom left/right margins, font sizes, and a light-blue paper
# background. It is registered below and layered on top of the built-in
# "seaborn" template as the default for plotly.express figures.
bnw = go.layout.Template(
    layout=go.Layout(
        # x axis: 2px black axis line, mirrored to the opposite side, no axis title
        xaxis=go.layout.XAxis(
            showline=True,
            linecolor="black",
            linewidth=2,
            mirror=True,
            title=""
        ),
        margin=go.layout.Margin(
            l=70,
            r=30
        ),
        # y axis: same styling as the x axis
        yaxis=go.layout.YAxis(
            showline=True,
            linecolor="black",
            linewidth=2,
            mirror=True,
            title=""
        ),
        # Base font size for the figure
        font=go.layout.Font(
            # family="Old Standard TT",
            size=12
        ),
        # Larger font for the figure title
        title=go.layout.Title(
            font=go.layout.title.Font(
                # family="Old Standard TT",
                size=18
            )
        ),
        # paper_bgcolor="#eaeaf2"
#         font=dict(color="white"),
#         plot_bgcolor="black"
        # Same background colour that the figure helpers in this notebook set explicitly
        paper_bgcolor="#D0E3F1",
#         font=dict(color="white")
    ),
)

# Register the template under the name "bnw" so it can be referenced by string
pio.templates["bnw"] = bnw

# "a+b" combines templates; here "bnw" is applied together with "seaborn"
px.defaults.template = "seaborn+bnw"

In [11]:
# Functions: 
def clean_notebook():
    '''
        Placeholder helper: takes no arguments and returns the string 'Hi'.
    '''
    greeting = 'Hi'
    return greeting

def data_checker(df: pd.Series, check_unique=False, check_null=True):
    '''
        Print quick sanity information about a single column.

        Parameters:
            df: the pandas Series (one column) to inspect.
            check_unique: set to True to print the distinct values of the column.
            check_null: when True (the default), print how many values are null.

        Returns None; the results are printed only.

        Example:

        >>> df = pd.DataFrame([[1], [2], [2]], columns=["A"])
        >>> data_checker(df["A"], check_unique=True)

        Out:
        Unique values [1 2]
        Number of NA Values: 0
    '''
    if check_unique:
        distinct_values = df.unique()
        print("Unique values", distinct_values)

    if not check_null:
        return

    missing_count = int(df.isnull().sum())
    print("Number of NA Values:", missing_count)

# df = pd.DataFrame([[1], [2], [2]], columns=["A"])
# data_checker(df["A"], check_unique=True)
    

In [13]:
def create_table_with_dropdown(df_new,
                               columns
                               ):
    """
        Build a Plotly table of value counts with a dropdown that switches
        between the given columns.

        Parameters:
            df_new: DataFrame containing every column named in `columns`.
            columns: column names to count; the first one is shown initially
                     and used as the initial figure title.

        Returns the resulting go.Figure.

        Example:

        >>> a = pd.DataFrame({"a" : [1, 1, 2, 2], "b" : [2, 2, 3, 3]})
        >>> create_table_with_dropdown(a, ["a", "b"])

    """
    # One (value, count) frame per requested column, in the order given
    count_frames = []
    for column in columns:
        counts = df_new[column].value_counts()
        count_frames.append(pd.DataFrame(counts).reset_index())

    # The table starts out showing the counts of the first column
    initial_table = go.Table(
        header={"values": ["Question", "Number of Students"]},
        cells={"values": count_frames[0].T.values, "height": 25},
    )
    fig = go.Figure(initial_table)

    # Each button swaps the cell values and the figure title for its column
    dropdown_buttons = []
    for position, column in enumerate(columns):
        dropdown_buttons.append({
            "label": column,
            "method": "update",
            "args": [
                {"cells": {"values": count_frames[position].T.values}},
                {"title": column},
            ],
        })

    dropdown = {
        "bgcolor": "white",
        "yanchor": 'top',
        "xanchor": 'center',
        "direction": 'up',
        "y": -0.5,
        "buttons": dropdown_buttons,
    }
    fig.update_layout(updatemenus=[dropdown], paper_bgcolor="#D0E3F1")
    fig.update_layout(title_text=columns[0])
    return fig


# df = pd.DataFrame({"a" : ["Male", "Female", "Female"], "b" : ["A", "A", "B"]})
# create_table_with_dropdown(df, ["a", "b"])


In [17]:
def create_figure_with_dropdown(df_new, 
                                options=["What is your gender?", "Are you a domestic or international student?"],
                                filters=None,
                                labels=None,
                                is_wordcloud=False,
                                sort_x=False):
    """
        Create a bar chart with a dropdown that switches between the columns in `options`.
        Every distinct value of a column becomes its own histogram trace; the dropdown
        toggles which traces are visible and updates the figure title.

        Parameters:
            df_new: DataFrame containing every column named in `options`.
            options: column names offered in the dropdown. The first one is shown initially.
            filters: optional sequence with one row selector per option (anything accepted by
                     `df_new.loc[selector, option]`). When given, only the selected rows are
                     counted and plotted for that option.
            labels: optional dropdown labels, one per option. Defaults to `options`.
            is_wordcloud: is_wordcloud=True is deprecated (wordclouds are now saved as images and
                          plotted with skimage.io). That branch needs `plotly_wordcloud`, whose
                          import is commented out at the top of this notebook.
            sort_x: when True, x-axis categories are ordered by descending count.

        Returns the resulting go.Figure.
        Raises ValueError if `options` is empty.

        # Create Bar Chart Dropdown
        a = pd.DataFrame({"a" : [1, 1, 2, 2], "b" : [2, 2, 3, 3]})
        create_figure_with_dropdown(a, ["a", "b"])

        # Create Bar Chart with Sorted Counts (sort_x=True). 
        # In this example, sort_x would cause Female to come first in the count plot instead of Male.
        df = pd.DataFrame({"a" : ["Male", "Female", "Female"], "b" : ["A", "A", "B"]})
        create_figure_with_dropdown(df, ["a", "b"], sort_x=True)
        
    """
    if len(options) == 0:
        raise ValueError("options must contain at least one column name")

    if labels is None:
        labels = options

    # Resolve every option to the exact series that is plotted for it, so the
    # traces, the category ordering and the filters always agree.
    series_per_option = [
        df_new[option] if filters is None else df_new.loc[filters[i], option]
        for i, option in enumerate(options)
    ]

    fig = go.Figure()
    visibilities = []

    # Generate Traces
    if is_wordcloud is False:
        unique_values_for_each_option = [series.unique() for series in series_per_option]

        # One histogram trace per distinct value; only the first option starts visible
        for i, series in enumerate(series_per_option):
            for unique_value in unique_values_for_each_option[i]:
                fig.add_trace(go.Histogram(x=list(series[series == unique_value]),
                                           # str() because numpy scalars (e.g. int64) are not valid trace names
                                           name=str(unique_value),
                                           visible=(i == 0),
                                           marker_autocolorscale=True))

        # For option i, build a flat mask over all traces that is True only for
        # the traces that belong to option i
        traces_per_option = [len(values) for values in unique_values_for_each_option]
        for i in range(len(options)):
            visibilities.append([i == j
                                 for j, trace_count in enumerate(traces_per_option)
                                 for _ in range(trace_count)])
    else:
        # Deprecated path: one wordcloud trace per option.
        # NOTE: plotly_wordcloud must be imported for this branch to run.
        for i, option in enumerate(options):
            fig.add_trace(plotly_wordcloud(' '.join(map(str, df_new[option]))))
            visibilities.append([j == i for j in range(len(options))])

    # Categories ordered by descending count, one list per option (used when sort_x is set)
    category_orders = [series.value_counts().index.tolist() for series in series_per_option]

    buttons = []
    for i, option_name in enumerate(options):
        # The "update" method takes [data_update, layout_update]; a third element
        # would be read as trace indices, so title and axis changes must share
        # the single layout dict.
        layout_update = {"title.text": option_name}
        if sort_x:
            layout_update["xaxis"] = {
                "categoryorder" : "array",
                "categoryarray" : category_orders[i]
            }
        buttons.append(dict(label=labels[i],
                            method="update",
                            args=[
                                {"visible" : visibilities[i]},
                                layout_update,
                            ]))

    fig.update_traces(hovertemplate="%{x}=%{y}", marker_autocolorscale=True)
    fig.update_layout(
        updatemenus=[
            dict(
                bgcolor="white",
                active=0,
                yanchor='top',
                xanchor='center',
                direction='up',
                y=-0.5,
                buttons=buttons
            )
        ],
        paper_bgcolor="#D0E3F1",
        title_text=options[0]
    )

    # Initial ordering for the option that is shown first
    if sort_x:
        fig.update_xaxes(categoryorder="array", categoryarray=category_orders[0])

    return fig

# df = pd.DataFrame({"a" : ["Male", "Female", "Female"], "b" : ["A", "A", "B"]})
# create_figure_with_dropdown(df, ["a", "b"], sort_x=True)

# df = pd.DataFrame({"a" : ["Male", "Female", "Female"], "b" : ["A", "A", "B"]})
# create_figure_with_dropdown(df, ["a", "b"], sort_x=False) # Default to False


# df = pd.DataFrame({"a" : ["He", "He", "He", "Loves", "Eating", "Strawberry", "Strawberry", "Wee"],
#                    "b" : ["Sa", "Sa", "Sa", "Sa", "Sa", "Sa", "Ge", "Yo"]})
# create_figure_with_dropdown(df, options=["a", "b"], is_wordcloud=False)

In [19]:
def remove_redundancy(series: pd.Series, delimeter=", "):
    """
        Remove redundancy from multiple choice data columns. Ex. "Course 6, Course 5" and "Course 5, Course 6" would both be
        modified to look like "Course 5, Course 6". 

        Parameters:
            series: column of delimited multiple-choice answers.
            delimeter: literal separator between choices (never interpreted as a regular expression).

        Returns a new Series with the choices of every string entry sorted and re-joined with
        `delimeter`. Missing or non-string entries are passed through unchanged.

        remove_redundancy(pd.Series(["5,6", "6,5", "6,6", "7,8", "8,7"]), delimeter=",")

        Out:
        0    5,6
        1    5,6
        2    6,6
        3    7,8
        4    7,8
        dtype: object
    """
    def _normalise(answer):
        # Unanswered questions show up as NaN/None; leave them untouched
        # instead of failing while trying to sort a float.
        if not isinstance(answer, str):
            return answer
        # Plain str.split keeps the delimiter literal; Series.str.split treats
        # multi-character patterns as regular expressions.
        return delimeter.join(sorted(answer.split(delimeter)))

    series_remove_redundancy = series.map(_normalise)
    return series_remove_redundancy

# remove_redundancy(pd.Series(["5,6", "6,5", "6,6", "7,8", "8,7"]), delimeter=",")
    

In [21]:
def create_table(df_new,
                 column):
    '''
        Returns a Plotly table figure listing the value counts of `column` in `df_new`.
        The first table column holds the distinct values, the second how often each occurs.
    '''
    value_counts = df_new[column].value_counts()
    counts_frame = pd.DataFrame(value_counts).reset_index()

    # Transposing yields one array per table column, which is what go.Table expects
    table = go.Table(
        header={"values": ["Question", "Number of Students"]},
        cells={"values": counts_frame.T.values, "height": 25},
    )
    return go.Figure(table)

# a = pd.DataFrame({"a" : [1, 1, 2, 2]})
# create_table(a, "a")

In [26]:
def generate_word_map(series : pd.Series, file_path=None, width=1000, height=600):
    '''
        Saves a wordcloud into a specific file path

        Parameters:
            series: values to build the cloud from; every entry is converted with str()
                    and all entries are joined with spaces.
            file_path: destination image path. Required, even though it defaults to None.
            width, height: size passed to WordCloud.

        Raises ValueError if no file_path is given.
    '''
    # Fail before the (comparatively slow) cloud generation rather than inside to_file
    if file_path is None:
        raise ValueError("file_path is required: generate_word_map saves the wordcloud to a file")

    cloud = WordCloud(background_color="white", max_words=200, mask=None, 
    stopwords=stopwords, width=width, height=height, colormap="tab20",
    min_font_size=8, max_font_size=125)
    
    cloud.generate(" ".join(map(str, series)))
    cloud.to_file(file_path)

# Usage: save a wordcloud to a specific file path with generate_word_map, then load and plot the saved image.

In [None]:
## Map setup, see Location Profile for Usage
def map_location_setup(location_series: pd.Series, data_coordinates: list[tuple]):
    """
        Combine coordinates and occurrence counts per location into one DataFrame.

        Parameters:
            location_series: one location name per row.
            data_coordinates: one coordinate entry per distinct location, in the order
                              the locations first appear in `location_series`
                              (the order of `location_series.unique()`).

        Returns a DataFrame indexed by location with a "Coordinates" column followed by
        the value-count column.
    """
    # geolocator = Nominatim(user_agent="app")
    # vancouver_location = geolocator.geocode("Vancouver, BC")
    # m = folium.Map(location=(vancouver_location.latitude, vancouver_location.longitude), tiles="cartodbpositron",
    #            zoom_start=2)

    counts_by_location = location_series.value_counts()
    coordinates_frame = pd.DataFrame(
        {"Coordinates": data_coordinates},
        index=location_series.unique().tolist(),
    )
    # Join on the location names, which form the index on both sides
    return coordinates_frame.merge(counts_by_location, left_index=True, right_index=True)

################################################# CAN'T MODULARIZE FOLIUM FOR SOME REASON ############33
# def add_map_markers(df: pd.DataFrame, count_help_text,
#                     icon_object=None):
#     """
#         DataFrame format should look like 

#         | Coordinates | <Count_Column>       
#         -------------------------------

#         Index should be assigned to name of the location.
#         Output from map_location_setup is an acceptable input
#     """
#     m = create_map()
#     if icon_object is None:
#         icon_object = folium.Icon(icon="school", color='lightblue', prefix='fa')

#     for i in range(df.shape[0]):
#         current_data = df.iloc[i]
#         current_name = current_data.name
#         number_per_name = current_data[df.columns[1]]
#         html = f'''
#         <div style="display: flex; justify-content: left; flex-direction: column;">
#             <div style="padding:0 10px 10px 0;color:grey"><b>{current_name}</b></div>
#             <div style="padding:0 10px 10px 0;font-size:40;font-weight:100;text-align:center">{number_per_name}</div>
#             <div style="text-align:center;padding:0 10px 0 0;">{count_help_text}</div>
#         </div>
#         '''

#         iframe = folium.IFrame(html=html, width=170, height=170)
#         icon = icon_object
#         popup = folium.Popup(iframe)
#         folium.Marker(location=current_data["Coordinates"], popup=popup, icon=icon
#         ).add_to(m)

#     return m
    

def create_map(location_query="Vancouver, BC", zoom_start=2):
    """
        Create a folium map centred on a geocoded place.

        Parameters:
            location_query: place name looked up with Nominatim to centre the map.
            zoom_start: initial zoom level of the map.

        Returns a folium.Map with "openstreetmap" base tiles, an additional
        'cartodbpositron' tile layer and a layer control.
        Raises ValueError if the place cannot be geocoded.
    """
    geolocator = Nominatim(user_agent="app")
    center = geolocator.geocode(location_query)
    # geocode returns None when nothing matches; report that clearly instead of
    # failing later with an AttributeError on .latitude
    if center is None:
        raise ValueError(f"Could not geocode map center: {location_query!r}")

    m = folium.Map(location=(center.latitude, center.longitude), tiles="openstreetmap",
               zoom_start=zoom_start)
    folium.TileLayer('cartodbpositron').add_to(m)
    folium.LayerControl().add_to(m)
    return m

def get_figure_for_map(m: folium.Map, height=400):
    """
        Place the map `m` inside a new folium.Figure created with the given
        height and return that figure.
    """
    map_figure = folium.Figure(height=height)
    m.add_to(map_figure)
    return map_figure
    