## 3) Create an interactive Dashboard aimed at older adults (65+) with specific features to summarise the most important aspects of the data and identify through your visualisation why this dataset is suitable for Machine Learning models in an online retail business. Explain how your dashboard is designed with this demographic in mind.

In [1]:
import warnings
warnings.filterwarnings("ignore")
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.graph_objs as go

# Load the transactions file; every chart in the dashboard is derived from it.
df = pd.read_csv('Groceries.csv')

# Map the day_of_week codes 0-6 onto day names, Monday first.
day_mapping = dict(enumerate(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
))

df['name_of_day'] = df['day_of_week'].map(day_mapping)

# Member numbers of the ten members with the most rows in the data.
top_10_members = (
    df['Member_number']
    .value_counts()
    .nlargest(10)
    .index
)

app = dash.Dash(__name__)

# Dropdown choices as plain Python values (not NumPy scalars) so they
# serialise to JSON without relying on a special encoder.
_member_choices = top_10_members.tolist()
_year_choices = df['year'].unique().tolist()

# Dash hands style dictionaries to React, which expects camelCase property
# names (``fontSize``) rather than CSS-style hyphenated ones (``font-size``).
_LABEL_STYLE = {'fontSize': '18px'}
_DROPDOWN_STYLE = {'width': '100%', 'fontSize': '16px'}

app.layout = html.Div([
    html.H1("Member Purchase History", style={'fontSize': '32px'}),

    # Dropdowns and label
    html.Div([
        # Label and dropdown for selecting Member_number
        html.Div([
            html.Label('Select Member Number:', style=_LABEL_STYLE),
            dcc.Dropdown(
                id='member-dropdown',
                options=[{'label': member, 'value': member} for member in _member_choices],
                # Default to the first choice; None when there are no choices.
                value=_member_choices[0] if _member_choices else None,
                multi=False,
                style=_DROPDOWN_STYLE,
            ),
        ], style={'display': 'inline-block', 'width': '48%'}),

        # Label and dropdown for selecting year
        html.Div([
            html.Label('Select Year:', style=_LABEL_STYLE),
            dcc.Dropdown(
                id='year-dropdown',
                options=[{'label': year, 'value': year} for year in _year_choices],
                # Default to the first choice; None when there are no choices.
                value=_year_choices[0] if _year_choices else None,
                multi=False,
                style=_DROPDOWN_STYLE,
            ),
        ], style={'display': 'inline-block', 'width': '48%'}),
    ]),

    # First row: top-5 pie (left) and per-item bar chart (right)
    html.Div([
        # Pie
        html.Div([
            dcc.Graph(id='top-items-pie'),
        ], style={'display': 'inline-block', 'width': '42%'}),

        # Bar
        html.Div([
            dcc.Graph(id='item-description-bar'),
        ], style={'display': 'inline-block', 'width': '53%'}),
    ]),

    # Second row: monthly scatter plot (left) and day-of-week pie (right)
    html.Div([
        html.Div([
            dcc.Graph(id='month-scatter'),
        ], style={'display': 'inline-block', 'width': '53%'}),

        # Pie #2
        html.Div([
            dcc.Graph(id='day-of-week-pie'),
        ], style={'display': 'inline-block', 'width': '42%'}),
    ]),

    # Top 7 items
    html.Div([
        html.Div([
            dcc.Graph(id='year-comparison-bar'),
        ], style={'display': 'inline-block', 'width': '100%'}),
    ]),
])

def _build_year_comparison_figure(data, years=(2014, 2015), top_n=7):
    """Build the grouped bar chart comparing best-selling items across years.

    Args:
        data: Frame with 'year' and 'itemDescription' columns.
        years: Years to compare; one bar trace is produced per year.
        top_n: How many best sellers to take from each year.

    Returns:
        A figure dictionary with one bar trace per year, covering the union
        of each year's ``top_n`` items.
    """
    years = list(years)

    # Rows are items, columns are years. Missing item/year combinations (or a
    # year absent from the data) count as 0 instead of NaN.
    counts = (
        data.groupby('year')['itemDescription']
        .value_counts()
        .unstack(fill_value=0)
        .T
        .reindex(columns=years, fill_value=0)
    )

    best_sellers = set()
    for year in years:
        best_sellers.update(counts[year].nlargest(top_n).index)

    # pandas no longer accepts a set as a .loc indexer, and a set has no
    # stable order; index with a sorted list, then rank by combined sales so
    # the bars appear in the same order on every run.
    selected = counts.loc[sorted(best_sellers), years]
    ranking = selected.sum(axis=1).sort_values(ascending=False, kind='stable').index
    selected = selected.loc[ranking]

    items = selected.index.tolist()
    year_list = ' and '.join(str(year) for year in years)
    return {
        'data': [
            {'x': items,
             'y': selected[year].tolist(),
             'type': 'bar',
             'name': str(year)}
            for year in years
        ],
        'layout': {
            'title': f'Best Sales for Years {year_list}',
            'yaxis': {'title': 'Count'},
            'barmode': 'group',
            'font': {'size': 20},
            'colorway': px.colors.qualitative.Plotly,
        }
    }


# The year comparison does not depend on either dropdown, so it is built once
# here instead of being recomputed on every callback.
_YEAR_COMPARISON_FIGURE = _build_year_comparison_figure(df)


# Define callback to update all the charts based on dropdown selections
@app.callback(
    [Output('item-description-bar', 'figure'),
     Output('top-items-pie', 'figure'),
     Output('month-scatter', 'figure'),
     Output('day-of-week-pie', 'figure'),
     Output('year-comparison-bar', 'figure')],
    [Input('member-dropdown', 'value'),
     Input('year-dropdown', 'value')]
)
def update_charts(selected_member, selected_year):
    """Rebuild the dashboard figures for the selected member and year.

    Args:
        selected_member: Value of the member dropdown.
        selected_year: Value of the year dropdown.

    Returns:
        Five figure dictionaries, in the order of the callback outputs:
        item bar chart, top-5 pie, monthly scatter, day-of-week pie and the
        year-comparison bar chart.
    """
    # 'marker_colors' is a plotly.py constructor shorthand and is not a valid
    # key in a raw trace dictionary. The palette is set through the layout's
    # 'colorway' instead.
    palette = px.colors.qualitative.Plotly

    filtered_df = df[(df['Member_number'] == selected_member) & (df['year'] == selected_year)]

    # Bar: how many times each item was bought
    item_counts = filtered_df['itemDescription'].value_counts()
    bar_figure = {
        'data': [
            {'x': item_counts.index.tolist(),
             'y': item_counts.tolist(),
             'type': 'bar',
             'name': 'Item Count'},
        ],
        'layout': {
            'title': 'Items',
            'yaxis': {'title': 'Count'},
            'font': {'size': 15},
            'colorway': palette,
        }
    }

    # Pie: share of the five most frequent items (value_counts sorts descending)
    top_items = item_counts.head(5)
    pie_figure = {
        'data': [
            {'labels': top_items.index.tolist(),
             'values': top_items.tolist(),
             'type': 'pie',
             'name': 'Top 5 Items'},
        ],
        'layout': {
            'title': 'Top 5 Items',
            'font': {'size': 15},
            'colorway': palette,
        }
    }

    # Scatter plot: number of items bought per month
    month_counts = filtered_df.groupby('month')['itemDescription'].count()
    scatter_figure = {
        'data': [
            {'x': month_counts.index.tolist(),
             'y': month_counts.tolist(),
             'type': 'scatter',
             'mode': 'markers',
             'name': 'Item Count'},
        ],
        'layout': {
            'title': 'Monthly purchases',
            'xaxis': {'title': 'Month'},
            'yaxis': {'title': 'Item Count'},
            'font': {'size': 15},
            'colorway': palette,
        }
    }

    # Pie #2: distribution of purchases over the days of the week
    day_of_week_counts = filtered_df['name_of_day'].value_counts()
    day_of_week_pie_figure = {
        'data': [
            {'labels': day_of_week_counts.index.tolist(),
             'values': day_of_week_counts.tolist(),
             'type': 'pie',
             'name': 'Day of Week Items'},
        ],
        'layout': {
            'title': 'Items by Day of Week',
            'font': {'size': 15},
            'colorway': palette,
        }
    }

    # Bar #2 is independent of the selection and was built once above.
    return (
        bar_figure,
        pie_figure,
        scatter_figure,
        day_of_week_pie_figure,
        _YEAR_COMPARISON_FIGURE,
    )

# Run the app
if __name__ == '__main__':
    # ``run_server`` is the older, deprecated entry point; prefer ``run`` and
    # fall back only on Dash versions that do not provide it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)

This dashboard summarises all the purchases that a member of the store has made. I decided to filter the information by 'Member number' and 'year'. With this combination I can create useful dashboards that help to gain insights about the customers' shopping lists. It also helps to gather the information needed for a market basket analysis after comparing the purchase histories of several members.

At the top of the dashboard, you will see the title of the main product. Just below the title there are two dropdown menus with which you can select the member number and the year you want to filter by.
Then in the first row of our dashboard we will find two plots: 
On the left side I decided to put a pie chart which shows the percentage distribution of the top 5 items purchased by the selected member in the selected year. I decided to make it a little smaller than the other chart because I consider the bar plot to the right more important. Every slice of the pie chart represents an item. The percentage distribution is shown by the size of the slice and by the percentage printed on it. I made them big so they are clear to the user.
On the right side there is a bar graph displaying the count of each item that the member purchased in the selected year. This helps to build a list and clearly shows which items are the most frequent. I use font size 15 on the labels to make them clear and big enough for my customers.

Moving to the second row, I placed a scatter plot on the left side showing the number of purchased items per month. Next to it I placed another pie chart showing on which days of the week the selected member makes his or her purchases. This information is key to grouping the purchases by month or day of the selected year and to getting insights about the history of the selected member.

At the bottom of the dashboard I added a grouped bar chart comparing the top 7 purchased items of 2014 and 2015. The reason I created this chart is to show our clients how the best-selling items changed over the years.

You can interact with the dashboard by selecting the member and the year you want to filter the information by. When you change the selected values, the charts update by themselves with the new parameters and show the new information.

Regarding the colors used in this dashboard, I selected a palette that is soft on the eyes and consistent across all the different visualizations. With this dashboard you will be able to explore trends, patterns and preferences of the members, which can feed a dataset for a market basket analysis in order to boost the sales of some of the items.

## References:

Stack Overflow. (n.d.). What’s difference between item-based and content-based collaborative filtering? [online] Available at: https://stackoverflow.com/questions/16372191/whats-difference-between-item-based-and-content-based-collaborative-filtering 

PYTHON CHARTS | The definitive Python data visualization site. (2022). Bar chart in plotly. [online] Available at: https://python-charts.com/ranking/bar-chart-plotly/#:~:text=If%20you%20want%20to%20change

GitHub. (n.d.). Indexing pandas data frame with set will be deprecated · Issue #7 · aertslab/pycistarget. [online] Available at: https://github.com/aertslab/pycistarget/issues/7

docs.rapidminer.com. (n.d.). FP-Growth - RapidMiner Documentation. [online] Available at: https://docs.rapidminer.com/latest/studio/operators/modeling/associations/fp_growth.html#:~:text=The%20FP%2DGrowth%20algorithm%20is

Jodha, R. (2023). FP Growth Algorithm in Data Mining. [online] Scaler Topics. Available at: https://www.scaler.com/topics/data-mining-tutorial/fp-growth-in-data-mining/ 

Lim, Y. (2022). Data Mining: Market Basket Analysis with Apriori Algorithm. [online] Medium. Available at: https://towardsdatascience.com/data-mining-market-basket-analysis-with-apriori-algorithm-970ff256a92c

Toptal Design Blog. (n.d.). Dashboard Design - Considerations and Best Practices | Toptal®. [online] Available at: https://www.toptal.com/designers/data-visualization/dashboard-design-best-practices#:~:text=Great%20dashboards%20are%20clear%2C%20intuitive.

dash.plotly.com. (n.d.). Dash Documentation & User Guide | Plotly. [online] Available at: https://dash.plotly.com/.

www.youtube.com. (n.d.). Dropdown Selector- Python Dash Plotly. [online] Available at: https://www.youtube.com/watch?v=UYH_dNSX1DM&list=PLh3I780jNsiSvpGtPucq4yusBXVt3SL2Q 

www.youtube.com. (n.d.). Introduction to Plotly Dash Web Application Development Framework | Plotly Dash Tutorial Part -1. [online] Available at: https://www.youtube.com/watch?v=Ma8tS4p27JI&list=PLH6mU1kedUy8fCzkTTJlwsf2EnV_UvOV-