# Dashboard

In [15]:
import ast
import base64
import io

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from dash import Dash, html, dash_table, dcc, Output, Input
from termcolor import colored
from wordcloud import WordCloud

In [16]:
# Custom colour palette: 15 distinct colours, one per LDA topic.
# Every entry must be unique, otherwise two topics are drawn in the same colour
# and cannot be told apart in the histograms.
custom_color_palette = [
    "#ff355e", "#fd5b78", "#ff6037", "#ff9966", "#ff9933",
    "#ffcc33", "#ffff66", "#ccff00", "#66ff66", "#aaf0d1",
    "#16d0cb", "#50bfe6", "#9c27b0", "#ee34d2", "#ff00cc",
]

In [17]:
COLLAB = False  # Set to True to read the CSV files from the Colab Drive paths instead of the local Datasets/ folder

In [18]:
# Read the wrangled poems from the Colab Drive path when COLLAB is set,
# otherwise from the local Datasets folder
data = pd.read_csv(
    '/content/drive/MyDrive/data_wrangled.csv' if COLLAB else 'Datasets/data_wrangled.csv',
    encoding='latin-1',
)

# Preview the first five rows
data.head(5)

Unnamed: 0,title,author,categories,poem
0,The 80&rsquo;s Miracle Diet,By Melvin Dixon,"['Living', 'Health & Illness']",Yours free without the asking Quick delivery v...
1,All Saints&rsquo;,By Corey Van Landingham,[],Caravaggio's face in the sunken pumpkin Bulbs ...
2,And These Are Just a Few ...,By Melvin Dixon,"['Living', 'Health & Illness', 'Social Comment...",This poem is for the epidemic dead and the liv...
3,ASMR,By Corey Van Landingham,[],Why not climb up the mountain of delight To th...
4,ASMR,By Corey Van Landingham,[],Hello Tonight we will trace the static bough t...


In [19]:
# Show the text of the poem stored under row label 44000
data.loc[44000, "poem"]

"The Three Poets The second has written a sonnet upon the mutability of woman And the third writes an epigram to Candidia PAGANI'S Suddenly discovering in the eyes of the very beautiful Nor mande cocotte The eyes of the very learned museum assistant THE LAKE ISLE O God O Venus O Mercury patron of thieves Give me in due time I beseech you a little tobacco shop With the little bright boxes piled up neatly upon the shelves And the loose fragrant cavendish and the shag And the bright Virginia loose under the bright glass cases And a pair of scales not too greasy And the volailles dropping in for a word or two in passing For a flip word and to tidy their hair a bit O God O Venus O Mercury patron of thieves Lend me a little tobacco shop or install me in any profession Save this damn d profession of writing where needs s brains all the time"

In [20]:
# Read the topic word lists from the Colab Drive path when COLLAB is set,
# otherwise from the local Datasets folder
topic_df = pd.read_csv(
    '/content/drive/MyDrive/topic_df.csv' if COLLAB else 'Datasets/topic_df.csv',
    encoding='latin-1',
)

# Preview up to fifteen topics
topic_df.head(15)

Unnamed: 0,Topics
0,"['night', 'come', 'go', 'know', 'sleep', 'hear..."
1,"['water', 'sea', 'wind', 'white', 'sky', 'ligh..."
2,"['poetry', 'say', 'know', 'think', 'poet', 'ma..."
3,"['shall', 'god', 'thy', 'thee', 'may', 'love',..."
4,"['us', 'beggar', 'faintly', 'blue', 'color', '..."
5,"['verse', 'poetry_magazine', 'flower', 'upon',..."
6,"['light', 'flute', 'music', 'bird', 'note', 'e..."
7,"['body', 'eye', 'hand', 'know', 'word', 'love'..."
8,"['room', 'light', 'wall', 'see', 'eye', 'air',..."
9,"['man', 'old', 'say', 'black', 'town', 'back',..."


In [21]:
# Print the full "Topics" entry stored under row label 1
print(topic_df.loc[1, "Topics"])

['water', 'sea', 'wind', 'white', 'sky', 'light', 'blue', 'green', 'tree', 'black', 'come', 'sun', 'river', 'stone', 'leave', 'eye', 'go', 'fall', 'dark', 'rock']


In [22]:
# Read the per-document topic assignments from the Colab Drive path when COLLAB
# is set, otherwise from the local Datasets folder
dashdata = pd.read_csv(
    '/content/drive/MyDrive/dashdata.csv' if COLLAB else 'Datasets/dashdata.csv'
)

# Preview the first twenty-five rows
dashdata.head(25)

Unnamed: 0,Document Index,Predominant Topic,Topic Distribution
0,0,1,"[0, 0.7104108, 0, 0, 0, 0.04767357, 0, 0, 0.05..."
1,1,5,"[0, 0.25395682, 0, 0, 0, 0.3283527, 0.20961152..."
2,2,2,"[0, 0.18636362, 0.5357853, 0, 0, 0, 0, 0, 0.10..."
3,3,10,"[0, 0, 0.23145735, 0.029389093, 0, 0, 0, 0, 0,..."
4,4,12,"[0, 0, 0, 0, 0.11058025, 0, 0, 0, 0, 0, 0.2841..."
5,5,10,"[0, 0, 0.0328935, 0, 0.32178554, 0, 0, 0.05563..."
6,6,10,"[0, 0.2884289, 0.07154473, 0, 0, 0, 0, 0, 0, 0..."
7,7,10,"[0.13097183, 0, 0.116376236, 0.13909899, 0.086..."
8,8,8,"[0, 0.18895902, 0, 0.042033803, 0, 0, 0, 0, 0...."
9,9,10,"[0, 0.17507647, 0, 0, 0, 0.25174677, 0.0757936..."


In [23]:
# Build one display label per row of topic_df: "Topic 1", "Topic 2", ...
topics = list(map("Topic {}".format, range(1, len(topic_df) + 1)))
print(topics)

['Topic 1', 'Topic 2', 'Topic 3', 'Topic 4', 'Topic 5', 'Topic 6', 'Topic 7', 'Topic 8', 'Topic 9', 'Topic 10', 'Topic 11', 'Topic 12', 'Topic 13', 'Topic 14', 'Topic 15']


In [24]:
# Attach the poem text to the dashboard frame; pandas aligns the two frames on
# their row index, so row i of dashdata receives row i of data
dashdata["poem"] = data.loc[:, "poem"]


In [25]:
# Preview the frame now that the poem column is attached (head() defaults to five rows)
dashdata.head()

Unnamed: 0,Document Index,Predominant Topic,Topic Distribution,poem
0,0,1,"[0, 0.7104108, 0, 0, 0, 0.04767357, 0, 0, 0.05...",Yours free without the asking Quick delivery v...
1,1,5,"[0, 0.25395682, 0, 0, 0, 0.3283527, 0.20961152...",Caravaggio's face in the sunken pumpkin Bulbs ...
2,2,2,"[0, 0.18636362, 0.5357853, 0, 0, 0, 0, 0, 0.10...",This poem is for the epidemic dead and the liv...
3,3,10,"[0, 0, 0.23145735, 0.029389093, 0, 0, 0, 0, 0,...",Why not climb up the mountain of delight To th...
4,4,12,"[0, 0, 0, 0, 0.11058025, 0, 0, 0, 0, 0, 0.2841...",Hello Tonight we will trace the static bough t...


In [26]:
# Number of documents (rows) in the dashboard frame
print(dashdata.shape[0])

44860


In [27]:
# Report the (rows, columns) shape, then preview the first ten rows
print(f"Size: {dashdata.shape}")
dashdata.head(n=10)



Size: (44860, 4)


Unnamed: 0,Document Index,Predominant Topic,Topic Distribution,poem
0,0,1,"[0, 0.7104108, 0, 0, 0, 0.04767357, 0, 0, 0.05...",Yours free without the asking Quick delivery v...
1,1,5,"[0, 0.25395682, 0, 0, 0, 0.3283527, 0.20961152...",Caravaggio's face in the sunken pumpkin Bulbs ...
2,2,2,"[0, 0.18636362, 0.5357853, 0, 0, 0, 0, 0, 0.10...",This poem is for the epidemic dead and the liv...
3,3,10,"[0, 0, 0.23145735, 0.029389093, 0, 0, 0, 0, 0,...",Why not climb up the mountain of delight To th...
4,4,12,"[0, 0, 0, 0, 0.11058025, 0, 0, 0, 0, 0, 0.2841...",Hello Tonight we will trace the static bough t...
5,5,10,"[0, 0, 0.0328935, 0, 0.32178554, 0, 0, 0.05563...",For Didier Vermont leaves under glass Your han...
6,6,10,"[0, 0.2884289, 0.07154473, 0, 0, 0, 0, 0, 0, 0...",Say under land are remembered and muddy rushes...
7,7,10,"[0.13097183, 0, 0.116376236, 0.13909899, 0.086...",Galleria Nazionale d'Arte Antica Rome That s t...
8,8,8,"[0, 0.18895902, 0, 0.042033803, 0, 0, 0, 0, 0....",As in a net Headfirst on the prairie through t...
9,9,10,"[0, 0.17507647, 0, 0, 0, 0.25174677, 0.0757936...",After Kara lker's Blue Everyday some brown wom...


TODO: 
Add a plot where a poem can be selected, showing its clean tokens and the 3 topic IDs it belongs to. 
The words of each topic ID can then be inspected and compared with the topic of the poem. 

<div style="background-color:lightgreen; padding:10px;">
    <span style="color:black;">We highly recommend running this notebook and opening the Dash app in your browser. The default port is 8050; to access it, go to <a href="http://localhost:8050/" style="color:green;">http://localhost:8050/</a>.</span>
</div>


In [28]:
# Initialize the app
app = Dash("ML_Final_Project")

# Largest selectable poem number. It is read from the data (instead of being
# typed into the label by hand) so the label text and the input's upper bound
# always agree. Cast to a plain int so it serialises cleanly into the layout.
max_document_index = int(dashdata['Document Index'].max())

# App layout
# NOTE: Dash `style` dictionaries use camelCase keys (flexDirection, alignItems),
# not the hyphenated CSS spelling.
app.layout = html.Div([
    html.H1("Poems Topic Finder"),

    # Container for the input field and its label
    html.Div(
        [
            html.H3(f'Introduce a Poem Number (0-{max_document_index})   '),
            dcc.Input(
                id='document-index-input',
                type='number',
                placeholder='Enter document index',
                min=0,
                max=max_document_index,  # Maximum document index
                # Default value, clamped so it stays valid for smaller datasets
                value=min(12000, max_document_index),
            ),
        ],
        # Align items horizontally
        style={'display': 'flex', 'flexDirection': 'row', 'alignItems': 'center'},
    ),

    # Container to display the predominant topic and words
    # (filled by the callback bound to 'document-index-input')
    html.Div(id='predominant-topic-output'),

    # Dropdown menu to select a topic
    html.Div([
        html.Br(), html.Br(),
        html.H2("Select a Topic to see its words: "),
        dcc.Dropdown(
            id='topic-dropdown',
            options=[{'label': topic, 'value': topic} for topic in topics],
            value=topics[0],  # Default value
        ),
    ]),

    # Word Cloud to display the words associated with the selected topic
    # (filled by the callback bound to 'topic-dropdown')
    html.Div(id='wordcloud-container'),

    html.Div([
        html.Br(), html.Br(),
        html.H2("Now let´s see the distribution of the Topics: "),
    ]),

    # Histogram showing the predominant LDA topic distribution
    dcc.Graph(
        figure=px.histogram(
            dashdata,
            x='Document Index',
            color='Predominant Topic',
            color_discrete_sequence=custom_color_palette,
            title='Predominant LDA Topic Distribution',
            # Keep the legend sorted by topic number rather than by first appearance
            category_orders={'Predominant Topic': sorted(dashdata['Predominant Topic'].unique())},
        )
    ),

    html.I("Notice that you can interact by clicking on the Topic Number and see that Topic alone or with the others"),
    html.H4("We have computed the amount of topics that we can observe for each of the poems. We can see that there are mainly 5 Topics that matter, and the other ones are not important at all."),
    html.H2("Let´s see the topics that are more predominant in the poems: "),

    # Histogram showing the distribution of the predominant topics
    dcc.Graph(
        figure=px.histogram(
            dashdata,
            x='Predominant Topic',
            color='Predominant Topic',
            color_discrete_sequence=custom_color_palette,
            title='Distribution of Predominant Topics',
        )
    ),

    html.I("Notice that you can interact by clicking on the Topic Number and see that Topic alone or with the others"),
    html.H4("We can see that effectively, only 7 topics are relevant in most of the documents, and many of them are not really important. Notice that we have chosen 15 topics because it is the best possible amount acording to the coherence score."),
    html.Br(), html.Br(), html.Br(), html.Br(),

    # Container to display document info
    html.Div([html.Div(id='document-info-container')]),
])


def _topic_words(raw_topic):
    """Return the words of one ``topic_df['Topics']`` cell as a list of strings.

    The CSV stores each topic as the text of a Python list
    (e.g. ``"['water', 'sea', ...]"``), so the cell has to be parsed; iterating
    over it directly would walk the string character by character.
    Values that are already lists/tuples are passed through.
    """
    if isinstance(raw_topic, str):
        try:
            raw_topic = ast.literal_eval(raw_topic)
        except (ValueError, SyntaxError):
            # Not a list literal: treat the text as whitespace-separated words
            return raw_topic.split()
    if isinstance(raw_topic, (list, tuple)):
        return [str(word) for word in raw_topic]
    return [str(raw_topic)]


def _wordcloud_image(words):
    """Render ``words`` as a word cloud and return it as a Dash component.

    The cloud is drawn to a PIL image, encoded as PNG and embedded in an
    ``html.Img`` through a base64 data URI, so no file is written. Matplotlib
    is not involved, hence no pyplot state is touched inside the callbacks.
    An empty word list yields a short message instead of an image, because
    ``WordCloud.generate`` needs at least one word.
    """
    if not words:
        return html.P('No words available for this topic.')

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(' '.join(words))

    img_buffer = io.BytesIO()
    wordcloud.to_image().save(img_buffer, format="PNG")
    img_base64 = base64.b64encode(img_buffer.getvalue()).decode()

    # Dash style dictionaries use camelCase keys
    return html.Img(src=f'data:image/png;base64,{img_base64}', style={'maxWidth': '100%'})


# Callback to update the Word Cloud based on the selected topic
@app.callback(
    Output('wordcloud-container', 'children'),
    [Input('topic-dropdown', 'value')]
)
def update_wordcloud(selected_topic):
    """Show the word cloud of the topic chosen in the dropdown.

    Args:
        selected_topic: One of the labels in ``topics``, or ``None`` when the
            user clears the dropdown.

    Returns:
        An ``html.Div`` holding the word-cloud image, or an empty ``html.Div``
        when nothing valid is selected.
    """
    # A cleared dropdown sends None; topics.index(None) would raise ValueError
    if selected_topic not in topics:
        return html.Div()

    # The position of the label in `topics` is the row of that topic in topic_df
    topic_index = topics.index(selected_topic)
    words = _topic_words(topic_df.iloc[topic_index]['Topics'])

    return html.Div([
        _wordcloud_image(words),
        html.Br(), html.Br(),
    ])


# Callback to update the predominant topic and display document words based on the entered document index
@app.callback(
    Output('predominant-topic-output', 'children'),
    [Input('document-index-input', 'value')]
)
def update_predominant_topic(document_index):
    """Show a poem together with the word cloud of its predominant topic.

    Args:
        document_index: Value of the number input; ``None`` when it is empty.

    Returns:
        An ``html.Div`` with the poem text and the topic word cloud, or an
        explanatory message when the index is missing or unknown.
    """
    if document_index is None:
        return html.Div([
            html.H2('No poem Index Entered'),
            html.P('Please enter a poem index to find the predominant topic and see the words in the poem.')
        ])

    # Non-integer or out-of-range values simply match no row
    document_info = dashdata[dashdata['Document Index'] == document_index]
    if document_info.empty:
        return html.Div([
            html.H2('poem Index Not Found'),
            html.P('Please enter a valid poem index.')
        ])

    document_index = int(document_index)
    row = document_info.iloc[0]
    poem = row['poem']

    # 'Predominant Topic' is a 0-based topic id: in the data it equals the
    # position of the largest value in 'Topic Distribution'. It therefore
    # indexes topic_df directly; subtracting 1 would show the neighbouring
    # topic and wrap topic 0 around to the last row.
    # NOTE(review): assumes topic_df rows are stored in topic-id order - confirm.
    predominant_topic = int(row['Predominant Topic'])
    if not 0 <= predominant_topic < len(topic_df):
        return html.Div([
            html.H2('Topic Not Found'),
            html.P(f'Poem {document_index} refers to topic {predominant_topic}, which has no word list.')
        ])

    # Use the same label as the dropdown, so the user can select that entry
    # there and get the same words
    topic_label = topics[predominant_topic]
    words = _topic_words(topic_df.iloc[predominant_topic]['Topics'])

    # Display the poem and the Word Cloud of its predominant topic
    return html.Div([
        html.H3(f'Content of the poem {document_index} is: '),
        html.P(poem),
        html.H3(f'The predominant Topic for Poem {document_index} is {topic_label}:'),
        _wordcloud_image(words),
        html.P(),
    ])



# Run the app
if __name__ == '__main__':
    # `app.run` is the current entry point; `run_server` is the legacy name and is
    # rejected by Dash 3. Fall back to it only on old Dash versions without `run`.
    start_server = app.run if hasattr(app, "run") else app.run_server
    start_server(debug=True)


<div style="background-color:lightgreen; padding:10px;">
    <span style="color:black;">We highly recommend running this notebook and opening the Dash app in your browser. The default port is 8050; to access it, go to <a href="http://localhost:8050/" style="color:green;">http://localhost:8050/</a>.</span>
</div>
