In [2]:
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import StandardScaler
from gensim.models import Word2Vec
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import json

# Fix the global NumPy seed so the simulated samples are reproducible.
np.random.seed(40)

# Drink dataset definition: one entry per drink, with the sampling
# parameters used to simulate its Amount and Quantity values.
data = {
    'Class': ['A', 'B', 'C', 'D', 'E', 'F', 'G'],
    'Drink': ['7Up', 'Sprite', 'Pepsi', 'Coke', 'Cappuccino', 'Espresso', 'Latte'],
    'Rank': [7, 6, 5, 4, 3, 2, 1],
    'Amount(μ,σ)': [(100, 200), (200, 10), (200, 10), (400, 100), (800, 10), (800, 10), (900, 400)],
    'Quantity(Range)': [(500, 1000), (500, 1000), (500, 1000), (500, 1000), (1, 500), (1, 500), (1, 500)],
    'Count': [100, 200, 100, 400, 400, 200, 100],  # initially defined sample counts
}

# Raw parameter table.
df = pd.DataFrame(data)

# Column-oriented accumulator for the per-drink summary rows.
final_data = {
    column: []
    for column in ('Class', 'Drink', 'Rank', 'Amount', 'Quantity', 'Count')
}

# Simulate Amount and Quantity for every drink and keep only their means.
for _, record in df.iterrows():
    amount_mean, amount_std = record['Amount(μ,σ)']
    qty_low, qty_high = record['Quantity(Range)']
    n_samples = record['Count']

    # Draw order (normal first, then integers) is kept fixed so the seeded
    # random stream yields the same values on every run.
    sampled_amounts = np.random.normal(amount_mean, amount_std, n_samples)
    # randint excludes its upper bound, hence the +1 to include qty_high.
    sampled_quantities = np.random.randint(qty_low, qty_high + 1, n_samples)

    summary = {
        'Class': record['Class'],
        'Drink': record['Drink'],
        'Rank': record['Rank'],
        'Amount': sampled_amounts.mean(),
        'Quantity': sampled_quantities.mean(),
        'Count': n_samples,
    }
    for column, value in summary.items():
        final_data[column].append(value)

# Final per-drink summary table.
final_df = pd.DataFrame(final_data)

# Standardize the numeric columns in place.
numeric_columns = ['Rank', 'Amount', 'Quantity', 'Count']
scaler = StandardScaler()
final_df[numeric_columns] = scaler.fit_transform(final_df[numeric_columns])

# Embed the drink names with Word2Vec; every drink forms its own
# one-token sentence.
# NOTE(review): with a single token per sentence there are presumably no
# context pairs to learn from - confirm the vectors carry real signal.
drinks = final_df['Drink'].tolist()
sentences = [[name] for name in drinks]

w2v_model = Word2Vec(
    sentences,
    vector_size=10,
    window=1,
    min_count=1,
    sg=1,
    seed=40,
)
drink_vectors = np.array([w2v_model.wv[name] for name in drinks])

# Append one vec_<i> column per embedding dimension to the summary table.
vector_columns = [f'vec_{dim}' for dim in range(drink_vectors.shape[1])]
drink_vectors_df = pd.DataFrame(drink_vectors, columns=vector_columns)
final_df = pd.concat([final_df, drink_vectors_df], axis=1)

# Pairwise Euclidean distance between drinks over the scaled numeric
# columns plus the embedding columns.
features = numeric_columns + vector_columns
similarity_matrix = pairwise_distances(final_df[features], metric='euclidean')
distance_df = pd.DataFrame(similarity_matrix, index=drinks, columns=drinks)

# Print the distance matrix.
print("Drink的距離矩陣:")
print(distance_df)


# t-SNE
# similarity_matrix is a square matrix of pairwise distances, not a feature
# table. Without metric='precomputed' TSNE would treat each row as a feature
# vector and recompute distances between rows. Precomputed input cannot use
# PCA initialization, so init='random' is set explicitly.
tsne = TSNE(
    n_components=2,
    random_state=42,
    perplexity=2,
    metric='precomputed',
    init='random',
)
tsne_results_similarity = tsne.fit_transform(similarity_matrix)

# Dash application.
app = dash.Dash(__name__)

# One marker color per drink, paired in table order.
colors = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'purple']
color_map = dict(zip(final_df['Drink'], colors))

# Single scatter trace: one labelled marker per drink at its t-SNE position.
marker_colors = [color_map[name] for name in final_df['Drink']]
data = [
    go.Scatter(
        x=tsne_results_similarity[:, 0],
        y=tsne_results_similarity[:, 1],
        mode='markers+text',
        text=final_df['Drink'],
        textposition='top center',
        marker={'size': 12, 'color': marker_colors},
    ),
]

layout = go.Layout(
    title='t-SNE of Drinks on Similarity Matrix',
    xaxis={'title': 'Dimension 1'},
    yaxis={'title': 'Dimension 2'},
    hovermode='closest',
)

# Page sections: the graph, followed by one panel per interaction type.
# Each panel's <pre> element is filled in by a callback keyed on its id.
graph_panel = dcc.Graph(
    id='tsne-graph',
    figure={'data': data, 'layout': layout},
)

hover_panel = html.Div([
    dcc.Markdown("""
            **Hover Data**
            Mouse over values in the graph.
        """),
    html.Pre(id='hover-data'),
])

click_panel = html.Div([
    dcc.Markdown("""
            **Click Data**
            Click on points in the graph.
        """),
    html.Pre(id='click-data'),
])

selection_panel = html.Div([
    dcc.Markdown("""
            **Selection Data**
            Choose the lasso or rectangle tool in the graph's menu
            bar and then select points in the graph.
        """),
    html.Pre(id='selected-data'),
])

app.layout = html.Div([graph_panel, hover_panel, click_panel, selection_panel])

@app.callback(
    Output('hover-data', 'children'),
    [Input('tsne-graph', 'hoverData')],
)
def display_hover_data(hoverData):
    """Render the graph's hover payload as indented JSON text.

    Args:
        hoverData: Value of the ``hoverData`` property of ``tsne-graph``.

    Returns:
        ``hoverData`` serialized by ``json.dumps`` with a 2-space indent,
        used as the children of the ``hover-data`` element.
    """
    hover_json = json.dumps(hoverData, indent=2)
    return hover_json

@app.callback(
    Output('click-data', 'children'),
    [Input('tsne-graph', 'clickData')],
)
def display_click_data(clickData):
    """Render the graph's click payload as indented JSON text.

    Args:
        clickData: Value of the ``clickData`` property of ``tsne-graph``.

    Returns:
        ``clickData`` serialized by ``json.dumps`` with a 2-space indent,
        used as the children of the ``click-data`` element.
    """
    click_json = json.dumps(clickData, indent=2)
    return click_json

@app.callback(
    Output('selected-data', 'children'),
    [Input('tsne-graph', 'selectedData')],
)
def display_selected_data(selectedData):
    """Render the graph's selection payload as indented JSON text.

    Args:
        selectedData: Value of the ``selectedData`` property of
            ``tsne-graph``.

    Returns:
        ``selectedData`` serialized by ``json.dumps`` with a 2-space indent,
        used as the children of the ``selected-data`` element.
    """
    selection_json = json.dumps(selectedData, indent=2)
    return selection_json

if __name__ == '__main__':
    # Newer Dash releases start the server with `app.run` and no longer
    # accept `app.run_server`; older releases may only provide the latter.
    # Prefer `run` when it exists so the script starts on either.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


Drink的距離矩陣:
                 7Up    Sprite         Pepsi      Coke  Cappuccino  Espresso  \
7Up         0.000000  1.017510  1.071149e+00  3.007474    4.336312  3.974350   
Sprite      1.017510  0.000000  9.728307e-01  2.003688    3.555650  3.425595   
Pepsi       1.071149  0.972831  4.214685e-08  2.547805    3.745725  3.168252   
Coke        3.007474  2.003688  2.547805e+00  0.000000    2.404297  3.007392   
Cappuccino  4.336312  3.555650  3.745725e+00  2.404297    0.000000  1.713040   
Espresso    3.974350  3.425595  3.168252e+00  3.007392    1.713040  0.000000   
Latte       4.514016  4.097352  3.644110e+00  3.887165    2.660979  1.082886   

               Latte  
7Up         4.514016  
Sprite      4.097352  
Pepsi       3.644110  
Coke        3.887165  
Cappuccino  2.660979  
Espresso    1.082886  
Latte       0.000000  
