In [4]:
import pandas as pd
import plotly.graph_objects as go

In [74]:
# Load the evaluation report into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
data = pd.read_csv("evaluation_data/report.csv")

In [75]:
# Show the loaded frame using the notebook's rich DataFrame display.
data

Unnamed: 0,_id,question,true_answer,pre_trained_score,fine_tuned_score,fine_tuned_rag_score
0,0,Can you briefly describe the purpose of the TM...,The TM1 Web API session login establishes a se...,0.289856,0.400897,0.557957
1,1,What kind of parameters can be used to modify ...,"Based on the provided documentation, several p...",0.512015,0.620219,0.859431
2,2,What is the purpose of the `loadWebsheet()` fu...,The `loadWebsheet()` function's purpose is to ...,0.444958,0.546398,0.775292
3,3,What kind of control does the LoginDialog modu...,The LoginDialog module offers control over use...,0.698517,0.749992,0.865212
4,4,What is the order in which TM1 searches for di...,TM1 searches for display formats first in the ...,0.708351,0.874548,0.800631
...,...,...,...,...,...,...
995,995,What is the significance of having a toggle fo...,The toggle for automatic recalculation provide...,0.682023,0.853059,0.807778
996,996,How does the Amazon rainforest contribute to g...,The Amazon rainforest significantly contribute...,0.684349,0.879965,0.767715
997,997,What groups have access to a process when Secu...,The DataAdmin and SecurityAdmin groups have ac...,0.406296,0.518610,0.622738
998,998,What does the ‘commitActiveSandbox’ function d...,The ‘commitActiveSandbox’ function updates the...,0.226719,0.373403,0.488072


In [7]:
# List the column names of the loaded frame.
# NOTE(review): the saved output of this cell comes from an older execution
# (In [7]) and names columns such as `fine_tuned_score_rag` that differ from the
# frame displayed by the previous cell; re-run this cell to refresh it.
data.columns

Index(['_id', 'question', 'pre_trained_response', 'pre_trained_score',
       'fine_tuned_response', 'fine_tuned_score', 'fine_tuned__rag_response',
       'fine_tuned_score_rag'],
      dtype='object')

In [86]:
# Score columns plotted by the cells below, in the order they are compared:
# pre-trained, fine-tuned, fine-tuned with RAG.
ploting_column = ['pre_trained_score', 'fine_tuned_score', 'fine_tuned_rag_score']

In [76]:
# Line chart comparing the three score columns over a fixed window of rows.
x_axis = range(0, len(data['question']))  # not used by this cell; kept so the name stays defined

# Use one window for both axes. Passing the full id column as x while y holds
# only rows 200-249 would pair the 50 scores with the wrong question ids.
window = slice(200, 250)
question_ids = data[data.columns[0]][window]  # first column holds the row id

fig = go.Figure()
for col in ploting_column:
    fig.add_trace(go.Scatter(x=question_ids, y=data[col][window], mode="lines", name=col))
fig.update_layout(title="Model Accuracy Comparison", xaxis_title="Question", yaxis_title="Cosine Similarity Score")
fig.show()

In [77]:
# Box plot of each score column over rows 200-249, one box per model.
fig = go.Figure()

for score_col in ploting_column:
    # Every box shows the mean/std marker and all individual points.
    score_box = go.Box(
        y=data[score_col][200:250],
        name=score_col,
        boxmean='sd',
        marker=dict(color=None),
        boxpoints='all',
        jitter=0.3,
        whiskerwidth=0.2,
    )
    fig.add_trace(score_box)

box_layout = dict(
    title="Model Accuracy Comparison (Box Plot)",
    xaxis_title="Model Type",
    yaxis_title="Cosine Similarity Score",
    boxmode='group',
    boxgap=0.4,
    boxgroupgap=0,
)
fig.update_layout(**box_layout)

fig.show()

In [78]:
# Per-column standard deviation of the three score columns over all rows
# (pandas' default sample standard deviation).
data[ploting_column].std()

pre_trained_score       0.176924
fine_tuned_score        0.170910
fine_tuned_rag_score    0.132567
dtype: float64

In [84]:
import numpy as np

# Rows summarised in this figure.
slice_idx = slice(200, 250)

# Per-model mean, plus the overall min/max of the window. `yrange` scales the
# vertical offset of the annotation labels.
means = [data[col][slice_idx].mean() for col in ploting_column]
ymin = min(data[col][slice_idx].min() for col in ploting_column)
ymax = max(data[col][slice_idx].max() for col in ploting_column)
yrange = ymax - ymin if ymax > ymin else 1e-6  # avoid a zero offset on a flat window

# Percent change of the mean between adjacent models (NaN when the baseline mean is 0).
improvements = [
    np.nan if prev == 0 else ((curr - prev) / prev) * 100
    for prev, curr in zip(means, means[1:])
]

fig = go.Figure()

colors = ['#6C9EF8', '#2EC4B6', '#B58CFF']
for col, color in zip(ploting_column, colors):
    fig.add_trace(
        go.Box(
            y=data[col][slice_idx],
            name=col.replace('_', ' ').title(),
            boxmean='sd',
            marker_color=color,
            boxpoints='all',
            jitter=0.15,        # tighter spread
            pointpos=0,         # points centered on box
            whiskerwidth=0.2
        )
    )

# One annotation per adjacent pair of boxes. On a category axis the boxes sit at
# numeric positions 0, 1, 2, ..., so i + 0.5 is the midpoint between box i and i+1.
for i, pct in enumerate(improvements):
    mid_x = i + 0.5

    # Label position: 10% of the data range above the higher of the two means.
    top_y = max(means[i], means[i + 1])
    label_y = top_y + 0.10 * yrange

    # Arrow head: the average of the two means.
    arrow_y = (means[i] + means[i + 1]) / 2

    # Green badge for an increase, red for a decrease, grey when undefined.
    if np.isnan(pct):
        txt = "N/A"
        txt_color = "black"
        bgcol = "rgba(200,200,200,0.3)"
    else:
        sign = "+" if pct >= 0 else ""
        txt = f"{sign}{pct:.2f}%"
        txt_color = "white"
        bgcol = "rgba(0,160,0,0.85)" if pct >= 0 else "rgba(220,20,60,0.85)"

    fig.add_annotation(
        # (x, y) is where the arrow head points.
        x=mid_x,
        y=arrow_y,
        xref='x',
        yref='y',
        # (ax, ay) is the arrow tail, where the text sits. It is given in data
        # coordinates: the default pixel offset combined with a data-range
        # value would produce an arrow of almost zero length.
        ax=mid_x,
        ay=label_y,
        axref='x',
        ayref='y',
        text=f"<b>{txt}</b>",
        showarrow=True,
        arrowhead=2,
        font=dict(color=txt_color, size=16),
        align='center',
        bordercolor='rgba(0,0,0,0.15)',
        borderwidth=1,
        bgcolor=bgcol,
        opacity=1
    )

fig.update_layout(
    title="Model Accuracy Comparison & Percentage Improvement",
    xaxis_title="Model Type",
    yaxis_title="Cosine Similarity Score",
    boxmode='group',
    boxgap=0.18,
    boxgroupgap=0.06,
    template='plotly_white',
    margin=dict(l=80, r=40, t=80, b=60)
)

fig.show()

In [88]:
import plotly.figure_factory as ff

# Density curves of the three score columns over rows 0-999.
slice_idx = slice(0, 1000)
colors = ['#6C9EF8', '#2EC4B6', '#B58CFF']

# One series and one human-readable label per score column.
hist_data = []
group_labels = []
for score_col in ploting_column:
    hist_data.append(data[score_col][slice_idx])
    group_labels.append(score_col.replace('_', ' ').title())

# Smooth curves only: histogram bars and rug marks are switched off.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    colors=colors,
    show_hist=False,
    show_rug=False,
)

density_layout = dict(
    title="Score Distribution (Histogram / Density Plot)",
    xaxis_title="Cosine Similarity Score",
    yaxis_title="Density",
    template='plotly_white',
)
fig.update_layout(**density_layout)

fig.show()

In [98]:
# No cell above this one imports plotly.express, so bring `px` into scope here;
# without it this cell raises NameError on a fresh kernel.
import plotly.express as px

# Alias DataFrame.iteritems to DataFrame.items when the installed pandas lacks it.
if not hasattr(pd.DataFrame, "iteritems"):
    pd.DataFrame.iteritems = pd.DataFrame.items

# Rows shown in the scatter matrix.
slice_idx = slice(200, 250)

# Pairwise scatter plots of the three score columns, coloured by the fine-tuned score.
fig = px.scatter_matrix(
    data.iloc[slice_idx][ploting_column],
    dimensions=ploting_column,
    color='fine_tuned_score',
    title="Scatter Plot Matrix – Relationships Between Models",
    labels={col: col.replace('_', ' ').title() for col in ploting_column}
)

# Improve layout and spacing
fig.update_traces(
    diagonal_visible=False,  # hide the self-vs-self panels
    marker=dict(size=5, opacity=0.7)
)
fig.update_layout(
    template='plotly_white',
    width=950,          # widen
    height=850,         # taller
    margin=dict(l=60, r=60, t=80, b=60),
    font=dict(size=12),
    dragmode=False
)

fig.show()

In [106]:
# Bar chart of each model's mean score over rows 0-999.
slice_idx = slice(0, 1000)
colors = ['#6C9EF8', '#2EC4B6', '#B58CFF']

mean_scores = []
labels = []
for score_col in ploting_column:
    mean_scores.append(data[score_col][slice_idx].mean())
    labels.append(score_col.replace('_', ' ').title())

# Each bar is annotated with its mean, formatted to three decimals.
mean_bars = go.Bar(
    x=labels,
    y=mean_scores,
    text=[format(score, ".3f") for score in mean_scores],
    textposition='auto',
    marker_color=colors,
)
fig = go.Figure(data=[mean_bars])

bar_layout = dict(
    title="Mean Cosine Similarity Scores Comparison",
    xaxis_title="Model Type",
    yaxis_title="Mean Cosine Similarity Score",
    template='plotly_white',
)
fig.update_layout(**bar_layout)

fig.show()

In [104]:
# Mean of the fine-tuned score over all rows of the frame.
data['fine_tuned_score'].mean()

0.5809350643721223

In [114]:
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Compatibility shim for pandas >= 2.0: alias iteritems to items when it is absent.
if not hasattr(pd.DataFrame, "iteritems"):
    pd.DataFrame.iteritems = pd.DataFrame.items

# Work on rows 200-249 of the three score columns.
slice_idx = slice(200, 250)
df = data.iloc[slice_idx][ploting_column]

# ----- Subplot grid: three scatter panels on top, one full-width panel below -----
scatter_row_specs = [{"type": "xy"}, {"type": "xy"}, {"type": "xy"}]
density_row_specs = [{"type": "xy", "colspan": 3}, None, None]
combined_fig = make_subplots(
    rows=2,
    cols=3,
    subplot_titles=(
        "Pre vs Fine-Tuned",
        "Pre vs Fine-Tuned RAG",
        "Fine-Tuned vs RAG",
        "Score Distribution (Histogram / Density Plot)",
        None,
        None,
    ),
    specs=[scatter_row_specs, density_row_specs],
    row_heights=[0.6, 0.4],
    vertical_spacing=0.12,
)

# ----- Scatter panels: one per pair of score columns -----
pairs = [
    ('pre_trained_score', 'fine_tuned_score'),
    ('pre_trained_score', 'fine_tuned_rag_score'),
    ('fine_tuned_score', 'fine_tuned_rag_score'),
]

for panel_col, (x_col, y_col) in enumerate(pairs, start=1):
    x_label = x_col.replace('_', ' ').title()
    y_label = y_col.replace('_', ' ').title()
    # Points are coloured by the fine-tuned score; only the third panel draws the colour bar.
    marker_style = dict(
        size=6,
        color=df['fine_tuned_score'],
        colorscale='Viridis',
        showscale=(panel_col == 3),
        colorbar=dict(title="Fine Tuned Score"),
    )
    pair_scatter = go.Scatter(
        x=df[x_col],
        y=df[y_col],
        mode='markers',
        marker=marker_style,
        name=f"{x_label} vs {y_label}",
    )
    combined_fig.add_trace(pair_scatter, row=1, col=panel_col)

# ----- Density curves in the bottom panel -----
hist_data = []
group_labels = []
for score_col in ploting_column:
    hist_data.append(df[score_col])
    group_labels.append(score_col.replace('_', ' ').title())
colors = ['#6C9EF8', '#2EC4B6', '#B58CFF']

# Build a stand-alone distplot, then copy its traces into the combined figure.
dist = ff.create_distplot(
    hist_data,
    group_labels,
    colors=colors,
    show_hist=False,
    show_rug=False,
)
for density_trace in dist['data']:
    combined_fig.add_trace(density_trace, row=2, col=1)

# ----- Layout -----
figure_layout = dict(
    height=900,
    width=1100,
    title="Model Relationships and Score Distribution",
    template='plotly_white',
    showlegend=True,
)
combined_fig.update_layout(**figure_layout)

combined_fig.update_xaxes(title_text="Score", row=2, col=1)
combined_fig.update_yaxes(title_text="Density", row=2, col=1)

combined_fig.show()

In [136]:
# `stats` is only imported in the cell below this one, so import it (and numpy)
# here; otherwise this cell raises NameError when the notebook is run top to bottom.
import numpy as np
import scipy.stats as stats

# NOTE(review): `slice_idx` and `colors` are not defined in this cell. They are
# whatever an earlier cell last assigned, so the rows plotted depend on execution
# order. Consider setting them explicitly here.
fig = go.Figure()

for col, color in zip(ploting_column, colors):
    scores = data[col][slice_idx]
    label = col.replace('_', ' ').title()

    # Translucent, density-normalised histogram so it shares a y axis with the KDE curve.
    fig.add_trace(go.Histogram(
        x=scores,
        name=label,
        marker_color=color,
        opacity=0.25,
        histnorm='probability density',
        nbinsx=100
    ))

    # Gaussian KDE evaluated on 200 evenly spaced points spanning the observed range.
    x_vals = np.linspace(scores.min(), scores.max(), 200)
    kde = stats.gaussian_kde(scores)
    fig.add_trace(go.Scatter(
        x=x_vals,
        y=kde(x_vals),
        mode='lines',
        line=dict(color=color, width=3),
        name=f"{label} Density"
    ))

fig.update_layout(
    barmode='overlay',  # draw the histograms on top of each other rather than stacked
    title="Score Distribution (Histogram + Density Curves)",
    xaxis_title="Cosine Similarity Score",
    yaxis_title="Density",
    template='plotly_white',
    height=600, width=950
)

fig.show()

In [127]:
import scipy.stats as stats