# Bottling Company Optimization Results Analysis

This notebook analyzes the weight optimization results for the bottling company client and explores why all configurations are returning zero scores.

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set visualization style
# NOTE(review): three global style calls are applied in sequence; later calls
# may override settings made by earlier ones, so the order matters — confirm
# the intended final look before reordering or removing any of them.
plt.style.use('ggplot')
sns.set(font_scale=1.2)
sns.set_style("whitegrid")

## 1. Load the Optimization Results

In [None]:
# Load the optimization results.
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('bottling_optimization_results.json', 'r', encoding='utf-8') as f:
    bottling_results = json.load(f)

# Print the top-level structure to understand the data
print("Keys in the results file:")
print(list(bottling_results.keys()))

# Key under which this client's results are stored
client_key = 'bottling_company_client'
print(f"\nProfile configurations for {client_key}:")
print(list(bottling_results[client_key].keys()))

## 2. Load Test Data to Understand the Ground Truth

In [None]:
# Load the test data to examine ground truth documents.
# Explicit UTF-8 keeps decoding independent of the platform's default encoding.
with open('test_data.json', 'r', encoding='utf-8') as f:
    test_data = json.load(f)

# Examine the bottling company test data
if client_key in test_data:
    print(f"\nTest data available for {client_key}")

    # Check if relevant_docs exists; if not, list the keys that are present
    # so the actual field name can be identified
    if 'relevant_docs' in test_data[client_key]:
        print(f"Number of ground truth documents: {len(test_data[client_key]['relevant_docs'])}")
        print("Ground truth documents:")
        for doc in test_data[client_key]['relevant_docs']:
            print(f"  - {doc}")
    else:
        print("No relevant_docs field found in test data")
        print("Available keys in test data:")
        print(list(test_data[client_key].keys()))
else:
    print(f"No test data found for {client_key}")

## 3. Analyze Profile Components

In [None]:
# Load the bottling company profile to understand what's being used for recommendations.
# Only the file read sits inside the try block, so the FileNotFoundError handler
# covers exactly the operation that can raise it. The encoding is explicit so
# decoding does not depend on the platform's default.
try:
    with open('../profiles/bottling_company_client.json', 'r', encoding='utf-8') as f:
        profile = json.load(f)
except FileNotFoundError:
    print("Profile file not found")
else:
    print("Profile components:")
    for component, data in profile.items():
        if component == 'historical_documents':
            print(f"\n{component}: {len(data)} documents")
            for doc in data[:5]:  # Show first 5 documents
                print(f"  - {doc}")
            if len(data) > 5:
                print(f"  - ... and {len(data)-5} more")
        elif component == 'expert_profile':
            # Report the size of the description as a whitespace-separated word count
            print(f"\n{component}: {len(data.get('description', '').split())} words")
        elif component == 'categorical_preferences':
            print(f"\n{component}:")
            for category, prefs in data.items():
                print(f"  - {category}: {len(prefs)} preferences")
        elif component == 'component_weights':
            print(f"\n{component}:")
            for weight_name, weight_value in data.items():
                print(f"  - {weight_name}: {weight_value}")
        else:
            # Unrecognized component: show only its type
            print(f"\n{component}: {type(data)}")

## 4. Compare Ground Truth with Historical Documents

In [None]:
# Check if ground truth documents overlap with historical documents.
# `profile`, `test_data` and `client_key` are expected to exist already; if one
# of them is missing, the except clause below reports it instead of raising.
try:
    historical_docs = set(profile.get('historical_documents', []))
    ground_truth_docs = set(test_data[client_key].get('relevant_docs', []))

    print(f"Number of historical documents: {len(historical_docs)}")
    print(f"Number of ground truth documents: {len(ground_truth_docs)}")

    # Check overlap
    overlap = historical_docs.intersection(ground_truth_docs)
    print(f"\nNumber of overlapping documents: {len(overlap)}")
    if overlap:
        print("Overlapping documents:")
        # Sets have no defined iteration order; sort so the printed list is
        # identical on every run
        for doc in sorted(overlap, key=str):
            print(f"  - {doc}")

    # Documents in ground truth but not in historical
    missing = ground_truth_docs - historical_docs
    print(f"\nDocuments in ground truth but not in historical: {len(missing)}")
    if missing:
        print("Missing documents:")
        for doc in sorted(missing, key=str):
            print(f"  - {doc}")
except (KeyError, NameError):
    print("Could not compare documents - data missing")

## 5. Analyze the Nature of Zero Results

In [None]:
# Extract all weights and scores to understand patterns
def extract_weights_results(results, profile_type, metric_key,
                            config_types=('similarity', 'personalization', 'embedding')):
    """Flatten the per-configuration optimization results into one DataFrame.

    Args:
        results: Nested results mapping, read as
            results[profile_type][config_type][metric_key]['all_results'].
        profile_type: Top-level key selecting which profile's results to read.
        metric_key: Metric name to read under each configuration type.
        config_types: Configuration types to extract, in output order. Types
            that are absent from results[profile_type] are skipped.

    Returns:
        pandas.DataFrame with one row per evaluated weight configuration: a
        'config_type' column, a 'score' column, and one column per weight
        name. When no configuration is found the frame is empty but still
        carries the 'config_type' and 'score' columns.

    Raises:
        KeyError: If profile_type is missing from results, or if a present
            configuration type lacks metric_key, 'all_results', or a
            per-result 'score' / 'weights' entry.
    """
    profile_results = results[profile_type]

    all_configs = []
    for config_type in config_types:
        if config_type not in profile_results:
            continue
        for result in profile_results[config_type][metric_key]['all_results']:
            config = {'config_type': config_type, 'score': result['score']}
            # Each weight name becomes its own column next to the score
            config.update(result['weights'])
            all_configs.append(config)

    if not all_configs:
        # Keep the expected columns so callers can still select df['score']
        return pd.DataFrame(columns=['config_type', 'score'])
    return pd.DataFrame(all_configs)

# Build one DataFrame per metric, each covering every evaluated weight configuration
precision_df, ndcg_df = (
    extract_weights_results(bottling_results, client_key, metric_name)
    for metric_name in ('precision@10', 'ndcg@10')
)

# How many configurations were evaluated per metric
print(f"Total weight configurations evaluated for precision: {len(precision_df)}")
print(f"Total weight configurations evaluated for NDCG: {len(ndcg_df)}")

# Which distinct score values occurred across those configurations
print(f"\nUnique precision scores: {precision_df['score'].unique()}")
print(f"Unique NDCG scores: {ndcg_df['score'].unique()}")

## 6. Recommended Next Steps

Based on the analysis above, here are some recommended next steps:

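1. **Adjust K value**: Try evaluating at different k values. Note that smaller k values (e.g., k=3 or k=5) cannot produce a non-zero score while precision@10 is zero, because the top-3 and top-5 results are subsets of the top-10; they only become useful for judging how highly relevant documents are ranked once some of them appear in the top 10.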

2. **Review ground truth data**: Verify that the ground truth documents in test_data.json are correct and relevant to the bottling company profile.

3. **Check for gaps between test data and historical documents**: If the test data documents are completely different from the historical documents, this could explain why we're seeing zero scores.

4. **Test with alternative metrics**: Consider using Mean Reciprocal Rank (MRR) or evaluating with a larger k (e.g., k=20 or k=30) to potentially capture more relevant documents.

5. **Examine the expert profile**: Review the bottling company expert profile to ensure it accurately reflects the company's interests and is specific enough to recommend relevant documents.

6. **Check for document availability**: Verify that the ground truth documents are actually available in the index being searched by the recommender system.

7. **Consider cross-validation**: If possible, use cross-validation techniques to ensure the evaluation is robust.

8. **Expand test data**: Consider adding more relevant documents to the ground truth set to create a larger target for the recommender system.

## 7. Implementation of K Value Adjustment

To illustrate how the choice of k affects the metrics, here's code to simulate what precision and recall would look like at different values of k (both smaller and larger than 10) for a hypothetical ranking.

In [None]:
def simulate_precision_at_k(num_relevant_docs, relevant_positions, k_values):
    """Simulate precision@k and recall@k for different k values.

    Args:
        num_relevant_docs: Total number of relevant documents (the recall
            denominator).
        relevant_positions: Positions (1-indexed) where relevant docs appear.
            Any iterable is accepted; values below 1 are not valid ranks and
            are ignored.
        k_values: k values to evaluate.

    Returns:
        dict mapping each k to {'precision': float, 'recall': float}.
        Precision is 0.0 when k <= 0 and recall is 0.0 when
        num_relevant_docs <= 0.
    """
    # Materialize once: the positions are scanned again for every k, which
    # would exhaust a one-shot iterator after the first k. Invalid ranks
    # (< 1) are dropped here so they are never counted as hits.
    valid_positions = [pos for pos in relevant_positions if pos >= 1]

    results = {}
    for k in k_values:
        # Count relevant docs in top k
        relevant_in_top_k = sum(1 for pos in valid_positions if pos <= k)
        precision = relevant_in_top_k / k if k > 0 else 0.0
        recall = relevant_in_top_k / num_relevant_docs if num_relevant_docs > 0 else 0.0
        results[k] = {'precision': precision, 'recall': recall}

    return results

# Hypothetical scenario to illustrate the impact of k: 10 relevant documents,
# one of which appears at position 1 while the others sit further down
relevant_positions = [1, 15, 23, 42, 50, 67, 85, 99, 110, 120]
k_values = [1, 3, 5, 10, 20, 50, 100]

results = simulate_precision_at_k(10, relevant_positions, k_values)

# One record per k, with the two metrics as columns, for visualization
results_df = pd.DataFrame(
    [
        {'k': k, 'precision': metrics['precision'], 'recall': metrics['recall']}
        for k, metrics in results.items()
    ]
)

# Draw both metric curves on a single shared axis
fig, ax = plt.subplots(figsize=(10, 6))
results_df.plot(x='k', y='precision', marker='o', ax=ax, label='Precision@k')
results_df.plot(x='k', y='recall', marker='s', ax=ax, label='Recall@k')
ax.set(
    title='Impact of k Value on Precision and Recall',
    xlabel='k Value',
    ylabel='Score',
)
ax.grid(True)
fig.tight_layout()
plt.show()

# Display the numerical results as a plain table
print("Precision and Recall at different k values:")
rounded_table = results_df.round(3).to_string(index=False)
print(rounded_table)