# Testing the Statistical Analysis
## Using some of the agents as fake human reviewers.
- Load one or more ReviewSets
- For each Reviewer, gather the reviews and run create_ranking_vector
- Plot the Reviewers and the (fake) human reviewers
- Produce the heatmap comparing the reviewers
- [next - significance analysis]


### Set up parent directory and db connection

In [1]:
import sys
import os

# Add the parent directory of the current script to the Python path
cwd = os.getcwd()
dirname = os.path.dirname(cwd)
print(cwd)
print(dirname)
sys.path.append(dirname)

print(sys.path)

/Users/idekeradmin/Dropbox/GitHub/agent_evaluation/notebooks
/Users/idekeradmin/Dropbox/GitHub/agent_evaluation
['/Users/idekeradmin/Dropbox/GitHub/agent_evaluation/notebooks', '/opt/anaconda3/lib/python311.zip', '/opt/anaconda3/lib/python3.11', '/opt/anaconda3/lib/python3.11/lib-dynload', '', '/Users/idekeradmin/.local/lib/python3.11/site-packages', '/opt/anaconda3/lib/python3.11/site-packages', '/opt/anaconda3/lib/python3.11/site-packages/aeosa', '/Users/idekeradmin/Dropbox/GitHub/agent_evaluation']


In [2]:
from models.analysis_plan import AnalysisPlan
from services.analysisrunner import AnalysisRunner
from models.review_plan import ReviewPlan
from services.reviewrunner import ReviewRunner
from app.sqlite_database import SqliteDatabase
from app.config import load_database_config

# Load the db connection details
# db_type, uri, user, password = load_database_config(path='~/ae_config/test_config.ini')
# self.db = Database(uri, db_type, user, password)

_, database_uri, _, _ = load_database_config()
db = SqliteDatabase(database_uri)

## Generate AnalysisRuns
### Not doing this, change cell back to code if needed

analysis_plan_ids = [p1, p2, p3]
analysis_runs = []
for ap_id in analysis_plan_ids:
    analysis_plan = AnalysisPlan.load(db, ap_id)
    analysis_run = analysis_plan.generate_analysis_run()
    analysis_runs.append(analysis_run)

for analysis_run in analysis_runs:
    runner = AnalysisRunner(db, analysis_run.object_id)
    result = runner.run()



## Load the ReviewPlans, generate the ReviewSets
### Not doing this, change cell back to code if needed

review_plan_ids = ["review_plan_ad5c1fe8-dc76-4940-a938-bdfbd569f42d"]
review_sets = []
for review_plan_id in review_plan_ids:
    review_plan = ReviewPlan.load(db, review_plan_id)
    # Generate an empty ReviewSet from the ReviewPlan
    review_set = review_plan.generate_review_set()
    review_sets.append(review_set)

review_sets[0]

## Run the ReviewSets
### Not doing this, change cell back to code if needed
Populate the ReviewSets with Reviews

for review_set in review_sets:
    # Run the ReviewSet using a ReviewRunner
    runner = ReviewRunner(db, review_set.object_id)
    result = runner.run()

review_sets[0]

## Generate the Reviewer judgment vectors from the Reviews in the ReviewSets
The vectors correspond to the given ordered list of ReviewSets. They are not comparable otherwise.

We create a datastructure in which each Reviewer is associated with its judgment vector

In [3]:
from app.analysis import create_review_judgment_vector, create_judgment_vector
from models.analyst import Analyst
from models.review_set import ReviewSet

review_set_ids = ["review_set_71a86e48-9492-4cd8-9673-49c398de7498"]
review_sets = []
for review_set_id in review_set_ids:
    review_set = ReviewSet.load(db, review_set_id)
    review_sets.append(review_set)

reviewer_judgment_vectors = {}

# We get the ReviewPlan for the first ReviewSet so that we can get the list of Reviewers
# The list must be the same for all review sets - each Reviewer must see the same ReviewSets 
# TODO: add error checking to be sure that the Reviewers match across the ReviewSets
review_plan = ReviewPlan.load(db, review_sets[0].review_plan_id)
for reviewer_id in review_plan.analyst_ids:
#    reviewer = Analyst.load(db, reviewer_id)
    judgment_vector, review_jvecs = create_judgment_vector(db, review_sets=review_sets, reviewer_id=reviewer_id)
    reviewer_judgment_vectors[reviewer_id] = {"judgment_vector": judgment_vector,
                                               "review_judgment_vectors": review_jvecs}
    

reviewer_judgment_vectors

{'analyst_8148fd22-bdd8-4bda-910d-f7301f98a64d': {'judgment_vector': array([-1, -1,  0], dtype=int8),
  'review_judgment_vectors': {'analyst_8148fd22-bdd8-4bda-910d-f7301f98a64d': array([-1, -1,  0], dtype=int8)}}}

## Plot the judgment vectors
 - Label the points with the Reviewer names
 - The Reviewer names are prefixed with "H.", which is stripped off before labeling
 - Agents in red squares, Humans in blue circles.

In [4]:
from app.analysis import visualize_judgment_vectors

visualize_judgment_vectors(reviewer_judgment_vectors)

ValueError: n_components=2 must be between 0 and min(n_samples, n_features)=1 with svd_solver='full'

## Create the Reviewer similarity heatmap
 - higher similarity is more red

In [None]:
from app.analysis import reviewer_similarity_heatmap

reviewer_similarity_heatmap(reviewer_judgment_vectors)