In [1]:
import sys
sys.path.append("../")

import os
from pathlib import Path
import wandb
import shortuuid

from confection import Config
from desci_sense.configs import init_config

In [2]:
def log_pred_wandb(wandb_run, result):
    """Log a single prediction to Weights & Biases as a one-row table artifact.

    The artifact is named ``pred_<run id>_<random 8-char id>``, has type
    ``"prediction"`` and contains one table stored under the name
    ``"predictions"``.

    Args:
        wandb_run: Run the artifact is logged to; its ``id`` is embedded in
            the artifact name.
        result: Mapping with a ``'tweet'`` entry (keys ``user_name``,
            ``tweetURL``, ``text``, ``tweetID``) and an ``'answer'`` entry
            (keys ``reasoning``, ``final_answer``).

    Raises:
        KeyError: If a required key is missing (for plain ``dict`` inputs).
    """
    # get a unique ID for this prediction
    pred_uid = shortuuid.ShortUUID().random(length=8)

    artifact = wandb.Artifact(f"pred_{wandb_run.id}_{pred_uid}", type="prediction")

    columns = ["User", "URL", "Text", "Reasoning Steps", "Predicted Label", "True Label", "Tweet ID"]

    # extract relevant columns from prediction
    tweet = result['tweet']
    answer = result['answer']
    pred_row = [
        tweet['user_name'],
        tweet['tweetURL'],
        tweet['text'],
        answer['reasoning'],
        answer['final_answer'],
        "",  # no gold label meanwhile
        tweet['tweetID'],
    ]

    # add data to table
    table = wandb.Table(data=[pred_row], columns=columns)
    artifact.add(table, "predictions")

    # Log immediately since we don't know when the user will close the session.
    # Log through the run that was passed in rather than the module-level
    # `wandb.log_artifact`, which targets whichever run is globally active:
    # this way the artifact always lands on the run whose id is in its name.
    wandb_run.log_artifact(artifact)
    

    

    


In [3]:
# Build the project configuration by calling desci_sense's init_config() with no arguments.
config = init_config()

In [4]:
# Dump the configuration as text to inspect the active model / prompt / wandb settings.
print(config.to_str())

[model]
model_name = "mistralai/mistral-7b-instruct"
temperature = 0.6

[prompt]
template_path = "desci_sense/prompting/templates/p2.txt"

[wandb]
wand_entity = "common-sense-makers"
project = "st-demo"
wandb_db_name = "test-DB"


In [5]:
# Which artifacts to pull: every artifact of this type within this project.
TYPE = "prediction"
PROJECT = "st_demo-v0.2"

api = wandb.Api()

# Materialise the artifact collections registered under TYPE in PROJECT.
collections = list(api.artifact_type(type_name=TYPE, project=PROJECT).collections())

# Flatten: gather every version of every collection into a single list.
artifacts = []
for coll in collections:
    for artifact in coll.versions():
        artifacts.append(artifact)


In [6]:
# Number of artifact versions gathered across all collections.
len(artifacts)

16

In [7]:
# Pick the first collected artifact to inspect its contents by hand.
artifact = artifacts[0]

In [8]:
# Display the artifact's repr.
artifact

<Artifact QXJ0aWZhY3Q6NjMxOTAyMTA0>

In [9]:
# Download the artifact's files; the returned value is the local directory they were saved to.
dd = artifact.download()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [10]:
# Show the local download directory.
dd

'./artifacts/pred_rhjdpsuk_GmRyKBUV:v0'

In [11]:
# Path of the serialized table inside the download directory.
# NOTE(review): presumably "predictions" is the name the table was added under — confirm.
table_path = f"{dd}/predictions.table.json"

In [12]:
import pandas as pd

In [13]:
import json
# Parse the table JSON; the context manager closes the file handle once it
# has been read instead of leaving it open until garbage collection.
with Path(table_path).open() as table_file:
    raw_data = json.load(table_file)

In [14]:
# Row values of the table (a list of rows, each a list of cell values).
raw_data["data"]

[['Nathan Schneider',
  'https://twitter.com/ntnsndr/status/1721589953509953620',
  "In case you're not on the fediverse, here's a thread I just did on the ongoing WeWork collapse and why (surprise surprise) it should have been a co-op: https://social.coop/@ntnsndr/111364589153380497 #exittocommunity",
  "[Reasoning Steps]\n\n1. The post is about the ongoing collapse of WeWork.\n2. The author is discussing the reasons for the collapse.\n3. The author suggests that the collapse should have been a cooperative.\n4. The author is advocating for a different model of business.\n5. The author is sharing their thoughts and opinions on a current event.\n\n[Candidate Tags]\n\n1. <event>: This tag is suitable because the post is about a current event, specifically the collapse of WeWork.\n2. <review>: This tag is not suitable because the post is not a review of a reference.\n3. <announce>: This tag is not suitable because the post is not an announcement of new research.\n4. <reading>: This tag is

In [16]:
# Column names of the table.
raw_data["columns"]

['User',
 'URL',
 'Text',
 'Reasoning Steps',
 'Predicted Label',
 'True Label',
 'Name of Label Provider',
 'Tweet ID']

In [19]:
from tqdm import tqdm

In [37]:
# Collect one row per artifact (plus the artifact's wandb name) together with
# the shared column names, ready to be turned into a DataFrame.
rows = []
cols = None
for artifact in tqdm(artifacts, total=len(artifacts)):
    a_path = artifact.download()
    table_path = Path(a_path) / "predictions.table.json"
    # context manager so each file handle is closed right after parsing
    with table_path.open() as table_file:
        raw_data = json.load(table_file)

    if cols is None:
        # copy, so appending the extra column below never mutates raw_data
        cols = list(raw_data["columns"])
    else:
        # every artifact must share the same table schema
        assert cols == raw_data["columns"], (
            f"column mismatch in {artifact.name}: {raw_data['columns']} != {cols}"
        )

    # add wandb name to identify each artifact
    # NOTE(review): only the first table row is kept — assumes one prediction
    # per artifact; confirm against how the artifacts were logged.
    row = raw_data["data"][0] + [artifact.name]
    rows.append(row)

# add column for wandb name (also works when no artifacts were found)
cols = (cols or []) + ["wandb name"]
    




  0%|          | 0/16 [00:00<?, ?it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
  6%|▋         | 1/16 [00:00<00:03,  4.74it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 12%|█▎        | 2/16 [00:00<00:02,  5.33it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 19%|█▉        | 3/16 [00:00<00:02,  6.12it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 25%|██▌       | 4/16 [00:00<00:01,  6.46it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 31%|███▏      | 5/16 [00:00<00:01,  6.48it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 38%|███▊      | 6/16 [00:00<00:01,  6.53it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 44%|████▍     | 7/16 [00:01<00:01,  5.19it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 50%|█████     | 8/16 [00:01<00:01,  5.10it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 56%|█████▋    | 9/16 [00:01<00:01,  5.34it/s][34m[1mwandb[0m:   1 of 1 files downloaded.  
 62%|██████▎   | 10/16 [00:01<00:01,  5.39it/s][34m[1mwa

In [39]:
# Inspect the last row built by the loop (`row` is the leftover loop variable).
row

['John Tasioulas',
 'https://twitter.com/JTasioulas/status/1660582630641872898',
 'For a long time political philosophers ignored democracy in favour of building increasingly arcane theories of justice, or they saw justice as in conflict with democracy. This book by @dsallentess seeks to put democracy at the very heart of justice. Looking forward to reading. https://t.co/O9Ii3we0b8',
 '[Reasoning Steps]\n\n1. The post is about a book.\n2. The author of the book is mentioned.\n3. The book is about democracy and justice.\n4. The author is a political philosopher.\n5. The post is a positive endorsement of the book.\n\n[Candidate Tags]\n\n<review> - This tag is suitable because the post is a positive endorsement of the book.\n<announce> - This tag is not suitable because the post is not announcing a new research output.\n<job> - This tag is not suitable because the post is not about a job.\n<event> - This tag is not suitable because the post is not about an event.\n<reading> - This tag is 

In [36]:
# Column names, including the appended "wandb name" column.
cols

['User',
 'URL',
 'Text',
 'Reasoning Steps',
 'Predicted Label',
 'True Label',
 'Name of Label Provider',
 'Tweet ID',
 'wandb name']

In [38]:
# Assemble the collected rows into a DataFrame with one column per entry in `cols`.
# NOTE(review): the recorded output is a ValueError with shape (144, 1); 144 = 16 * 9,
# which suggests `rows` was a flat list of values when this cell last ran (execution
# counts are out of order). Re-run after the row-collection cell and confirm it succeeds.
pd.DataFrame(data=rows, columns=cols)

ValueError: Shape of passed values is (144, 1), indices imply (144, 9)

In [25]:
# Name of the directory holding the most recently loaded table file.
table_path.parent.name

'pred_kezzchu2_AfZd8RPg:v0'

In [32]:
# The artifact's name; in the recorded outputs it equals the download directory name.
artifact.name

'pred_kezzchu2_AfZd8RPg:v0'