## Imports

In [1]:
import pandas as pd
import numpy as np
from utils import *;
from queryAnswer import *;
from IPython.display import *;
import re
import json
import os
import sys

interactive(children=(Textarea(value='How tall is Mount Everest?', continuous_update=False, description='Quest…

## Helpers

In [2]:
def readDict(file):
    """
    Load ``dictionaries/<file>.json`` and pass the parsed content
    through ``intKey``.

    Parameters
    ----------
    file : str
        Base name of the JSON file, without the ``dictionaries/``
        directory or the ``.json`` extension.

    Returns
    -------
    Whatever ``intKey`` returns for the parsed JSON content.
    """
    path = f"dictionaries/{file}.json"
    with open(path) as handle:
        loaded = json.load(handle)
    return intKey(loaded)

def sumDict(dicts):
    """
    Merge several dictionaries into one new dictionary.

    When a key occurs in more than one input, the value from the
    later dictionary wins. The inputs themselves are not modified.

    Parameters
    ----------
    dicts : iterable of dict
        The dictionaries to merge, in order of increasing priority.

    Returns
    -------
    Merged Dictionary : dict
        A fresh dictionary; empty when ``dicts`` is empty.
    """
    full = {}
    for dic in dicts:
        # Update the accumulator in place: ``full = full | dic`` built a
        # brand-new copy of everything merged so far on every pass.
        full.update(dic)
    return full

def writeDict(dct, file):
    """
    Write ``dct`` to ``dictionaries/<file>.json`` as indented JSON
    with sorted keys.

    Parameters
    ----------
    dct : dict
        The JSON-serialisable object to store.
    file : str
        Base name of the target file, without the ``dictionaries/``
        directory or the ``.json`` extension.

    Returns
    -------
    dct
        The same object that was passed in.

    Raises
    ------
    TypeError
        Raised by ``json.dumps`` when ``dct`` cannot be serialised
        (for example unsupported value types, or keys that cannot be
        sorted against each other).
    """
    # Serialise before touching the file: opening with "w" truncates it
    # immediately, so a serialisation error raised afterwards would leave
    # the previously stored dictionary empty or half-written.
    payload = json.dumps(dct, indent=4, sort_keys=True)
    with open(f"dictionaries/{file}.json", "w") as outfile:
        outfile.write(payload)
    return dct

def addDict(file=None, dct = None, keys = None, values = None):
    """
    Add ``values`` onto existing entries of a dictionary with ``+=``
    and write the result to ``dictionaries/<file>.json``.

    Parameters
    ----------
    file : str = None
        Base name of the JSON file. It is read with ``readDict`` when
        ``dct`` is missing or empty, and it is always the write target.
    dct : dict = None
        Dictionary to update. When falsy, the dictionary stored in
        ``file`` is used instead.
    keys : hashable or list = None
        A single key, or a list of keys, whose entries are updated.
    values : object or list = None
        The value added to ``dct[keys]``; when ``keys`` is a list, a
        sequence of the same length with one value per key.
        Nothing is added when ``values`` is falsy.

    Raises
    ------
    KeyError
        If a key is not already present in the dictionary.
    AssertionError
        If ``keys`` is a list and ``values`` has a different length.
    """
    if not dct: 
        dct = readDict(file)
    # An empty key list means "nothing to add"; any other key counts as
    # given unless it is None, so falsy keys such as the integer 0 are no
    # longer skipped silently.
    if isinstance(keys, list):
        has_keys = bool(keys)
    else:
        has_keys = keys is not None
    if has_keys and values:
        if not isinstance(keys, list):
            dct[keys] += values
        else:
            assert len(keys) == len(values), \
            f"""Keys and values must have same length:
                Given Keys:{len(keys)} and Values:{len(values)}"""
            for key, value in zip(keys, values):
                dct[key] += value  
    # NOTE(review): when ``dct`` is supplied without ``file`` this writes to
    # "dictionaries/None.json" -- confirm whether that is intended.
    writeDict(dct, file)
    
def emptyRetrieved():
    """
    Reset ``dictionaries/retrieved.json`` so that every key of
    ``dictionaries/substances.json`` maps to its own empty dictionary.

    Returns
    -------
    dict
        The freshly written dictionary, as returned by ``writeDict``.
    """
    substance_dict = readDict("substances")
    blank = {}
    for substance_key in substance_dict.keys():
        # A separate dict per key, so entries never share state.
        blank[substance_key] = {}
    return writeDict(blank, "retrieved")

def intKey(dct):
    """
    Recursively convert the keys of a dictionary to ``int``.

    JSON object keys are always strings, so this restores integer
    keys after loading. The conversion is all-or-nothing per level:
    if any key of a dictionary cannot be converted, that dictionary
    is returned unchanged, including everything nested inside it.

    Parameters
    ----------
    dct : object
        Usually a dict; any other object, and the empty dict, is
        returned as is.

    Returns
    -------
    A new dict with ``int`` keys and recursively converted values,
    or ``dct`` itself when no conversion took place.
    """
    if type(dct) is not dict or not dct:
        return dct
    try:
        return {int(k) : intKey(v) for k, v in dct.items()}
    except (ValueError, TypeError, OverflowError):
        # Only the failures ``int()`` itself produces mean "these keys are
        # not integers"; anything else is a real error and should surface.
        return dct

In [3]:
def flatten(t):
    """
    Flatten an iterable by exactly one level.

    Elements that are lists are spliced into the result; every other
    element (including tuples and strings) is kept as a single item.

    Parameters
    ----------
    t : iterable
        The items to flatten.

    Returns
    -------
    list
        A new list; nested lists deeper than one level stay nested.
    """
    flat = []
    for item in t:
        # Linear-time build: ``sum(list_of_lists, [])`` re-copied the
        # growing result for every element.
        if type(item) is list:
            flat.extend(item)
        else:
            flat.append(item)
    return flat

In [4]:
def getExperiences(subID, most=20, conditions=None):
    """
    Request experience data for one substance and parse the response
    into ``Experience`` objects.

    Parameters
    ----------
    subID : int
        Substance ID sent as the ``substance_id`` query parameter.
    most : int = 20
        Value sent as the ``max`` query parameter.
    conditions : list = None
        Python expressions given as strings. Each one is evaluated
        for every experience with the current object available as
        ``exp`` (for example ``"exp.isPure()"``); an experience is
        kept only if every expression is truthy.

    Returns
    -------
    dict
        The kept ``Experience`` objects, keyed by ``exp.get("id")``.
    """
    add =  "&a=experience_data" + \
          f"&substance_id={subID}&max={most}" + \
           "&format=xml"
    url = link + add 
    content = requests.get(url).text
    # Split on the <experience> tags; the outer pieces before the first and
    # after the last tag are dropped, and so are the blank separators.
    pieces = re.split(r"<experience>|</experience>", content)[1:-1]
    exps = [piece for piece in pieces if piece != "\n\n\n"]
    dct = {}
    # ``exp`` and ``i`` keep their names on purpose: condition strings are
    # evaluated against this function's local variables.
    for i, raw in enumerate(exps):
        exp = Experience(raw)
        fits = True
        if conditions:
            # Plain loop rather than all(...): inside a generator expression
            # eval() would not see ``exp``.
            for condition in conditions:
                # SECURITY: eval() executes arbitrary code. Only pass
                # condition strings written by a trusted caller.
                met = eval(condition)
                fits = fits and met
        if fits:
            dct[exp.get("id")] = exp
    return dct

def experienceList(subID, most=20, conditions = None):
    """
    Same lookup as ``getExperiences``, but returns only the
    ``Experience`` objects as a list instead of a dictionary.

    Parameters
    ----------
    subID, most, conditions
        Forwarded unchanged to ``getExperiences``.

    Returns
    -------
    list
        The values of the dictionary built by ``getExperiences``.
    """
    matching = getExperiences(subID, most=most, conditions=conditions)
    return [*matching.values()]

In [5]:
def retrieve(
    subID : int,
    most : int = 20, 
    conditions : list = None):
    """
    Add experiences to ``dictionaries/retrieved.json``
    that meet all conditions in ``conditions``,
    and update the file accordingly.

    Experiences whose ID is already stored for ``subID`` are left
    untouched; new ones are stored as ``vars(experience)``.

    Parameters
    ----------
    subID : int
        The numerical ID of the substance to 
        get experiences from.
    most : int = 20
        The maximum amount of experiences to pull;
        this is the number of all experiences, the 
        number which meet ``conditions`` may be much
        lower.
    conditions : list = None
        Conditions that each experience must meet in
        order to be stored (see ``getExperiences``).

    Returns
    -------
    Updated Dictionary : dict
        The full content of ``retrieved.json`` after the update.

    Raises
    ------
    KeyError
        If ``retrieved.json`` has no entry for ``subID``.
    """
    dct = getExperiences(subID, most, conditions)
    retrieved = readDict("retrieved")
    # ``working`` is the stored entry itself, so adding to it updates
    # ``retrieved`` in place.
    working = retrieved[subID]
    for k, v in dct.items():
        if k not in working:
            working[k] = vars(v)
    # writeDict returns what it wrote, which is the dictionary this
    # function is documented to return.
    return writeDict(retrieved, "retrieved")

## Experience Class

In [6]:
class Experience:
    """
    A single experience report parsed from an XML fragment.

    Attributes
    ----------
    string : str
        The raw XML fragment the object was built from.
    info : dict
        Tag name -> parsed value for every one-line ``<tag>value</tag>``
        element, plus ``"text"``. Empty values are stored as ``None``
        and ``"list-number"`` is stored reduced by one.
    text : str
        The content between the ``<experience-text>`` tags.
    name : str
        Identifier built from the substance string and list number.

    ``vars(experience)`` holds exactly these four attributes, which is
    what ``fromDict`` expects, so do not add further instance attributes.
    """
    
    @classmethod
    def fromDict(cls, dct : dict):
        """
        Rebuild an experience from a dictionary such as
        ``vars(experience)``; only its ``"string"`` entry is used,
        everything else is re-derived.
        """
        return cls(**dct)
    
    def __repr__(self):
        dct  = self.info
        ss, ln = dct["substance-string"].strip(), dct["list-number"]
        return f"Experience: {{ Substance: {ss},  List-ID: {ln} }}"
        
    def __str__(self):
        return self.name
        
    def __init__(self, string : str, 
                 info = None, text = None, name = None):
        """
        Parse ``string`` and fill ``info``, ``text`` and ``name``.

        ``info``, ``text`` and ``name`` are accepted only so that
        ``fromDict`` can pass a complete ``vars()`` dictionary; their
        values are ignored.
        """
        self.string = string
        self.readInfo()
        # Normalise empty tag values to None.
        for field, value in self.info.items():
            if value == "":
                self.info[field] = None
        self.makeName()
    
    def get(self, key : str):
        """
        Return ``info[key]``. For an unknown key, print the list of
        valid keys and return ``None``.
        """
        if key in (inf := self.info):
            return inf.get(key)
        else:
            print(f"{key} is not a valid key.",
                  "Please select one of the following: \n")
            for key in inf.keys():
                if key == "text":
                    print(key)
                else:
                    print(key, end = "," + " "*(30 - len(key)) + "\t")
                    
    def isPure(self):
        """
        Return True when the experience lists a single substance: the
        ID list parsed to one ``int`` and the substance string contains
        neither ``,`` nor ``&``.
        """
        single_id = type(self.get("substance-id-list")) is int
        substance = self.get("substance-string")
        combined = ("," in substance) or ("&" in substance)
        return single_id and not combined
    
    def isFirst(self):
        """
        Return True when category ID 2 is among the experience's
        categories (presumably the "first times" category -- confirm
        against the API's category table).
        """
        cats = self.get("category-id-list")
        if isinstance(cats, (tuple, list, set, frozenset)):
            return 2 in cats
        # A single category parses to a bare int and an empty tag to None;
        # neither supports ``in``.
        return cats == 2
        
    def readInfo(self):
        """
        Parse ``self.string`` into ``self.info`` and ``self.text``.

        Raises ``IndexError`` when there is no ``<experience-text>``
        element, and ``KeyError``/``TypeError`` when ``list-number`` is
        missing or not a number.
        """
        # Imported here so the class does not rely on a module-level import.
        import ast

        # Non-greedy tag name: a value that itself contains ">" must not be
        # swallowed into the key.
        fields = dict(re.findall(r"<(.*?)>(.*)</.*>", self.string))
        for key, raw in fields.items():
            try:
                # literal_eval parses numbers, tuples and other literals only.
                # The previous eval() also resolved names and arithmetic, so
                # a title "id" became the builtin function and a value like
                # "2020-10-12" became 1998 -- and it would have executed any
                # code contained in the downloaded data.
                fields[key] = ast.literal_eval(raw.lstrip(" \t"))
            except (ValueError, SyntaxError, TypeError,
                    MemoryError, RecursionError):
                # Not a literal: keep the raw string.
                pass
        self.info = fields
        txt = re.split(r"<experience-text>|</experience-text>", self.string)[1]
        self.info['text'] = txt
        self.text = txt
        self.info["list-number"] = self.info["list-number"] - 1
        
    def makeName(self):
        """
        Build ``self.name``: the stripped substance string followed by
        the list number, lower-cased, with spaces replaced by ``_``
        and ``&`` replaced by ``n``.
        """
        dct = self.info
        name = dct["substance-string"].strip() + str(dct["list-number"])
        self.name = name.lower().replace(" ", "_").replace("&", "n")
        
    def listInfo(self):
        """Return the keys of ``info`` as a list."""
        return list(self.info)

    def toJSON(self):
        """
        Return the object's attributes as an indented JSON string with
        sorted keys.
        """
        return json.dumps(self, default=vars, 
            sort_keys=True, indent=4)

## Variables

### link

In [7]:
# Base endpoint of the experience API. ``link`` already ends with the
# ``api_code`` query parameter, so further parameters are appended to it
# as "&key=value" pairs.
base_url = "https://erowid.org/experiences/research/"
api_code = "exp_api.php?api_code=berkeley_bcsp_tyrone_2022"
link = "".join((base_url, api_code))

## Tables

In [8]:
# Substance table loaded from the CSV file in the ``csv`` directory.
substances = pd.read_csv(filepath_or_buffer="csv/substance.csv")

### dictionaries

In [9]:
# Content of dictionaries/substances.json, with keys converted to int
# by readDict where possible.
sk = readDict(file="substances")

In [10]:
# Group label -> substance IDs whose retrieved experiences are merged
# under that label when ``collected`` is built.
to_collect = dict([
    ("Cannabis", [1]),
    ("LSD", [2]),
    ("MDMA", [3]),
    ("Psilocybin/Mushrooms", [39, 66, 239]),
    ("DMT", [18]),
    ("Mescaline/Cacti/Children", [36, 809, 543, 826]),
])

In [12]:
# Content of dictionaries/retrieved.json, with keys converted to int
# by readDict where possible.
retrieved = readDict(file="retrieved")

In [13]:
# For every label in ``to_collect``, merge the ``retrieved`` entries of
# all its substance IDs into one dictionary.
collected = dict(
    (label, sumDict([retrieved[sub_id] for sub_id in sub_ids]))
    for label, sub_ids in to_collect.items()
)

In [14]:
# Store the merged groups in dictionaries/collected.json; the trailing
# semicolon keeps the notebook from echoing the returned dictionary.
writeDict(dct=collected, file="collected");

## Pre-Processing

https://erowid.org/experiences/research/exp_api.php?api_code=berkeley_bcsp_tyrone_2022