[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/danielmlow/llm_course/blob/main/openrouter_api.ipynb)

In [1]:
import json
import requests
import os
import pandas as pd

# Measure constructs using LLMs

In [2]:


def process_api_output(output_str):
  """
  Extracts the JSON object from an LLM response string and returns it as a dictionary.

  The whole string is parsed as JSON first. If that fails, the span from the
  first '{' to the last '}' is parsed instead, which tolerates a preamble or a
  trailing explanation around the JSON object.

  Args:
    output_str: The string output from the API call.

  Returns:
    A dictionary containing the extracted data. Non-empty text that follows the
    JSON object is stored under the key 'Additional note'; the key is omitted
    when there is no such text (e.g. only trailing whitespace).

  Raises:
    ValueError: If no JSON object can be located, or the decoded value is not a
      dictionary. json.JSONDecodeError (a ValueError subclass) is raised when
      the extracted span is not valid JSON.

  Example:

    output_str1 = '{  "desire to escape": [[1], ["I want out"]],  "loneliness": [[1], ["No one cares about me"]] }'
    output_str2 = '{  "desire to escape": [[1], ["I want out"]],  "loneliness": [[1], ["No one cares about me"]] }Some additional information here.'

    print(process_api_output(output_str1))
    # {'desire to escape': [[1], ['I want out']], 'loneliness': [[1], ['No one cares about me']]}
    print(process_api_output(output_str2))
    # same as above plus 'Additional note': 'Some additional information here.'
  """
  start_index = output_str.find('{')
  # rfind returns -1 when there is no '}', so end_index is 0 in that case (never -1)
  end_index = output_str.rfind('}') + 1

  try:
    # Attempt to directly load the JSON string
    data = json.loads(output_str)
  except json.JSONDecodeError:
    # If JSON decoding fails, try to extract the JSON part
    if start_index == -1 or end_index <= start_index:
      raise ValueError("Invalid API output format.")
    data = json.loads(output_str[start_index:end_index])

  # Callers index the result by category name, so anything but a dict is unusable
  if not isinstance(data, dict):
    raise ValueError("Invalid API output format.")

  # Keep trailing free text only when there is some; an empty note would add a
  # meaningless key that is not a [score, explanation] pair
  additional_note = output_str[end_index:].strip()
  if additional_note:
    data['Additional note'] = additional_note

  return data



In [3]:

def _parse_model_content(content, verbose=False):
    """Turn the text of a model reply into a dictionary.

    Tries, in order: strict JSON, a Python literal (e.g. a dict written with
    single quotes), and finally process_api_output(), which extracts a JSON
    object embedded in surrounding text.
    """
    import ast  # local import: only this helper needs it

    # NOTE(security): `content` is untrusted text produced by a remote model.
    # An earlier version passed it to eval(), which executes any Python
    # expression contained in the reply. json.loads / ast.literal_eval only
    # build plain data structures, so they are used instead.
    try:
        parsed = json.loads(content)
        if isinstance(parsed, dict):
            if verbose:
                print("Successfully parsed response as dictionary")
            return parsed
    except (TypeError, ValueError):
        pass  # not strict JSON; fall through to the Python-literal attempt

    try:
        parsed = dict(ast.literal_eval(content))
    except Exception as e:
        print(f"Error parsing response as dictionary: {str(e)}")
        print("Attempting to process output...")
        return process_api_output(content)

    if verbose:
        print("Successfully parsed response as dictionary")
    return parsed


def openrouter_request(prompt, OPENROUTER_API_KEY, model, temperature, safety_settings=None, verbose=False):
    """
    Makes a request to OpenRouter API. Returns (result, metadata) tuple.
    If there's an error, returns (None, None) to allow skipping the failed request.

    Args:
        prompt: The prompt to send to the API
        OPENROUTER_API_KEY: Your OpenRouter API key
        model: The model to use
        temperature: The temperature parameter
        safety_settings: Optional safety settings. Accepted for backward
            compatibility; it is not sent with the request.
        verbose: If True, print detailed logs. Default is False.

    Returns:
        (result, metadata): `result` is the model's reply parsed into a
        dictionary and `metadata` is the full decoded API response.
        (None, None) if the request fails or the reply cannot be parsed.

    free models: 20 requests per minute and 200 requests per day. See https://openrouter.ai/docs/limits
    """
    request_kwargs = dict(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/danielmlow/talklife",
            "X-Title": "TalkLife Research"
        },
        json={
            "model": model,
            "temperature": temperature,
            "response_format": { "type": "json_object" },
            "messages": [
                { "role": "user", "content": prompt}
            ],
        },
        timeout=30,
    )

    # Configure session with retry logic
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=3)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    try:
        if verbose:
            print("\nMaking request to OpenRouter API...")
            print(f"Model: {model}")
            print(f"Temperature: {temperature}")

        try:
            # First attempt uses the environment's proxy settings, if any
            response = session.post(**request_kwargs)
        except requests.exceptions.ProxyError as e:
            print(f"Proxy error detected: {str(e)}")
            print("Retrying with direct connection...")
            # Ignore proxy settings taken from the environment and retry once.
            # A failure here is handled by the RequestException handler below.
            session.trust_env = False
            response = session.post(**request_kwargs)

        if verbose:
            print(f"Response status code: {response.status_code}")

        # Check if request was successful
        response.raise_for_status()

        try:
            metadata = response.json()
            if verbose:
                print("Successfully received JSON response")
            content = metadata['choices'][0]['message']['content']
            return _parse_model_content(content, verbose=verbose), metadata
        except ValueError as e:
            # Covers json.JSONDecodeError from either the HTTP body or the model's reply
            print(f"Error decoding JSON response: {str(e)}")
            print(f"Response text: {response.text[:500]}...")  # Print first 500 chars
            return None, None

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {str(e)}")
        # Timeouts and connection errors carry no response object
        error_response = getattr(e, 'response', None)
        if error_response is not None:
            print(f"Response status code: {error_response.status_code}")
            print(f"Response text: {error_response.text[:500]}...")  # Print first 500 chars
        return None, None
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return None, None
    finally:
        session.close()



# Set API key and choose model
- The API key is associated with your account. You can add credit to your account to use paid models.
- All models: https://openrouter.ai/rankings
- Paid models:
    - Google's State of the art (SOTA): 'google/gemini-2.5-pro'
    - Google's fast and cheap: 'google/gemini-2.5-flash'
    - Google's even cheaper 'google/gemini-2.5-flash-lite'
    - OpenAI's SOTA model: 'openai/gpt-5'
    - OpenAI's fast and cheap: 'openai/gpt-5-mini'
    - Anthropic's SOTA model: 'anthropic/claude-sonnet-4'
    - Anthropic's fast and cheap: 'anthropic/claude-3.5-haiku'
    - There are even stronger reasoning models, but they are very expensive and overkill for this use case
- Free models: https://openrouter.ai/models?max_price=0
    - Free models are rate-limited: a fixed number of requests per minute (e.g., 20) and per day (depends on the model). See https://openrouter.ai/docs/limits
    - "google/gemini-2.0-flash-exp:free"
    - "deepseek/deepseek-chat-v3.1:free"
    - "meta-llama/llama-3.1-70b-instruct:free"
    - "qwen/qwen3-coder:free"



In [4]:
# It's a good idea to keep keys in a separate file such as api_keys.py (not shared with anyone) which you can import
try:
    import api_keys # local file
    OPENROUTER_API_KEY = api_keys.OPENROUTER_API_KEY
except (ImportError, AttributeError):
    # No usable api_keys.py: fall back to the OPENROUTER_API_KEY environment variable.
    # You can also paste a key in place of the "" default, but it will be deactivated if it
    # ends up in a public repository. Get your own key and add some dollars at
    # OpenRouter AI: https://openrouter.ai/settings/keys
    OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
    if not OPENROUTER_API_KEY:
        print("Module 'api_keys' (with OPENROUTER_API_KEY) not found and the OPENROUTER_API_KEY environment variable is not set. Add your key here. ")

# Choose a model
model = "google/gemini-2.5-flash"

# Design a prompt

In [5]:
# Prompt template for classification without category definitions.
# str.format() fills the placeholders {context}, {categories} and {text};
# the doubled braces ({{ }}) come out as literal braces in the final prompt.
# NOTE(review): the format example uses single quotes, which is Python-literal
# style rather than strict JSON — confirm this is intended.
prompt_template = """Classify the text into one or more of the following {context} categories: {categories}.

Provide a score (0 or 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are clearly about the category).

Text: 
{text}

Structure your response in the following JSON format (no extra text):
{{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}}

JSON:
"""

In [6]:
# Category labels to score; the list is inserted into the prompt via str.format(), i.e. as its Python repr
categories = ['desire to escape', 'suicidal ideation', 'anger', 'loneliness']

In [7]:
# Example text to classify
text = 'No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.'

In [8]:
# Fill the template: no extra context, the category list, and the example text
prompt = prompt_template.format(context='', categories=categories, text=text)

# Show the exact prompt that would be sent to the model
print('Prompt:')
print(prompt)

Prompt:
Classify the text into one or more of the following  categories: ['desire to escape', 'suicidal ideation', 'anger', 'loneliness'].

Provide a score (0 or 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are clearly about the category).

Text: 
No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.

Structure your response in the following JSON format (no extra text):
{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}

JSON:



### With definitions


In [None]:

# Prompt template for classification with one definition per category.
# {context}, {categories} and {text} are filled by str.format(); doubled braces are literal braces.
prompt_template_with_definitions = """Classify the text into one or more of the following {context} categories with their corresponding definitions:\n\n{categories}

Provide a score (0 or 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are very prototypical expressions of the category).

Text: 
{text}

Structure your response in the following JSON format (no extra text):
{{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}}

JSON:
"""

# Definitions and examples of the Suicide Risk Lexicon: https://github.com/danielmlow/construct-tracker/blob/e9897da4f53efe40745aa5dcec923cc56830c23e/src/construct_tracker/data/lexicons/suicide_risk_lexicon_v1-0/suicide_risk_lexicon_codebook_prototypical_examples.txt
# https://osf.io/preprints/psyarxiv/vf2bc_v3

# Category name -> definition followed by prototypical examples
categories_with_definitions = {
    'desire to escape': 'wish to escape from awareness of current life problems their implications about the self. Examples: escape; paralyzing; no escape; want to get out; there is no exit; want to escape; craving release; escape pain; have to escape; running out of options',
    'suicidal ideation': "Suicidal thoughts desiring one's own death or related states. Examples: I want to die; better off dead; want to be dead; want to die; wish I was dead; wish I wasn't alive; wish I would not wake up; would be better off dead",
    'anxiety': "Anxiety is a feeling of worry, nervousness, or unease, often about an imminent event or something with an uncertain outcome. We include anxiety disorders. Examples: Xanax; phobia; apprehension; worry; nervous; xanax; can't relax; diazepam; phobic; anxiety; anxious; worried; agoraphobia; social anxiety",
    'loneliness': "aversive state experienced when a discrepancy exists between the interpersonal relationships one wishes to have and those that one perceives they currently have. The perception that one's social relationships are not living up to some expectation. Examples: no one to turn to; no one will miss me; nobody thinks about me; nobody wants me here; no friends; no one misses me; I am alone; no one cares; have no one; don't have anyone; feel ignored; have nobody; lonely; no one to talk; no one I can talk to",
}

# Flatten the dict into text, one "name: definition" line per category.
# From here on the name refers to that string, which is what the template expects.
categories_with_definitions = '\n'.join(
    f"{name}: {definition}" for name, definition in categories_with_definitions.items()
)

# Insert the definitions and the example text into the template
prompt_with_definitions = prompt_template_with_definitions.format(context='', categories=categories_with_definitions, text=text)

print('Prompt:')
print(prompt_with_definitions)


Prompt:
Classify the text into one or more of the following  categories with their corresponding definitions:

desire to escape: wish to escape from awareness of current life problems their implications about the self. Examples: escape; paralyzing; no escape; want to get out; there is no exit; want to escape; craving release; escape pain; have to escape; running out of options
suicidal ideation: Suicidal thoughts desiring one's own death or related states. Examples: I want to die; better off dead; want to be dead; want to die; wish I was dead; wish I wasn't alive; wish I would not wake up; would be better off dead
anxiety: Anxiety is a feeling of worry, nervousness, or unease, often about an imminent event or something with an uncertain outcome. We include anxiety disorders. Examples: Xanax; phobia; apprehension; worry; nervous; xanax; can't relax; diazepam; phobic; anxiety; anxious; worried; agoraphobia; social anxiety
loneliness: aversive state experienced when a discrepancy exists b

# Submit prompt to openrouter


In [10]:
# Send the prompt that includes definitions; pass `prompt` instead to use the version without definitions.
# openrouter_request returns (None, None) if the request fails.

final_result, metadata = openrouter_request(prompt_with_definitions, OPENROUTER_API_KEY, model = model, temperature=0)

In [11]:
# Recap what the text was
text

'No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.'

In [12]:
final_result

{'desire to escape': [1, ['I want out']],
 'suicidal ideation': [0, []],
 'anxiety': [0, []],
 'loneliness': [1, ['No one cares about me']]}

In [13]:
metadata

{'id': 'gen-1758661807-HuXYihh2HJqZPXjjpPpQ',
 'provider': 'Google',
 'model': 'google/gemini-2.5-flash',
 'object': 'chat.completion',
 'created': 1758661807,
 'choices': [{'logprobs': None,
   'finish_reason': 'stop',
   'native_finish_reason': 'STOP',
   'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n  "desire to escape": [\n    1,\n    [\n      "I want out"\n    ]\n  ],\n  "suicidal ideation": [\n    0,\n    []\n  ],\n  "anxiety": [\n    0,\n    []\n  ],\n  "loneliness": [\n    1,\n    [\n      "No one cares about me"\n    ]\n  ]\n}',
    'refusal': None,
    'reasoning': None}}],
 'usage': {'prompt_tokens': 444,
  'completion_tokens': 95,
  'total_tokens': 539,
  'prompt_tokens_details': {'cached_tokens': 0},
  'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}

# loop through text documents


In [14]:
server = False # Options: "google", "local", False. "google": running on Colab with files on Google Drive; "local": files on your computer; False: load the default example dataset

# Obtain documents

if server == 'google':
    # ON GOOGLE DRIVE
    from google.colab import drive
    drive.mount('/content/drive')
    input_dir = '/content/drive/MyDrive/your_folder/' # Or find using os.listdir('/content/')
    output_dir = '/content/drive/MyDrive/your_folder/'
elif server == 'local':
    # ON YOUR COMPUTER
    input_dir = './' # add path to input_dir
    output_dir = './'
else:
    # Load default data
    reddit_df = pd.read_csv('https://mair.sites.fas.harvard.edu/datasets/rmhd_27subreddits_1300posts_train.csv', index_col = 0)
    reddit_df_subset = reddit_df[reddit_df['subreddit'].isin(['suicidewatch', 'anxiety'])]
    # Shuffle with a fixed seed so the same 5 posts are selected on every run
    reddit_df_subset_random = reddit_df_subset.sample(frac=1, random_state=42)
    reddit_df_subset_random_5 =  reddit_df_subset_random[['subreddit', 'post']][:5]
    # Add an incrementing number to the beginning of the key and create a dictionary
    texts = {
        f"{i+1}_{row.subreddit}": row.post
        for i, row in enumerate(reddit_df_subset_random_5.itertuples(index=False))
    }

    output_dir = './data/output/'

os.makedirs(output_dir, exist_ok=True)
if server in ['google', 'local']:
    # Load every .txt file in input_dir, keyed by file name
    print('current working directory: ', os.getcwd())
    texts = {}

    # sorted(): os.listdir returns entries in arbitrary order
    for file in sorted(os.listdir(input_dir)):
        if file.endswith('.txt'):
            # Explicit encoding: the default depends on the platform
            with open(os.path.join(input_dir, file), 'r', encoding='utf-8') as f:
                # Store directly so the example `text` variable defined earlier is not overwritten
                texts[file] = f.read()

texts



{'1_suicidewatch': "31 and ready to stop existing I've made so many mistakes, medicated or not that desire is still there. Husband decides he's ready to divorce mostly all of my fault. I don't see a way to ever feel happy again. The years of depression (18 years) have won.\n",
 '2_anxiety': "Freezing up I'm new to the sub, so I don't really know if this is the right place to ask for help, but I've been told that this is probably linked to some form of anxiety. I've recently gotten over depression through the help of a medication, and since I've started feeling better, I've been feeling a lot more on edge, and social situations and various other interactions have gotten a lot harder. There are times where I'll be talking with somebody (this can be any person, talking about anything. I've had it happen with teachers, relatives, and even some of my closest friends, so I don't think its related to who I'm talking to), and some kind of response is prompted, I come up with an answer/response

# Text de-identification

In [None]:
!pip install presidio-analyzer
!pip install presidio-anonymizer
!python -m spacy download en_core_web_lg

from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from tqdm import tqdm



for file, text  in texts.items():
    

    # Set up the engine, loads the NLP module (spaCy model by default) 
    # and other PII recognizers
    analyzer = AnalyzerEngine()

    # Call analyzer to get results
    results = analyzer.analyze(text=text,
                            entities=["PERSON","NAME", "PHONE_NUMBER","NUMBER"], #You could add LOCATION
                            language='en')
    # print(results)

    # Analyzer results are passed to the AnonymizerEngine for anonymization

    anonymizer = AnonymizerEngine()

    anonymized_text = anonymizer.anonymize(text=text,analyzer_results=results)
    texts[file] = anonymized_text.text



# Loop through texts and apply openrouter



In [None]:
# Model output per document, keyed by the same keys as `texts`
results = {}

for file, text in texts.items():
    # Build the prompt for this document
    prompt = prompt_template_with_definitions.format(context='mental health', categories=categories_with_definitions, text=text)
    print(prompt)

    # Make request. WARNING: this costs some money depending on model
    response, metadata = openrouter_request(prompt, OPENROUTER_API_KEY, model=model, temperature=0)

    print('---------------------')
    print(response)
    print('\n========================\n')

    # response is None when the request failed
    results[file] = response


Classify the text into one or more of the following mental health categories with their corresponding definitions:

desire to escape: wish to escape from awareness of current life problems their implications about the self. Examples: escape; paralyzing; no escape; want to get out; there is no exit; want to escape; craving release; escape pain; have to escape; running out of options
suicidal ideation: Suicidal thoughts desiring one's own death or related states. Examples: I want to die; better off dead; want to be dead; want to die; wish I was dead; wish I wasn't alive; wish I would not wake up; would be better off dead
anxiety: Anxiety is a feeling of worry, nervousness, or unease, often about an imminent event or something with an uncertain outcome. We include anxiety disorders. Examples: Xanax; phobia; apprehension; worry; nervous; xanax; can't relax; diazepam; phobic; anxiety; anxious; worried; agoraphobia; social anxiety
loneliness: aversive state experienced when a discrepancy exi

In [16]:
results

{'1_suicidewatch': {'desire to escape': [0, []],
  'suicidal ideation': [1, ['ready to stop existing']],
  'anxiety': [0, []],
  'loneliness': [0, []]},
 '2_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['anxiety', 'on edge', 'fear']],
  'loneliness': [0, []]},
 '3_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['GAD', 'debilitating anxiety', 'anxiety', 'worried']],
  'loneliness': [0, []]},
 '4_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['Anxiety', 'anxiety', 'anxious', 'anxiety']],
  'loneliness': [0, []]},
 '5_suicidewatch': {'desire to escape': [0, []],
  'suicidal ideation': [1, ['kill me']],
  'anxiety': [0, []],
  'loneliness': [0, []]}}

In [17]:
# Inspect the metadata of the most recent request (`metadata` is reassigned on every call to openrouter_request)
metadata


{'id': 'gen-1758661821-OktCcjjQ4qAmPqZ0EjPM',
 'provider': 'Google',
 'model': 'google/gemini-2.5-flash',
 'object': 'chat.completion',
 'created': 1758661821,
 'choices': [{'logprobs': None,
   'finish_reason': 'stop',
   'native_finish_reason': 'STOP',
   'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n  "desire to escape": [\n    0,\n    []\n  ],\n  "suicidal ideation": [\n    1,\n    [\n      "kill me"\n    ]\n  ],\n  "anxiety": [\n    0,\n    []\n  ],\n  "loneliness": [\n    0,\n    []\n  ]\n}',
    'refusal': None,
    'reasoning': None}}],
 'usage': {'prompt_tokens': 431,
  'completion_tokens': 82,
  'total_tokens': 513,
  'prompt_tokens_details': {'cached_tokens': 0},
  'completion_tokens_details': {'reasoning_tokens': 0, 'image_tokens': 0}}}

# Turn JSON results into a csv
Each document becomes a row, and each category contributes two columns: its score and its explanation.

In [18]:
# Flatten the nested results into one row per document
rows = []
for post_id, categories in results.items():
    row = {'sentence': post_id}
    # A failed request is stored as None: keep the document as a row with empty cells
    if isinstance(categories, dict):
        for category, value in categories.items():
            # Only [score, explanation] pairs become columns; other entries
            # (e.g. a free-text 'Additional note') are skipped
            if not isinstance(value, (list, tuple)) or len(value) != 2:
                continue
            score, explanation = value
            row[f'{category}_score'] = score
            row[f'{category}_explanation'] = explanation
    rows.append(row)

# Create DataFrame
df = pd.DataFrame(rows)
display(df)
df.to_csv(os.path.join(output_dir, 'openrouter_results.csv'), index=False)


Unnamed: 0,sentence,desire to escape_score,desire to escape_explanation,suicidal ideation_score,suicidal ideation_explanation,anxiety_score,anxiety_explanation,loneliness_score,loneliness_explanation
0,1_suicidewatch,0,[],1,[ready to stop existing],0,[],0,[]
1,2_anxiety,0,[],0,[],1,"[anxiety, on edge, fear]",0,[]
2,3_anxiety,0,[],0,[],1,"[GAD, debilitating anxiety, anxiety, worried]",0,[]
3,4_anxiety,0,[],0,[],1,"[Anxiety, anxiety, anxious, anxiety]",0,[]
4,5_suicidewatch,0,[],1,[kill me],0,[],0,[]


In [19]:
results

{'1_suicidewatch': {'desire to escape': [0, []],
  'suicidal ideation': [1, ['ready to stop existing']],
  'anxiety': [0, []],
  'loneliness': [0, []]},
 '2_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['anxiety', 'on edge', 'fear']],
  'loneliness': [0, []]},
 '3_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['GAD', 'debilitating anxiety', 'anxiety', 'worried']],
  'loneliness': [0, []]},
 '4_anxiety': {'desire to escape': [0, []],
  'suicidal ideation': [0, []],
  'anxiety': [1, ['Anxiety', 'anxiety', 'anxious', 'anxiety']],
  'loneliness': [0, []]},
 '5_suicidewatch': {'desire to escape': [0, []],
  'suicidal ideation': [1, ['kill me']],
  'anxiety': [0, []],
  'loneliness': [0, []]}}

In [20]:
# Inspect the variables left over from the last iteration of the loop that builds `rows`.
# This only works after that cell has been run.
print('post_id: ', post_id)
print('categories: ', categories)
print('category: ', category)
print('score: ', score)
print('explanation: ', explanation)


post_id:  5_suicidewatch
categories:  {'desire to escape': [0, []], 'suicidal ideation': [1, ['kill me']], 'anxiety': [0, []], 'loneliness': [0, []]}
category:  loneliness
score:  0
explanation:  []


# Ejemplo usando LLMs para obtener propiedades lingüísticas en español

In [21]:
# Spanish prompt: ask for tense, aspect and mood of every verb phrase in the text.
# It has its own name so the English `prompt_template` defined earlier is not overwritten.
# The format example asks for one object per verb phrase (a dict of dicts).
prompt_template_es = """Para el siguiente texto, devolvé el tiempo, aspecto y modo verbal para cada frase verbal:

{text}

Estructurá tu respuesta en el siguiente formato JSON (sin texto extra):
{{frase_verbal_A: {{'tiempo': <tiempo>, 'aspecto': <aspecto>, 'modo': <modo>}}, frase_verbal_B: {{...}} }}

JSON:
"""


textos = {'001': 'Sospecho, sin embargo, que no era muy capaz de pensar. Pensar es olvidar diferencias, es generalizar, abstraer. En el abarrotado mundo de Funes no había sino detalles, casi inmediatos.'}

# Index directly: a missing id should fail here instead of formatting "None" into the prompt
prompt = prompt_template_es.format(text = textos['001'])

final_result, metadata = openrouter_request(prompt, OPENROUTER_API_KEY, model = model, temperature=0)

In [22]:
final_result

{'Sospecho': {'tiempo': 'presente',
  'aspecto': 'imperfectivo',
  'modo': 'indicativo'},
 'era': {'tiempo': 'pretérito imperfecto',
  'aspecto': 'imperfectivo',
  'modo': 'indicativo'},
 'capaz de pensar': {'tiempo': 'infinitivo',
  'aspecto': 'no aplica',
  'modo': 'no aplica'},
 'Pensar': {'tiempo': 'infinitivo',
  'aspecto': 'no aplica',
  'modo': 'no aplica'},
 'es olvidar': {'tiempo': 'presente',
  'aspecto': 'imperfectivo',
  'modo': 'indicativo'},
 'es generalizar': {'tiempo': 'presente',
  'aspecto': 'imperfectivo',
  'modo': 'indicativo'},
 'abstraer': {'tiempo': 'infinitivo',
  'aspecto': 'no aplica',
  'modo': 'no aplica'},
 'había': {'tiempo': 'pretérito imperfecto',
  'aspecto': 'imperfectivo',
  'modo': 'indicativo'}}

In [23]:

# One row per verb phrase: transpose so the outer keys become rows, move them into
# a 'frase verbal' column, and tag every row with the id of the source text
resultados = (
    pd.DataFrame(final_result)
    .T
    .reset_index()
    .rename(columns={'index': 'frase verbal'})
    .assign(texto='001')
)
resultados

Unnamed: 0,frase verbal,tiempo,aspecto,modo,texto
0,Sospecho,presente,imperfectivo,indicativo,1
1,era,pretérito imperfecto,imperfectivo,indicativo,1
2,capaz de pensar,infinitivo,no aplica,no aplica,1
3,Pensar,infinitivo,no aplica,no aplica,1
4,es olvidar,presente,imperfectivo,indicativo,1
5,es generalizar,presente,imperfectivo,indicativo,1
6,abstraer,infinitivo,no aplica,no aplica,1
7,había,pretérito imperfecto,imperfectivo,indicativo,1


In [24]:
textos.get('001')

'Sospecho, sin embargo, que no era muy capaz de pensar. Pensar es olvidar diferencias, es generalizar, abstraer. En el abarrotado mundo de Funes no había sino detalles, casi inmediatos.'