# BERT Tone Indicators Classification Model 






In [None]:
!pip install transformers --quiet
!pip install emoji --quiet
!pip install langdetect --quiet
!pip install torchviz --quiet

  Building wheel for torchviz (setup.py) ... [?25l[?25hdone


In [None]:
import os

import numpy as np
print('numpy', np.__version__)
import pandas as pd
print('pandas', pd.__version__)
from langdetect import detect
import random
import re

import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
#from torchtext.data import Field, TabularDataset, BucketIterator, Iterator

from transformers import AutoTokenizer, BertTokenizer, BertForSequenceClassification

from tqdm import tqdm #progress bar

# Evaluation

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

numpy 1.21.6
pandas 1.3.5


In [None]:
# Mount Google Drive at /content/drive; the next cell changes into a folder beneath it.
# Colab-only: `google.colab` is not importable outside of a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/My Drive/Twitter

/content/drive/.shortcut-targets-by-id/16W2OVxf4pSh-3dRxed_aDRKcUuYcKpsj/Twitter


In [None]:
import time
import datetime

def format_time(elapsed):
    '''
    Convert a duration in seconds into a human-readable string.

    The value is rounded to a whole number of seconds and rendered the way
    datetime.timedelta prints itself, e.g. 3661.4 -> '1:01:01'
    (durations of a day or more are prefixed with the day count).
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
from torch import cuda

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
print('Device count:', cuda.device_count())
print('Using device:', device)

# Additional info when using cuda
if device == 'cuda':
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.get_device_properties(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    # memory_reserved() is the current name of the deprecated memory_cached().
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Device count: 1
Using device: cuda
Tesla K80
_CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB




# Load tweets from file

In [None]:
# Read the tab-separated tweet file, using its first column as the index.
# on_bad_lines='skip' drops rows that cannot be parsed instead of raising.
# NOTE(review): presumably this is the English-only file written by the disabled
# cleaning cell further down — confirm.
data = pd.read_csv('all_tweets_english.tsv', delimiter='\t', index_col=0, on_bad_lines='skip')
print(data.columns)
data

Index(['text', 'srs', 'nbh', 'hyp', 'pos', 'neg'], dtype='object')


Unnamed: 0_level_0,text,srs,nbh,hyp,pos,neg
tid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1491924924386230287,RT @malewifealtair: @sobokify /srs.,1,0,0,0,0
1491924858493620227,i just swallowed a small gemstone /srs /pos,1,0,0,1,0
1491924846242152457,@bonsorlol no bc it genuinely had great themes...,0,0,0,1,0
1491924808476422144,registering for classes /neg 😵‍💫😵‍💫😵‍💫 i am so...,0,0,0,0,1
1491924691925311489,I found out about fnf a week or two after it c...,1,0,0,0,0
...,...,...,...,...,...,...
1495361726807560194,always show up on my tl /pos but were not clos...,0,0,0,1,0
1495361724601417729,screams /neg,0,0,0,0,1
1495361646067392520,@walterhillerska i hate gay people /srs,1,0,0,0,0
1495361617466621955,are u comfy with this user ?? /srs.,1,0,0,0,0


In [None]:
# sum_data = data['srs']+data['nbh']+data['hyp']+data['pos']+data['neg']
# count = 0
# for i, c in enumerate(sum_data):
#   if c >1:
#     print(i, c, data['text'][i])
#     count +=1
# print(count)

In [None]:
# Share of tweets carrying more than one tone tag. Computed from the label columns
# instead of hard-coded counts, so the figure stays correct when the dataset changes.
multi_tag_rate = (data[['srs', 'nbh', 'hyp', 'pos', 'neg']].sum(axis=1) > 1).mean()
print('The rate of tweets with more than 1 tags:', multi_tag_rate)

The rate of tweets with more than 1 tags: 0.01030066815144766


# Data Preprocessing


In [None]:
pd.options.mode.chained_assignment = None  # default='warn'

# Keep only tweets that contain at least one tone indicator.
# na=False treats a missing text as "no tag" instead of breaking the boolean mask, and
# .copy() yields an independent frame so the assignments below never write into a view.
contain_values = data[data['text'].str.contains('/srs|/nbh|/hyp|/pos|/neg', na=False)].copy()

# Strip URLs from the text. regex=True is explicit: newer pandas versions treat the
# pattern as a literal string by default, which would leave every URL in place.
contain_values["text"] = (
    contain_values["text"]
    .str.replace(r'\s*https?://\S+(\s+|$)', ' ', regex=True)
    .str.strip()
)

# Recompute each one-hot label column from the cleaned text.
labeled = contain_values
for tone_tag in ['srs', 'nbh', 'hyp', 'pos', 'neg']:
    labeled[tone_tag] = np.where(labeled["text"].str.contains('/' + tone_tag), 1, 0)
data = labeled
print(data)

  


                                                                  text  srs  \
tid                                                                           
1491924924386230287                RT @malewifealtair: @sobokify /srs.    1   
1491924858493620227        i just swallowed a small gemstone /srs /pos    1   
1491924846242152457  @bonsorlol no bc it genuinely had great themes...    0   
1491924808476422144  registering for classes /neg 😵‍💫😵‍💫😵‍💫 i am so...    0   
1491924691925311489  I found out about fnf a week or two after it c...    1   
...                                                                ...  ...   
1495361726807560194  always show up on my tl /pos but were not clos...    0   
1495361724601417729                                       screams /neg    0   
1495361646067392520            @walterhillerska i hate gay people /srs    1   
1495361617466621955                are u comfy with this user ?? /srs.    1   
1495361430178074626        damn he actually did kiss

Uncomment (remove the surrounding triple quotes) and run the next cell if your dataset has not yet been filtered down to English tweets that contain tone indicators. If your data is already clean, skip it.

In [None]:
# Disabled language-filter code: everything below is stored as inert string literals,
# so running this cell executes nothing.
# The fastest runtime comes from applying this function to the entire df
"""
def detect2(row):
  res = "error"
  try:
    res = detect(row)
    return res
  except:
    return res
"""

# Example Run:
'''
df = pd.read_csv("in.tsv.tsv",sep='\t')
contain_values = df[df['text'].str.contains('/srs|/nbh|/hyp|/pos|/neg')]
contain_values["text"] = contain_values["text"].str.replace(r'\s*https?://\S+(\s+|$)', ' ').str.strip()
df_new = contain_values[contain_values.text.apply(detect2).eq('en')]
print(df_new)
df_new.to_csv('out.tsv', sep="\t")
'''

# First we keep only the rows that contain a tone tag, then strip URLs out of their text
# (the rows themselves are kept).
# Use detect2 for the final frame and convert to tsv

'''
df_new = data[data.text.apply(detect2).eq('en')]
print(df_new)
data = df_new
'''

"\ndf_new = data[data.text.apply(detect2).eq('en')]\nprint(df_new)\ndata = df_new\n"

In [None]:
# Save the cleaned data. Disabled: the snippet is an inert string literal, and it
# needs the df_new frame produced by the (also disabled) language filter above.
'''
print(df_new)
df_new.to_csv('all_tweets_english.tsv', sep="\t")
'''


'\nprint(df_new)\ndf_new.to_csv(\'all_tweets_english.tsv\', sep="\t")\n'

## Tokenization

In [None]:
# Keep only the tweets that carry exactly one tone tag, so every example has a single label.
tones = ['srs', 'nbh', 'hyp', 'pos', 'neg']

# Named mask rather than `filter`, which would shadow the Python builtin.
single_tag_mask = data[tones].sum(axis=1) == 1
data = data[single_tag_mask].reset_index(drop=True)
# info() prints its report itself and returns None, so it is not wrapped in print().
data.info()
print(data[tones].sum(axis=0))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147807 entries, 0 to 147806
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   text    147807 non-null  object
 1   srs     147807 non-null  int64 
 2   nbh     147807 non-null  int64 
 3   hyp     147807 non-null  int64 
 4   pos     147807 non-null  int64 
 5   neg     147807 non-null  int64 
dtypes: int64(5), object(1)
memory usage: 6.8+ MB
None
srs    51329
nbh     4080
hyp      105
pos    78147
neg    14146
dtype: int64


In [None]:
# Derive each tweet's tag name from its one-hot label columns.
# Every row has exactly one column equal to 1 (rows with any other count were removed
# by the single-tag filter), so the column holding the row maximum is that tag.
# idxmax is vectorised and replaces a row-wise apply that took minutes.
# Caveat: an all-zero row would silently map to the first column instead of raising.
data['tag'] = data[tones].idxmax(axis=1)

# Integer class id of each tag = its position in `tones`.
label2ind = {tag: ind for ind, tag in enumerate(tones)}
tags_token = data['tag'].map(label2ind)

print(data)

                                                     text  srs  nbh  hyp  pos  \
0                     RT @malewifealtair: @sobokify /srs.    1    0    0    0   
1       @bonsorlol no bc it genuinely had great themes...    0    0    0    1   
2       registering for classes /neg 😵‍💫😵‍💫😵‍💫 i am so...    0    0    0    0   
3       I found out about fnf a week or two after it c...    1    0    0    0   
4                                   @malewifealtair /srs?    1    0    0    0   
...                                                   ...  ...  ...  ...  ...   
147802  always show up on my tl /pos but were not clos...    0    0    0    1   
147803                                       screams /neg    0    0    0    0   
147804            @walterhillerska i hate gay people /srs    1    0    0    0   
147805                are u comfy with this user ?? /srs.    1    0    0    0   
147806        damn he actually did kiss the ice im 😭 /pos    0    0    0    1   

        neg  tag  
0       

In [None]:
# Replace every tone tag in the text with one shared placeholder string
# (presumably so the label cannot be read straight off the input — TODO confirm).
# A single regex pass replaces five sequential calls; regex=True is explicit because
# the default of str.replace differs between pandas versions.
data['text'] = data['text'].str.replace(r'/(?:srs|nbh|pos|hyp|neg)', 'TONE_INDICATOR', regex=True)

In [None]:
# Load the BERTweet tokenizer (use_fast=False selects the non-fast implementation).
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
print("length before new token", len(tokenizer))
# Registering the placeholder as a dedicated token is disabled, so both lengths match.
#tokenizer.add_tokens(["TONE_INDICATOR"])
print("length after new token", len(tokenizer))

tweets = data['text']

# Show one example at each stage: raw text, sub-word tokens, vocabulary ids.
sample_tweet = tweets[23]
sample_tokens = tokenizer.tokenize(sample_tweet)
print(sample_tweet)
print(' Original: ', sample_tweet)
print('Tokenized: ', sample_tokens)
print('Token IDs: ', tokenizer.convert_tokens_to_ids(sample_tokens))

Downloading:   0%|          | 0.00/558 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/824k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


length before new token 64001
length after new token 64001
im going to cry we didnt look at eachother or even fucking look at eachother :// TONE_INDICATOR
 Original:  im going to cry we didnt look at eachother or even fucking look at eachother :// TONE_INDICATOR
Tokenized:  ['im', 'going', 'to', 'cry', 'we', 'didnt', 'look', 'at', 'eachother', 'or', 'even', 'fucking', 'look', 'at', 'eachother', ':@@', '/@@', '/', 'TON@@', 'E_@@', 'IND@@', 'IC@@', 'ATOR']
Token IDs:  [199, 117, 9, 866, 54, 1403, 184, 35, 9317, 72, 132, 309, 184, 35, 9317, 1043, 2603, 75, 16457, 35637, 16065, 2965, 22679]


In [None]:
# Length of the longest tokenized tweet, including the special start/end tokens
# that add_special_tokens=True puts around each sequence.
max_len = max(
    (len(tokenizer.encode(twt, add_special_tokens=True)) for twt in tweets),
    default=0,  # an empty dataset has nothing to measure
)

print('Max sentence length: ', max_len)
# tokenizer.max_len_sentences_pair, tokenizer.max_len_single_sentence

Token indices sequence length is longer than the specified maximum sequence length for this model (165 > 128). Running this sequence through the model will result in indexing errors


Max sentence length:  284


In [None]:
# Encode every tweet as fixed-length token ids plus an attention mask.

# Fixed sequence length used for both padding and truncation.
# NOTE(review): presumably chosen to stay under the 128-token model limit reported
# by the tokenizer warning — confirm.
MAX_SEQ_LEN = 124

input_ids = []
attention_masks = []

for twt in tweets:
    encoded_dict = tokenizer.encode_plus(
        twt,                           # Sentence to encode.
        add_special_tokens=True,       # Add the special start/end tokens.
        max_length=MAX_SEQ_LEN,
        padding='max_length',          # Replaces the deprecated pad_to_max_length=True.
        truncation=True,               # Explicit: longer tweets are cut to MAX_SEQ_LEN.
        return_attention_mask=True,    # Construct attn. masks.
        return_tensors='pt',           # Return pytorch tensors.
    )

    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

# Stack the per-tweet (1, MAX_SEQ_LEN) tensors into one tensor per field.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
# Convert through NumPy so the tensor is built from the values, independent of the Series index.
labels = torch.tensor(tags_token.to_numpy())

# Print sentence 0, now as a list of IDs.
print('Original: ', tweets[0])
print('Token IDs:', input_ids[0])



Original:  RT @malewifealtair: @sobokify TONE_INDICATOR.
Token IDs: tensor([    0,   246,  5238,  4151,   515,  2583, 15590,   836,  1257,  1784,
           22,  5238,  2266,  1384,   409, 16044, 16457, 35637, 16065,  2965,
         3368,  3447,     4,     2,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,    

# Train/test split, dataloader

In [None]:
from torch.utils.data import TensorDataset, random_split

dataset = TensorDataset(input_ids, attention_masks, labels)

# Create a 90-10 train-validation split.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

# Dedicated seeded generator: the notebook's global seeds are only set in the training
# cell, which runs after this one, so without it the split would differ on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

print('{:>5,} training samples'.format(train_size))
print('{:>5,} validation samples'.format(val_size))

133,026 training samples
14,781 validation samples


In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# Mini-batch size shared by both loaders; for fine-tuning BERT the authors
# recommend 16 or 32.
batch_size = 32

# Training batches are drawn in random order.
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)

# Validation order is irrelevant, so the samples are simply read in sequence.
val_sampler = SequentialSampler(val_dataset)
validation_dataloader = DataLoader(val_dataset, sampler=val_sampler, batch_size=batch_size)

# Model
Model architecture:
pretrained BERTweet encoder + a three-layer fully connected classifier head

In [None]:
class BERT_Arch(nn.Module):
    '''
    Pretrained transformer encoder followed by a three-layer classification head.

    The head maps the encoder's pooled output through 512 and then 128 hidden units
    (ReLU and dropout after each) to `output_size` classes and returns log-probabilities.

    Args:
        bert: encoder module. It is called as bert(sent_id, attention_mask=mask) and
            must return an object exposing `pooler_output`.
        output_size: number of target classes.
    '''

    def __init__(self, bert, output_size):
        super().__init__()

        self.bert = bert

        # Take the encoder width from its config when it has one, instead of assuming a
        # base-sized model; 768 stays the fallback for encoders without a config.
        hidden_size = getattr(getattr(bert, 'config', None), 'hidden_size', 768)

        # fully connected head
        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, output_size)

        # regularisation and activation layers
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.1)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        '''
        Return per-class log-probabilities for a batch.

        Args:
            sent_id: token-id tensor passed to the encoder as its first argument.
            mask: attention mask passed to the encoder as `attention_mask`.
        '''
        bert_out = self.bert(sent_id, attention_mask=mask).pooler_output  # (batch_size, hidden_size)

        x = self.fc1(bert_out)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        # log-softmax over the class dimension
        x = self.softmax(x)

        return x


In [None]:
# Ask CUDA to run kernels synchronously so a device-side error is reported at the call
# that caused it. The variable has to be set in this process: a shell line such as
# `!CUDA_LAUNCH_BLOCKING=1` only sets it in a throw-away subshell and never reaches
# the kernel. Debugging aid — it slows GPU work, so drop this cell once training is stable.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [None]:
from transformers import AutoModel, BertModel, AdamW, BertConfig
# Load the pretrained BERTweet encoder; the plain-BERT alternative is kept below for reference.
#bert_pretrained = BertModel.from_pretrained('bert-base-uncased')
bert_pretrained = AutoModel.from_pretrained("vinai/bertweet-base")
# Size the embedding matrix to the tokenizer's vocabulary
# (only changes anything if tokens were added to the tokenizer).
bert_pretrained.resize_token_embeddings(len(tokenizer)) 

Downloading:   0%|          | 0.00/517M [00:00<?, ?B/s]

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Embedding(64001, 768, padding_idx=1)

In [None]:
# Wrap the pretrained encoder with the classification head: one output per tone tag.
model = BERT_Arch(bert_pretrained, output_size=len(tones))

#print(model.embeddings.word_embeddings.weight[-1, :])

# BERT_Arch already returns log-probabilities. CrossEntropyLoss applies log-softmax
# again, which leaves normalised log-probabilities unchanged, so the loss is correct.
criterion = nn.CrossEntropyLoss()

# torch.optim.AdamW replaces the deprecated transformers.AdamW. weight_decay=0.0
# keeps the transformers default (torch's own default would be 0.01).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-8,
    weight_decay=0.0,
)

model = model.to(device)
print(model)




BERT_Arch(
  (bert): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(64001, 768, padding_idx=1)
      (position_embeddings): Embedding(130, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), ep

### Accuracy Functions

In [None]:
# Accuracy of NumPy score rows against their true labels
def flat_accuracy(preds, labels):
    '''
    Return the fraction of rows in `preds` whose highest-scoring column
    equals the corresponding entry of `labels` (labels are flattened first).
    '''
    predicted = np.argmax(preds, axis=1).ravel()
    truth = labels.ravel()
    matches = predicted == truth
    return np.sum(matches) / len(truth)

def multi_acc(y_pred, y_test):
    '''
    Percentage of samples in a batch whose top-scoring class matches the label.

    Args:
        y_pred: tensor of per-class scores, classes along dim 1 (logits or log-probabilities).
        y_test: tensor of integer class ids, one per row of y_pred.

    Returns:
        0-dim float tensor holding the accuracy as a percentage (0-100), not rounded.
    '''
    # The winning class is read straight from the scores: log-softmax is monotonic,
    # so applying it first would not change which class is largest.
    y_pred_tags = torch.argmax(y_pred, dim=1)

    correct_pred = (y_pred_tags == y_test).float()

    # Exact percentage. Callers weight this by batch size and average it over the epoch,
    # so rounding every batch to a whole percent first would distort the reported accuracy.
    acc = correct_pred.sum() / len(correct_pred) * 100

    return acc

## Training/validating wrapper functions

In [None]:
def training(train_dataloader, model):
    '''
    Run one training epoch and return (average accuracy, average loss, elapsed time).

    Uses the notebook-level `optimizer`, `criterion` and `device`. Every batch is read
    as (input ids, attention mask, labels). Accuracy is whatever multi_acc reports, and
    both averages are weighted by batch size and divided by the dataset length.
    '''
    print('Training...')

    t0 = time.time()

    total_train_loss = 0
    total_train_accuracy = 0

    model.train()

    # tqdm takes the number of batches from the dataloader itself.
    loop = tqdm(train_dataloader)

    for batch in loop:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        batch_len = b_input_ids.shape[0]

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        ### forward pass ###
        b_train_pred = model(b_input_ids, mask=b_input_mask)

        loss = criterion(b_train_pred, b_labels)
        accuracy = multi_acc(b_train_pred, b_labels)

        # Weight by batch size so a smaller final batch does not skew the averages.
        total_train_loss += loss.item() * batch_len
        total_train_accuracy += accuracy.item() * batch_len

        ### backward pass ###
        loss.backward()
#         torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        # scheduler.step()
        loop.set_postfix(loss=loss.item(), acc=accuracy.item())

    n_samples = len(train_dataloader.dataset)
    avg_train_accuracy = total_train_accuracy / n_samples
    avg_train_loss = total_train_loss / n_samples
    training_time = format_time(time.time() - t0)

    print("  Accuracy: {0:.2f}".format(avg_train_accuracy))
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))

    return avg_train_accuracy, avg_train_loss, training_time

In [None]:
def validating(validation_dataloader, model):
    '''
    Evaluate the model on validation_dataloader.

    Uses the notebook-level `criterion` and `device`. Every batch is read as
    (input ids, attention mask, labels). The model is put in eval mode and the
    forward passes run without gradient tracking.

    Returns (average accuracy, average loss, elapsed time); both averages are
    weighted by batch size and divided by the dataset length.
    '''
    print("Running Validation...")

    t0 = time.time()

    model.eval()

    # Tracking variables
    total_eval_accuracy = 0
    total_eval_loss = 0

    for batch in tqdm(validation_dataloader):

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        batch_len = b_input_ids.shape[0]

        with torch.no_grad():
            b_val_pred = model(b_input_ids, mask=b_input_mask)

            loss = criterion(b_val_pred, b_labels)
            accuracy = multi_acc(b_val_pred, b_labels)

        total_eval_loss += loss.item() * batch_len
        total_eval_accuracy += accuracy.item() * batch_len

    n_samples = len(validation_dataloader.dataset)
    avg_val_accuracy = total_eval_accuracy / n_samples
    avg_val_loss = total_eval_loss / n_samples
    validation_time = format_time(time.time() - t0)

    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    return avg_val_accuracy, avg_val_loss, validation_time


# Main training procedure
Recommended to run on a GPU if one is available.

In [None]:
# Seed every random number generator used from here on.
# NOTE(review): the classifier head was initialised and the train/validation split was
# drawn in earlier cells, so these seeds do not make those steps reproducible.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# One dict of metrics per epoch.
training_stats = []

epochs = 10

total_t0 = time.time() 

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    ## =================== Training =================== ##
    avg_train_accuracy, avg_train_loss, training_time = training(train_dataloader, model)
        
    ## =================== Validation =================== ##
    avg_val_accuracy, avg_val_loss, validation_time = validating(validation_dataloader, model)
    
    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Training Accur.': avg_train_accuracy,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time,
        }
    )
    
# Save a checkpoint. This statement sits outside the loop, so it runs once, after the
# final epoch; epoch_i and avg_train_loss still hold the values of that last epoch.
# NOTE(review): the per-epoch file name suggests it may have been meant to run inside
# the loop — confirm the intent before moving it.
torch.save({
      'epoch': epoch_i+1,
      'model_state_dict': model.state_dict(),
      'bert_state_dict': model.bert.state_dict(), #save the finetuned bert
      'optimizer_state_dict': optimizer.state_dict(),
      'avg_train_loss': avg_train_loss,
      }, f'./checkpoint_{epoch_i+1}.pt')

print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

# Analysis and visualizations
Code for visualizing the model (currently the autograd computation graph of one forward pass) and other things.


In [None]:
from torchviz import make_dot, make_dot_from_trace

In [None]:
# Build one small example input and run it through the model for the graph rendering.
sentence_a = "The cat sat on the mat"
sentence_b = "The cat lay on the rug"
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt')

# Distinct names: reusing `input_ids` would overwrite the tokenized dataset tensor held
# under that name and break any re-run of the dataset cell. Token type ids are not
# looked up because the model's forward() only takes token ids and the attention mask.
viz_input_ids = inputs['input_ids'].to(device)
viz_mask = inputs['attention_mask'].to(device)

# Despite its name, `attention` holds the model output (per-class log-probabilities);
# the name is kept because the rendering cell below refers to it.
attention = model(viz_input_ids, viz_mask)

In [None]:
# Name -> tensor mapping of every parameter that still requires gradients.
param_dict = {
    name: param
    for name, param in model.named_parameters()
    if param.requires_grad
}
    

In [None]:
# Render the autograd graph behind `attention` to the file "attached.jpeg".
# NOTE(review): show_attrs / show_saved add extra detail to the graph, which is
# presumably why the renderer reports it as too large and scales it down — confirm.
make_dot(attention, params=param_dict, show_attrs=True, show_saved=True).render("attached", format="jpeg")

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.735692 to fit
tcmalloc: large alloc 1111982080 bytes == 0x5612c55ea000 @  0x7fde83ac7001 0x7fde7fd5e1fa 0x7fde7fd5e2ad 0x7fde80ee96df 0x7fde813e0261 0x7fde8385d468 0x7fde8385fd53 0x5612c2abd092 0x7fde83212c87 0x5612c2abd12a


'attached.jpeg'