In [27]:
# general imports
import numpy as np
import pandas as pd
import os
import random
import json

In [28]:
# torch imports
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from glob import glob

In [40]:
class MessageDataset(Dataset):
    """In-memory dataset of message records loaded from JSON files.

    Every ``*.json`` file directly inside ``root_dir`` is parsed, and the
    entries of its top-level ``'messages'`` list are concatenated into
    ``self.messages`` (files are processed in sorted path order).
    """

    def __init__(self, root_dir):
        """Load all messages found under ``root_dir``.

        Args:
            root_dir: Directory containing the ``*.json`` files. Each file
                must be a JSON object with a ``'messages'`` list.

        Raises:
            KeyError: If a file has no ``'messages'`` entry.
            json.JSONDecodeError: If a file is not valid JSON.
        """
        self.root_dir = root_dir
        # glob() makes no ordering promise (it depends on the file system);
        # sort so that a given index always refers to the same message.
        self.files = sorted(glob(root_dir + '/*.json'))
        self.messages = []
        for file in self.files:
            with open(file, 'r', encoding='utf-8') as f:
                self.messages.extend(json.load(f)['messages'])

    def __len__(self):
        """Return the total number of loaded messages."""
        return len(self.messages)

    def __getitem__(self, idx):
        """Return one message, or a list of messages for a batch of indices.

        Args:
            idx: An int or slice (passed straight to the underlying list),
                a list/tuple of ints, or a tensor of indices.

        Returns:
            A single message for a scalar index, otherwise a list of messages.
        """
        # Tensors become a plain int (0-d) or a list of ints (1-d).
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # convert message to something that pytorch can use
        if isinstance(idx, (list, tuple)):
            return [self.messages[i] for i in idx]
        return self.messages[idx]

In [41]:
# Build the dataset from the DM export folder, then display messages 1-3 as a sanity check
dms = MessageDataset(root_dir='E:/Discord/DMs')
dms[[1, 2, 3]]

[{'id': '820074102106751026',
  'type': 'Default',
  'timestamp': '2021-03-12T23:21:42.182+00:00',
  'timestampEdited': None,
  'callEndedTimestamp': None,
  'isPinned': False,
  'content': 'At the gym',
  'author': {'id': '259197192496087040',
   'name': 'Cuddlesworth',
   'discriminator': '1322',
   'nickname': 'Cuddlesworth',
   'color': None,
   'isBot': False,
   'avatarUrl': 'https://cdn.discordapp.com/avatars/259197192496087040/b59ab25a0223dcc446694fdd72d675f0.png?size=40'},
  'attachments': [],
  'embeds': [],
  'reactions': [],
  'mentions': []},
 {'id': '820074132300365896',
  'type': 'Default',
  'timestamp': '2021-03-12T23:21:49.381+00:00',
  'timestampEdited': None,
  'callEndedTimestamp': None,
  'isPinned': False,
  'content': 'Oh okey have fun!',
  'author': {'id': '437644030386372611',
   'name': 'darkrabbit97',
   'discriminator': '1994',
   'nickname': 'darkrabbit97',
   'color': None,
   'isBot': False,
   'avatarUrl': 'https://cdn.discordapp.com/avatars/43764403038

In [None]:
# set for reproducibility
def seed_everything(seed, deterministic=True):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy's legacy
            global RNG and PyTorch.
        deterministic: When True (default), also force cuDNN to pick
            deterministic algorithms and disable its autotuner, so GPU runs
            are repeatable (possibly at some speed cost). Pass False to leave
            the cuDNN flags untouched.
    """
    # Hash randomisation is fixed at interpreter start-up, so this only
    # affects Python processes launched after this call (e.g. subprocesses).
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if deterministic:
        # Seeding alone is not enough on GPU: the cuDNN autotuner may choose
        # different (and non-deterministic) kernels from run to run.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [None]:
# Notebook-wide constants
SEED = 69
ROOT_PATH = 'E:/Discord/'

# Seed all RNGs before any stochastic work happens
seed_everything(SEED)

# Use device index 0 when CUDA is available.
# NOTE(review): the fallback is the plain string 'cpu', not a torch.device.
if torch.cuda.is_available():
    device = torch.device(0)
else:
    device = 'cpu'


In [None]:
# explore data: pull message number 54 out of one exported conversation and display it
with open(
    'E:/Discord/DMs/Direct Messages - Private - Σκύθης [334867363755196426].json',
    mode='r',
    encoding='utf-8',
) as f:
    data = json.load(f)['messages'][54]
data

In [None]:
# IDs of some people in my messages, mapped to short readable names
my_id = '259197192496087040'
id_dict = {
    '259197192496087040': 'cuddles',
    '334867363755196426': 'skuthes',
    '688542990251458619': 'sarah',
    '262039783709081600': 'lumis',
    '315951640853348353': 'the_moderator',
    '820074055234224198': 'rabbit',
    '827346598660538369': 'kilnard',
    '618312371210944513': 'drjeesh',
    '534856325851512856': 'daniel_kun',
    '375858437516558346': 'sage_of_ice',
    None: 'None',
}
# Number every key (the None catch-all included) 0..N-1 in insertion order
id_cat_dict = dict(zip(id_dict, range(len(id_dict))))

In [None]:
def get_cat_id(message:dict, id_cat_dict=id_cat_dict):
    """Return the integer category for the person a message record belongs to.

    In the exported records the top-level ``'id'`` is the message's own id,
    while the sender's id is nested under ``message['author']['id']``.
    Looking up only the top-level id therefore sent every full message to
    the ``None`` category. The author id is now tried first; the top-level
    ``'id'`` is kept as a fallback so dicts that carry a known id directly
    (e.g. an author record) still resolve as before.

    Args:
        message: A message record (or any dict carrying an ``'id'``).
        id_cat_dict: Mapping from id to integer category. It must contain
            a ``None`` key, which is used for ids that are not listed.

    Returns:
        The category of the first matching id, or ``id_cat_dict[None]``
        when no candidate id is known.
    """
    author = message.get('author')
    candidates = []
    if isinstance(author, dict):
        candidates.append(author.get('id'))
    candidates.append(message.get('id'))

    for k in candidates:
        # Skip missing ids so a None author id cannot shadow a valid fallback.
        if k is not None and k in id_cat_dict:
            return id_cat_dict[k]
    return id_cat_dict[None]