In [11]:
import argparse
import configparser
import dateutil
import json
import logging
import os
import sys

import pandas as pd
from pymongo import MongoClient

# Install a default handler on the root logger and let this notebook's logger
# emit everything from DEBUG upwards.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Command-line selection of the config file is kept for reference but disabled;
# the path is fixed below instead.
# NOTE(review): presumably disabled because parse_args() would see the notebook
# kernel's own argv -- confirm before re-enabling.
# parser = argparse.ArgumentParser()
# parser.add_argument(
#     "-c", "--config", type=str, default="settings.conf", help="Path to config file."
# )
# args = parser.parse_args()

# config_file = args.config

config_file = 'settings.conf'

config = configparser.RawConfigParser()
# read() skips files it cannot open and returns the list of files it actually
# parsed. Without this check a wrong path only surfaces later as a KeyError on
# config["mongodb"].
if not config.read(config_file):
    logger.warning(
        "Config file %r could not be read; no settings were loaded.", config_file
    )

# Pairs of (DataFrame column name, dotted path of the field in a message
# document). The paths mirror the nested lookups create_dateframe performs.
# NOTE(review): not referenced by any code visible in this notebook section.
df_header = [
    ('timestamp', 'timestamp'),
    ('user_name', 'author.name'),
    ('user_id', 'author.id'),
    ('bot', 'author.isBot'),
    ('channel', 'channel.name'),
    ('message', 'content'),
    ('message_id', 'id')
]

In [28]:
def create_dateframe(query=None):
    """Load messages from MongoDB into a pandas DataFrame.

    Connection string, database name and collection name are read from the
    ``[mongodb]`` section of the module-level ``config``.

    Args:
        query: Filter document passed to ``Collection.find``. ``None`` (the
            default) or ``{}`` selects every document in the collection.

    Returns:
        pandas.DataFrame with one row per matched document and the columns
        ``timestamp``, ``user_name``, ``user_id``, ``bot``, ``channel``,
        ``message`` and ``message_id``. The columns are present even when no
        document matches.

    Raises:
        KeyError: If the config lacks a required ``mongodb`` entry, or a
            document is missing one of the fields read below.
    """
    # Avoid a shared mutable default; an empty filter matches everything.
    if query is None:
        query = {}

    columns = (
        'timestamp',
        'user_name',
        'user_id',
        'bot',
        'channel',
        'message',
        'message_id',
    )
    data = {column: [] for column in columns}

    # Used as a context manager so the client is closed on exit, even when a
    # malformed document raises part-way through the loop.
    with MongoClient(config["mongodb"]["connection_string"]) as mongo_client:
        mongo_db = mongo_client[config["mongodb"]["db"]]
        mongo_coll = mongo_db[config["mongodb"]["collection"]]

        # The cursor must be fully consumed before the client is closed, so
        # the iteration stays inside the with-block.
        for msg in mongo_coll.find(query):
            author = msg['author']
            data['timestamp'].append(msg['timestamp'])
            data['user_name'].append(author['name'])
            data['user_id'].append(author['id'])
            data['bot'].append(author['isBot'])
            data['channel'].append(msg['channel']['name'])
            data['message'].append(msg['content'])
            data['message_id'].append(msg['id'])

    return pd.DataFrame(data)


# An empty filter matches every document, so this loads the whole collection.
dataframe = create_dateframe({})

# Plain-text dump of the frame.
print(dataframe)

                    timestamp      user_name             user_id    bot  \
0     2021-08-16 18:36:43.134    John_Wukong  852553468299968582  False   
1     2021-08-16 18:37:08.700    John_Wukong  852553468299968582  False   
2     2021-08-17 00:32:29.570  IBuyShitCoins  827007094469296188  False   
3     2021-08-17 00:32:42.596  IBuyShitCoins  827007094469296188  False   
4     2021-08-17 00:32:56.867  IBuyShitCoins  827007094469296188  False   
...                       ...            ...                 ...    ...   
10137 2021-12-20 15:05:22.203    John_Wukong  852553468299968582  False   
10138 2021-12-20 21:32:55.899  IBuyShitCoins  827007094469296188  False   
10139 2021-12-20 21:58:37.834    John_Wukong  852553468299968582  False   
10140 2021-12-20 21:59:51.141    John_Wukong  852553468299968582  False   
10141 2021-12-20 22:43:57.277    John_Wukong  852553468299968582  False   

                 channel                                            message  \
0      1mil-yacht-ra

In [30]:
# Distinct author names in first-seen order, skipping any name containing '#'.
users = [
    name
    for name in dataframe['user_name'].unique()
    if '#' not in name
]

print(users)

['John_Wukong', 'IBuyShitCoins', 'DotWar', 'jannis', 'KnottyFungi', 'marty', 'tip.cc', 'Tubbs', 'ChasseuR', 'grafx2g', 'ren987', 'TsukiBot', 'CryptoKing', 'PerfectGent', 'onthefly', 'Rax', 'mchileh', 'Allah', 'Fritzy', 'Alcarda', 'souljah11', 'MeatPuddles', 'Lio', 'disynthetic']


In [31]:
# Print each selected user's messages as a separate sub-frame.
for user in users:
    # Boolean-mask row selection: keep only rows authored by this user.
    user_data = dataframe.loc[dataframe['user_name'].eq(user)]
    print(user_data)

                    timestamp    user_name             user_id    bot  \
0     2021-08-16 18:36:43.134  John_Wukong  852553468299968582  False   
1     2021-08-16 18:37:08.700  John_Wukong  852553468299968582  False   
9     2021-08-17 07:31:12.658  John_Wukong  852553468299968582  False   
10    2021-08-17 07:33:11.747  John_Wukong  852553468299968582  False   
11    2021-08-17 07:39:42.688  John_Wukong  852553468299968582  False   
...                       ...          ...                 ...    ...   
10136 2021-12-20 15:05:05.159  John_Wukong  852553468299968582  False   
10137 2021-12-20 15:05:22.203  John_Wukong  852553468299968582  False   
10139 2021-12-20 21:58:37.834  John_Wukong  852553468299968582  False   
10140 2021-12-20 21:59:51.141  John_Wukong  852553468299968582  False   
10141 2021-12-20 22:43:57.277  John_Wukong  852553468299968582  False   

                 channel                                            message  \
0      1mil-yacht-racing  So, when do we cre