<a href="https://colab.research.google.com/github/hellomikelo/hackathon-cohere-qdrant/blob/dev-prototype/discord_chat_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Discord chat history data

This notebook uses a Discord bot to collect channel chat history for creating text embeddings, which are then used to build a vector search engine.

Steps:
1. Set up the bot to take in new messages
2. Embed each new message
3. Ask the chatbot to find messages related to an idea (e.g. "who likes to bake cake?")
4. The chatbot embeds the message and searches the vector database using cosine similarity
5. The chatbot returns the top 3 relevant messages with links to the messages

References: 
* [discord.py home](https://discordpy.readthedocs.io/en/stable/index.html#)
* [API reference](https://discordpy.readthedocs.io/en/stable/api.html)
* [Minimal bot quickstart](https://discordpy.readthedocs.io/en/stable/quickstart.html)
* [Creating a Bot Account](https://discordpy.readthedocs.io/en/stable/discord.html#discord-intro)

In [None]:
import os

from dotenv import load_dotenv
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True remounts even if it is
# already mounted, so re-running this cell is safe.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Install the notebook's dependencies quietly (-q), upgrading existing copies (-U).
# Only qdrant_client is pinned; the other packages float to their latest release.
!pip install -q -U py-cord datasets qdrant_client=="0.11.0" cohere python-dotenv
# Copy the env file from the mounted Drive to /content/.env.
# NOTE(review): presumably this is where load_dotenv() looks for it - confirm the working directory.
!cp /content/drive/MyDrive/env/vars.env /content/.env

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.1 MB[0m [31m8.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m18.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Load variables from the .env file into the process environment; override=True lets
# values from the file replace variables that are already set.
load_dotenv(override=True)

True

Create a function to get the channel chat history and save the messages as a CSV. This uses the [Message components](https://discordpy.readthedocs.io/en/stable/api.html#message).

In [None]:
%%writefile gethistory.py
import discord
import pandas as pd
import os

# Start from the library's default intents and additionally enable message content,
# which the on_message handler in this script reads via message.content.
intents = discord.Intents.default()
intents.message_content = True

client = discord.Client(intents=intents)
# NOTE(review): `guild` is bound to the discord.Guild class itself and is not referenced
# again in this script - looks like leftover code; confirm before removing.
guild = discord.Guild

SCAN_COMMAND = '_scan'
SCAN_HISTORY_LIMIT = 10000  # maximum number of messages fetched per scan
SCAN_COLUMNS = ['content', 'time', 'author', 'jump_url']


def _is_scan_command(msg):
    """Return True when the first whitespace-separated token of ``msg.content`` is ``_scan``.

    Empty or whitespace-only content is treated as "not a command" instead of
    raising IndexError.
    """
    tokens = msg.content.split()
    return bool(tokens) and tokens[0] == SCAN_COMMAND


@client.event
async def on_message(message):
    """Handle the ``_scan`` command by exporting the channel history to ``data.csv``.

    Messages sent by the bot itself, and messages that do not start with ``_``,
    are ignored. For ``_scan``, up to ``SCAN_HISTORY_LIMIT`` messages are read
    from the channel the command was sent in; messages authored by the bot and
    ``_scan`` invocations are skipped. Each remaining message contributes its
    content, creation time, author name and jump URL as one CSV row.
    """
    if message.author == client.user:
        return
    if not message.content.startswith('_'):
        return

    # The command name is the first token with its underscores stripped.
    cmd = message.content.split()[0].replace("_", "")
    if cmd != 'scan':
        return

    # Collect plain dicts and build the frame once: DataFrame.append was removed
    # in pandas 2.0, and appending row by row is quadratic in any case.
    records = []
    async for msg in message.channel.history(limit=SCAN_HISTORY_LIMIT):
        if msg.author == client.user or _is_scan_command(msg):
            continue
        records.append({
            'content': msg.content,
            'time': msg.created_at,
            'author': msg.author.name,
            'jump_url': msg.jump_url,
        })

    # Passing columns= keeps the header row even when no messages were collected.
    data = pd.DataFrame(records, columns=SCAN_COLUMNS)

    file_location = "data.csv"  # Set the string to where you want the file to be saved to
    data.to_csv(file_location)
    print(f'Chat history saved to {file_location}')

# Start the bot with the token read from the DISCORD_TOKEN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset - confirm the
# environment is populated before this script is launched.
client.run(os.getenv('DISCORD_TOKEN'))

Writing gethistory.py


In [None]:
# Alternative entry point, left disabled. NOTE(review): example.py is only written by a
# later cell in this notebook, so it would not exist yet on a fresh top-to-bottom run.
# !python3 example.py
# Run the history-collection script written by the cell above as a separate process.
!python3 gethistory.py

Create an event-driven bot that takes a user prompt, queries relevant results, and generates a response to send back to the user.

In [None]:
%%writefile example.py
# This example requires the 'message_content' intent.
import discord
import os

class MyClient(discord.Client):
    """Minimal client that reports its login and every message it receives on stdout."""

    async def on_ready(self):
        """Print the account this client is logged in as."""
        banner = f'Logged on as {self.user}!'
        print(banner)

    async def on_message(self, message):
        """Print the author and content of each received message."""
        line = f'Message from {message.author}: {message.content}'
        print(line)


# Default intents plus message content, which MyClient.on_message prints.
intents = discord.Intents.default()
intents.message_content = True

# Start the client with the token read from the DISCORD_TOKEN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset - confirm the
# environment is populated before this script is launched.
client = MyClient(intents=intents)
client.run(os.getenv('DISCORD_TOKEN'))


Overwriting example.py


In [None]:
# The unfinished `time.` expression that followed this import was a SyntaxError and
# stopped "Restart & Run All"; only the import is kept.
import time

In [None]:
%%writefile dsend.py
import discord
import pandas as pd
import os
    
# Default intents with message content additionally enabled.
intents = discord.Intents.default()
intents.message_content = True

client = discord.Client(intents=intents)


@client.event
async def on_ready():
    """Send each word of a fixed test sentence to one channel, then shut down.

    The target channel id is read from the optional DISCORD_CHANNEL_ID
    environment variable and falls back to the id originally hard-coded here.
    The client is closed once sending finishes (or fails) so that this
    one-shot script returns control to the notebook, and so that a repeated
    on_ready event cannot send the words a second time.
    """
    try:
        channel_id = int(os.getenv('DISCORD_CHANNEL_ID', '1084864988688154627'))
        # Resolve the channel once instead of on every loop iteration.
        channel = client.get_channel(channel_id)
        if channel is None:
            # get_channel returns None when the channel is not available to the bot.
            print(f'Channel {channel_id} not found; check the id and the bot\'s server membership.')
            return
        for word in 'this is sent from Colab'.split():
            await channel.send(word)
    finally:
        await client.close()


# Start the client with the token read from the DISCORD_TOKEN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset - confirm the
# environment is populated before this script is launched.
client.run(os.getenv('DISCORD_TOKEN'))

Overwriting dsend.py


In [None]:
# Run the dsend.py script written by the cell above as a separate process.
!python3 dsend.py

[30;1m2023-03-14 00:57:43[0m [34;1mINFO    [0m [35mdiscord.client[0m logging in using static token
[30;1m2023-03-14 00:57:43[0m [34;1mINFO    [0m [35mdiscord.gateway[0m Shard ID None has connected to Gateway (Session ID: 3234d7569aedae26ed9aee17b7772457).
