In [1]:
### Setting up the Twitch IRC Socket - Part 1: General Info

from Twitch_key import *
import socket

server = 'irc.chat.twitch.tv' #twitch provided
port = 6667 #twitch provided
nickname = 'b_e_green' #your OWN twitch account
token = Twitch_key() #however you'd like to generate your oauth token
channel = '#adinross' #the channel you want to scrape from; change to whomever!

In [2]:
### Pulling comments via the Connection 

import logging
import time
from emoji import demojize


# Create a log file that saves your pull!
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s — %(message)s',
                    datefmt='%Y-%m-%d_%H:%M:%S',
                    handlers=[logging.FileHandler('chat.log', encoding='utf-8')])


# Connect to Socket
sock = socket.socket()
sock.connect((server, port))

sock.send(f"PASS {token}\n".encode('utf-8'))
sock.send(f"NICK {nickname}\n".encode('utf-8'))
sock.send(f"JOIN {channel}\n".encode('utf-8'))


# Loop for pull - We included a timer to run for a certain amount of seconds
start_time = time.time()
seconds = input("Enter: ")
seconds = int(seconds)
while True:
    current_time = time.time()
    elapsed_time = current_time - start_time

    resp = sock.recv(2048).decode('utf-8')

    if resp.startswith('PING'):
        sock.send("PONG\n".encode('utf-8'))
    
    elif len(resp) > 0:
        logging.info(demojize(resp))
        
    if elapsed_time > seconds:
        print("Finished iterating in: " + str(int(elapsed_time))  + " seconds")
        break
        sock.close() #closes the socket after your time runs out

Enter: 25
Finished iterating in: 25 seconds


In [3]:
### Generate a dataframe from the comments you pulled

import pandas as pd
from datetime import datetime
import re

def get_chat_dataframe(file):
    data = []

    with open(file, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n\n')
        
        for line in lines:
           # print(line)
            try:
                time_logged = line.split('—')[0].strip()
                time_logged = datetime.strptime(time_logged, '%Y-%m-%d_%H:%M:%S')

                username_message = line.split('—')[1:]
                username_message = '—'.join(username_message).strip()

                username, channel, message = re.search(
                    ':(.*)\!.*@.*\.tmi\.twitch\.tv PRIVMSG #(.*) :(.*)', username_message
                ).groups()

                d = {
                    'dt': time_logged,
                    'channel': channel,
                    'username': username,
                    'message': message
                }

                data.append(d)
            
            except Exception:
                pass
            
    return pd.DataFrame(data)
        

In [4]:
df = get_chat_dataframe('chat.log')

2021-09-05_01:11:43 — :tmi.twitch.tv 001 b_e_green :Welcome, GLHF!
:tmi.twitch.tv 002 b_e_green :Your host is tmi.twitch.tv
:tmi.twitch.tv 003 b_e_green :This server is rather new
:tmi.twitch.tv 004 b_e_green :-
:tmi.twitch.tv 375 b_e_green :-
:tmi.twitch.tv 372 b_e_green :You are in a maze of twisty passages, all alike.
:tmi.twitch.tv 376 b_e_green :>
:b_e_green!b_e_green@b_e_green.tmi.twitch.tv JOIN #adinross
:b_e_green.tmi.twitch.tv 353 b_e_green = #adinross :b_e_green
:b_e_green.tmi.twitch.tv 366 b_e_green #adinross :End of /NAMES list
:merkbodiezz!merkbodiezz@merkbodiezz.tmi.twitch.tv PRIVMSG #adinross :Scared tf outta me ngl
:a2huncho!a2huncho@a2huncho.tmi.twitch.tv PRIVMSG #adinross :shit scared me
:royalmunchies!royalmunchies@royalmunchies.tmi.twitch.tv PRIVMSG #adinross :Title ?
:rehxndle!rehxndle@rehxndle.tmi.twitch.tv PRIVMSG #adinross :WW
:zayx_x!zayx_x@zayx_x.tmi.twitch.tv PRIVMSG #adinross :fat
:djzero7!djzero7@djzero7.tmi.twitch.tv PRIVMSG #adinross :BibleThump BibleThum

In [5]:
### Take a look at your dataframe!

#df.set_index('dt', inplace=True) #use if you want the index to be the time

print(df.shape)

df.head()

(26, 4)


Unnamed: 0,dt,channel,username,message
0,2021-09-05 01:11:43,adinross,merkbodiezz,Scared tf outta me ngl
1,2021-09-05 01:11:44,adinross,alfonsodone,DAMN THAT GOT ME LMAOOO
2,2021-09-05 01:11:45,adinross,jkefz,MY SOUL JUST LEFT
3,2021-09-05 01:11:45,adinross,nightbot,yrgbrazyy has been following AdinRoss for 5 mo...
4,2021-09-05 01:11:45,adinross,alexvlone999,!watchtike
