# Data Gathering and Formatting
---

In [1]:
import pandas as pd
from twitch_chat_scrape import twitch_chat_scrape
import twitch_chat_format as tcf

In [2]:
# Relative path to the text file holding the Twitch OAuth token; the token is
# read from this file at scrape time instead of being hardcoded in the notebook.
oauth_path = "../twitch_oauth_token/token.txt"

## Scraping from AdmiralBulldog
---

In [3]:
# Read the token up front so the file handle is closed before the long-running
# scrape starts; strip() drops any trailing newline stored with the token.
with open(oauth_path, "r", encoding = "utf-8") as oauth:
    oauth_token = oauth.read().strip()

# Scrape the channel's chat for the requested number of minutes. The log is
# written under ../data/logs/, the directory the formatting step reads from.
twitch_chat_scrape(nickname = "ticklebits",
                   token = oauth_token,
                   channel = "admiralbulldog",
                   minutes = 240,
                   path = "../data/logs/chat_admiralbulldog_4_30.log")

----------------------------------------
Checkpoint number 1 at 24.0 minutes.
4374 messages logged!
----------------------------------------
Checkpoint number 2 at 48.0 minutes.
8236 messages logged!
----------------------------------------
Checkpoint number 3 at 72.02 minutes.
12172 messages logged!
----------------------------------------
Checkpoint number 4 at 96.0 minutes.
15803 messages logged!
----------------------------------------
Checkpoint number 5 at 120.01 minutes.
19337 messages logged!
----------------------------------------
Checkpoint number 6 at 144.0 minutes.
23840 messages logged!
----------------------------------------
Checkpoint number 7 at 168.0 minutes.
26465 messages logged!
----------------------------------------
Checkpoint number 8 at 192.03 minutes.
28522 messages logged!
----------------------------------------
Scrape interrupted by user after 194.09 minutes.
28635 messages logged!
----------------------------------------


> The scrape was stopped manually after about 194 of the planned 240 minutes because the stream had ended.

In [3]:
# Parse the raw chat log into a table; the preview further down shows a time
# index with username, channel and message columns.
admiralbulldog_4_30 = tcf.twitch_chat_format("../data/logs/chat_admiralbulldog_4_30.log")

In [4]:
# (rows, columns) of the formatted chat data.
admiralbulldog_4_30.shape

(28844, 3)

In [5]:
# Preview the first few formatted messages.
admiralbulldog_4_30.head()

Unnamed: 0_level_0,username,channel,message
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-30 08:56:59,collectcalled,admiralbulldog,IF HENRIK WAS AN AniMAL
2019-04-30 08:56:59,laudon,admiralbulldog,gachiHYPER
2019-04-30 08:57:00,hyper_brah,admiralbulldog,WutFace WutFace WutFace WutFace WutFace WutFac...
2019-04-30 08:57:00,felianjo,admiralbulldog,"I LOVE THEM, JUST LET THEM IN CAGES Pepega Clap"
2019-04-30 08:57:00,nevervvinterr,admiralbulldog,gachiHYPER


In [6]:
# Count missing values in each column.
admiralbulldog_4_30.isna().sum()

username    0
channel     0
message     0
dtype: int64

In [7]:
# Drop the final 250 rows, which were logged after the stream had ended.
# Note: the result is assigned back to the same name, so re-running this
# cell trims a further 250 rows each time.

admiralbulldog_4_30 = admiralbulldog_4_30.iloc[:-250]

In [8]:
# Remove messages posted by the channel's auto-moderation chat bot
# (username "admiralbullbot") so only viewer messages remain.

admiralbulldog_4_30 = tcf.filter_bot_messages(admiralbulldog_4_30, 
                                              bot_name = "admiralbullbot")

In [9]:
# Derive messages-per-second data from the cleaned chat; the helper prints its
# progress as it works through the log.
admiralbulldog_4_30_mps = tcf.messages_per_second(admiralbulldog_4_30)

Processing 188 minutes of chat messages...
20 out of 188 minutes of messages processed.
40 out of 188 minutes of messages processed.
60 out of 188 minutes of messages processed.
80 out of 188 minutes of messages processed.
100 out of 188 minutes of messages processed.
120 out of 188 minutes of messages processed.
140 out of 188 minutes of messages processed.
160 out of 188 minutes of messages processed.
180 out of 188 minutes of messages processed.
188 out of 188 minutes of messages processed.
...All messages processed.


In [10]:
# Save the cleaned chat messages, writing the index as the first column.
admiralbulldog_4_30.to_csv("../data/formatted/admiralbulldog_4_30.csv",
                           index = True)

# Save the messages-per-second data with its value column labelled "mps".
# `header` has to be a list to rename the column: a bare string is only
# treated as a truthy flag, so the "mps" label was never actually written.
# NOTE(review): assumes messages_per_second returns a single column - confirm.
admiralbulldog_4_30_mps.to_csv("../data/formatted/admiralbulldog_4_30_mps.csv",
                               index = True,
                               header = ["mps"])

## Scraping from xQcOW
---

In [3]:
# Read the token up front so the file handle is closed before the long-running
# scrape starts; strip() drops any trailing newline stored with the token.
with open(oauth_path, "r", encoding = "utf-8") as oauth:
    oauth_token = oauth.read().strip()

# Scrape the channel's chat for the requested number of minutes and write the
# raw log under ../data/logs/.
twitch_chat_scrape(nickname = "ticklebits",
                   token = oauth_token,
                   channel = "xqcow",
                   minutes = 240,
                   path = "../data/logs/chat_xqcow_5_03.log")

----------------------------------------
Checkpoint number 1 at 24.01 minutes.
9192 messages logged!
----------------------------------------
Checkpoint number 2 at 48.0 minutes.
19066 messages logged!
----------------------------------------
Checkpoint number 3 at 72.01 minutes.
29471 messages logged!
----------------------------------------
Checkpoint number 4 at 96.0 minutes.
40270 messages logged!
----------------------------------------
Checkpoint number 5 at 120.0 minutes.
51912 messages logged!
----------------------------------------
Checkpoint number 6 at 144.0 minutes.
62910 messages logged!
----------------------------------------
Checkpoint number 7 at 168.0 minutes.
72628 messages logged!
----------------------------------------
Checkpoint number 8 at 192.0 minutes.
81878 messages logged!
----------------------------------------
Checkpoint number 9 at 216.0 minutes.
91948 messages logged!
----------------------------------------
Scrape complete after 240.0 minutes.
101158 

In [12]:
# Parse the raw chat log into a table; the preview further down shows a time
# index with username, channel and message columns.
xqcow_5_03 = tcf.twitch_chat_format("../data/logs/chat_xqcow_5_03.log")

In [13]:
# (rows, columns) of the formatted chat data.
xqcow_5_03.shape

(107109, 3)

In [14]:
# Preview the first few formatted messages.
xqcow_5_03.head()

Unnamed: 0_level_0,username,channel,message
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-05-03 08:46:43,mythikow,xqcow,"""LET ME TELL YOU WHAT YOU MEAN"" WeirdChamp ""LE..."
2019-05-03 08:46:43,thisispaule,xqcow,easy troll chat BIG Kappa
2019-05-03 08:46:43,epho__,xqcow,xqcL
2019-05-03 08:46:43,end_my_suffering_xd,xqcow,FeelsStrongMan DONO
2019-05-03 08:46:43,eoin_2,xqcow,THIS PVC DUDE :face_with_tears_of_joy: :OK_han...


In [15]:
# Count missing values in each column.
xqcow_5_03.isna().sum()

username    0
channel     0
message     0
dtype: int64

In [16]:
# Derive messages-per-second data from the formatted chat; the helper prints
# its progress as it works through the log.
xqcow_5_03_mps = tcf.messages_per_second(xqcow_5_03)

Processing 240 minutes of chat messages...
20 out of 240 minutes of messages processed.
40 out of 240 minutes of messages processed.
60 out of 240 minutes of messages processed.
80 out of 240 minutes of messages processed.
100 out of 240 minutes of messages processed.
120 out of 240 minutes of messages processed.
140 out of 240 minutes of messages processed.
160 out of 240 minutes of messages processed.
180 out of 240 minutes of messages processed.
200 out of 240 minutes of messages processed.
220 out of 240 minutes of messages processed.
240 out of 240 minutes of messages processed.
...All messages processed.


In [17]:
# Save the formatted chat messages, writing the index as the first column.
# The file names carry the 5_03 date of this scrape; they previously reused the
# 4_30 suffix from the AdmiralBulldog section, so any reader of the old
# xqcow_4_30*.csv names must be pointed at these files.
xqcow_5_03.to_csv("../data/formatted/xqcow_5_03.csv",
                  index = True)

# Save the messages-per-second data with its value column labelled "mps".
# `header` has to be a list to rename the column: a bare string is only
# treated as a truthy flag, so the "mps" label was never actually written.
# NOTE(review): assumes messages_per_second returns a single column - confirm.
xqcow_5_03_mps.to_csv("../data/formatted/xqcow_5_03_mps.csv",
                      index = True,
                      header = ["mps"])