# Introduction.
Describe the project here.

# Jupyter Settings
The default Jupyter settings are a bit annoying.  Let's fix that.

In [None]:
# Notebook display setup: completer mode, page width, and pandas print limits.

# IPython magic (not plain Python): switch the completer's "greedy" mode off.
%config IPCompleter.greedy=False

# From https://stackoverflow.com/a/34058270/7077511
# Inject a CSS rule so that elements with class "container" use 80% width.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# Let pandas show up to 1000 rows and up to 200 characters per column
# before it starts truncating output.
import pandas as pd
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 200

# Globals and Utility Code
For the sake of convenience, I'm going to put some global functions and data here.

In [None]:
import re
import os
import json
import glob
import dateutil.parser
from typing import Optional, Union


class utils:
    """Namespace for notebook-wide constants and small parsing helpers.

    Everything here is a class attribute or a static method; the class is
    used as a plain namespace and is not meant to be instantiated.
    """

    # Path constants.
    DATA_DIR = R'C:\Users\dma\Documents\twitch-analytics\data'

    @staticmethod
    def rel_data(relpath: str) -> str:
        """Return ``relpath`` joined onto ``utils.DATA_DIR``."""
        return os.path.join(utils.DATA_DIR, relpath)

    # Matches either YouTube style ("PT52M3S") or Twitch style ("4h10m52s").
    # Every component is optional, so the pattern also matches the empty
    # string at the start of *any* input; parse_duration() checks for that.
    _re_duration = re.compile(R'(PT)?((\d+)[Hh])?((\d+)[Mm])?((\d+)[Ss])?')

    @staticmethod
    def parse_duration(data: str):
        """Parse an hours/minutes/seconds duration string.

        Args:
            data: Text such as ``"PT52M3S"`` or ``"4h10m52s"``. Any of the
                hour, minute and second parts may be missing.

        Returns:
            A ``datetime.timedelta`` built from the parts that were found,
            or ``None`` when the text does not start with any of them.
        """
        # Imported here because nothing else in the notebook brings
        # ``timedelta`` into scope.
        from datetime import timedelta

        m = utils._re_duration.match(data)
        if m is None:
            return None

        hours, minutes, seconds = m.group(3), m.group(5), m.group(7)

        # The regex succeeds on an empty match, so "no component captured"
        # is the real signal that the input was not a duration.
        if hours is None and minutes is None and seconds is None:
            return None

        return timedelta(
            hours=int(hours) if hours is not None else 0,
            minutes=int(minutes) if minutes is not None else 0,
            seconds=int(seconds) if seconds is not None else 0)

    @staticmethod
    def make_int(data: Union[str, int, None]) -> Optional[int]:
        """Convert ``data`` to ``int``, mapping ``None`` and empty text to ``None``.

        Values that are already integers are passed through ``int()``
        unchanged instead of failing on ``len()``.

        Raises:
            ValueError: If ``data`` is non-empty text that is not an integer.
        """
        if data is None:
            return None

        # Only sized values (strings) can be "empty"; ints have no len().
        if not isinstance(data, int) and len(data) == 0:
            return None

        return int(data)

    @staticmethod
    def make_str(data: str) -> Optional[str]:
        """Strip trailing whitespace from ``data``.

        Returns:
            The right-stripped text, or ``None`` when ``data`` is ``None``
            or nothing is left after stripping.
        """
        if data is None:
            return None

        r = data.rstrip()
        # Test the stripped value so whitespace-only input also becomes None.
        if len(r) == 0:
            return None

        return r


In [None]:
# https://store.steampowered.com/api/appdetails?appids=41014
# https://steamdb.info/app/543260/config/
import os
import json

# Both files are JSON downloads; the encoding is named explicitly so the
# result does not depend on the platform's default text encoding.

# https://store.steampowered.com/dynamicstore/userdata/
with open("data/my-games.json", encoding="utf-8") as json_file:
    my_games = json.load(json_file)

# https://api.steampowered.com/ISteamApps/GetAppList/v0002/?format=json
with open("data/steam-games.json", encoding="utf-8") as json_file:
    steam_games = json.load(json_file)


In [None]:
# App IDs to leave out of the to-do listing printed below.
done = {
    220, 7670, 21090, 22330, 70300, 72850, 201810, 205100, 221100,
    264710, 362890, 379720, 397950, 403640, 480490, 782330, 848450,
}

# App ID -> name, taken from the Steam app list.
apps = {entry["appid"]: entry["name"] for entry in steam_games["applist"]["apps"]}

# Owned app ID -> name (None when the app list has no entry), minus `done`.
todo = {
    app_id: apps.get(app_id, None)
    for app_id in my_games["rgOwnedApps"]
    if app_id not in done
}

# Emit one "name|app_id" line per owned app that has a non-empty name.
for app_id, app_name in todo.items():
    if app_name:
        print(app_name.strip(), app_id, sep="|")

In [None]:
import urllib.parse

# Sample of Twitch game records, in the shape of a {"data": [...]} response.
all_twitch_games = {
    "data": [
        {
            "box_art_url": "https://static-cdn.jtvnw.net/ttv-boxart/506442_IGDB-{width}x{height}.jpg",
            "id": "506442",
            "name": "DOOM Eternal",
        },
        {
            "box_art_url": "https://static-cdn.jtvnw.net/ttv-boxart/511212_IGDB-{width}x{height}.jpg",
            "id": "511212",
            "name": "Subnautica: Below Zero",
        },
    ]
}

# Index the records by their integer game ID.
atg = {int(record["id"]): record for record in all_twitch_games['data']}

# Game IDs to print a "|"-separated line for; empty by default.
stuff = []

for game_id in stuff:
    game = atg.get(game_id)
    directory_url = "https://www.twitch.tv/directory/game/" + urllib.parse.quote(game["name"])
    print(game_id, game["name"], game["box_art_url"], directory_url, sep="|")

In [None]:
import csv

# Read the tab-separated game list into a DataFrame (first line is the
# header; the "youtube_url" column is forced to string) and show it.
with open("data/my-games.txt", encoding="utf-8") as games_tsv:
    df = pd.read_csv(
        games_tsv,
        sep='\t',
        header=0,
        dtype={"youtube_url": 'str'},
    )

display(df)

In [None]:
# A mapping of process names to Twitch game IDs.
# Keys are executable file names; values are integer IDs.
PROC_GAMES = {
    "BioshockHD.exe":                       15866,          # Actually it's the remaster (2009742488), but who cares?
    "Clustertruck.exe":                     491398,
    "DOOMEternalx64vk.exe":                 506442,
    "DOOMx64.exe":                          6715,
    "DayZ_x64.exe":                         65632,
    "Dead Space.exe":                       19009,
    "Dishonored.exe":                       32156,
    "Dishonored2.exe":                      490348,
    "Dishonored_DO.exe":                    497438,
    "Fallout4.exe":                         489776,
    "FortniteClient-Win64-Shipping.exe":    33214,
    "Oblivion.exe":                         18526,
    "Prey.exe":                             15013,
    "Subnautica.exe":                       460090,
    "SubnauticaZero.exe":                   511212,
    "WolfNewOrder_x64.exe":                 369259,
    "bms.exe":                              68016,
    "hl2.exe":                              1420,
    "wb.exe":                               495060,
    
}
# NOTE(review): the next line is a bare tuple expression that is not assigned
# to any name, so nothing can refer to it later. Presumably these are
# non-game process names to be ignored -- confirm, and bind it to a name if
# it is needed.
"LockApp.exe", "chrome.exe", "explorer.exe", "ApplicationFrameHost.exe", "unknown", "Steam.exe", "steam.exe", "Discord.exe", "NVIDIA Share.exe"


In [None]:
import csv
import requests
import time

# Build the game reference data from data/my-games.txt plus one saved IGDB
# page per game, in two shapes:
#   games      -- list of tuples, one per row, for a bulk database insert
#   json_games -- dict keyed by game_id, also written to ref-data/games.json
games = []
json_games = dict()

# Field order of a game record. The tuples appended to `games` follow this
# order, which is the column order used by the INSERT INTO Games statement.
GAME_COLUMNS = (
    "game_id", "game_name", "twitch_box_art_url", "twitch_game_url",
    "youtube_game_url", "game_source", "igdb_id", "igdb_url",
    "igdb_box_art_url", "steam_app_id", "steam_url", "gog_url", "epic_url",
)

# Patterns that pull the cover image URL and the numeric game ID out of a
# saved IGDB page.
re_jpg = re.compile(R'<meta content="(//images\.igdb\.com/igdb/image/upload/t_cover_big/.+?\.jpg)"')
re_id = re.compile(R'<meta id="pageid" content="game" data-game-id="(\d+)" ')

with open("data/my-games.txt", encoding="utf-8") as txt_file:
    r = csv.DictReader(txt_file, delimiter='\t', quotechar='|')
    for row in r:
        igdb_id = None
        igdb_box_art_url = None

        # Scan the whole page; if a pattern matches on several lines, the
        # last match is the one that is kept.
        with open(f"data/igdb/{row['game_id']}.html", "r", encoding="utf-8") as f:
            for line in f:
                if (m := re_jpg.search(line)):
                    igdb_box_art_url = "https:" + m.group(1)

                if (m := re_id.search(line)):
                    igdb_id = int(m.group(1))

        # Parse each field exactly once; the tuple for the database is
        # derived from this record so the two outputs cannot drift apart.
        record = {
            "game_id":              utils.make_int(row["game_id"]),
            "game_name":            utils.make_str(row["game_name"]),
            "twitch_box_art_url":   utils.make_str(row["twitch_box_art_url"]),
            "twitch_game_url":      utils.make_str(row["twitch_game_url"]),
            "youtube_game_url":     utils.make_str(row["youtube_game_url"]),
            "game_source":          "Steam",
            "igdb_id":              igdb_id,
            "igdb_url":             utils.make_str(row["igdb_url"]),
            "igdb_box_art_url":     igdb_box_art_url,
            "steam_app_id":         utils.make_int(row["steam_app_id"]),
            "steam_url":            utils.make_str(row["steam_url"]),
            "gog_url":              utils.make_str(row["gog_url"]),
            "epic_url":             utils.make_str(row["epic_url"])
            # "config_url":           "https://steamdb.info/app/" + str(utils.make_int(row["steam_app_id"])) + "/config/"
        }

        json_games[record["game_id"]] = record
        games.append(tuple(record[column] for column in GAME_COLUMNS))

# ensure_ascii=False writes non-ASCII characters as-is, so the file encoding
# must be stated; the platform default may be unable to encode them.
with open("ref-data/games.json", "w", encoding="utf-8") as f:
    json.dump(json_games, f, ensure_ascii=False, sort_keys=True, indent='  ')

print(json.dumps(json_games, indent='  '))
# display(games)
# display(games)




In [None]:
# Now insert all games into the Games table.
import sqlite3
from contextlib import closing

# closing() releases the connection even when the insert raises. The inner
# `with conn` block is a transaction: it commits if the body succeeds and
# rolls back if it raises, so a failed batch leaves no partial rows.
with closing(sqlite3.connect("data/dmatech.db")) as conn:
    with conn:
        cursor = conn.cursor()
        cursor.executemany('''
    INSERT INTO Games (
        game_id, game_name, twitch_box_art_url, twitch_game_url, youtube_game_url, game_source,
        igdb_id, igdb_url, igdb_box_art_url, steam_app_id, steam_url, gog_url, epic_url
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', games)

In [None]:
# Now, load the VideoSnapshots table.
import glob
videos = []


def video_snapshot_row(v, time_stamp, json_format):
    """Build one VideoSnapshots row from a decoded video object.

    Args:
        v: Dict for a single video; must contain "id" and may contain
            "stream_id".
        time_stamp: Modification time of the file the object came from.
        json_format: Label for the source of the object ("tcd" or "helix").

    Returns:
        Tuple ``(video_id, stream_id, time_stamp, json_format, json_data)``
        where ``stream_id`` is ``None`` when it is absent, null or empty.
    """
    # The key can be present with a null value (the Helix reference
    # describes stream_id as null for non-archive videos), so test the
    # value rather than just the key before converting it.
    stream_id = v.get("stream_id")
    if stream_id is None or stream_id == "":
        stream_id = None
    else:
        stream_id = int(stream_id)

    return (int(v["id"]), stream_id, time_stamp, json_format, json.dumps(v))


# First, load everything discovered using "tcd".
# Some of the TCD output actually has the stream ID.
for fn in glob.glob("data/chat-logs/*.json"):
    time_stamp = float(os.path.getmtime(fn))

    with open(fn, encoding="utf-8") as json_file:
        data = json.load(json_file)

    videos.append(video_snapshot_row(data['video'], time_stamp, "tcd"))

# Get the stuff from the new "helix" API.
# twitch api get videos -P -q user_id=217476645 > videos_2022-06-04.json
# scp -p 192.168.1.19:/home/dma/twitch-logs/dmatech/videos_*.json .
for fn in glob.glob("data/more-json/videos_*.json"):
    time_stamp = float(os.path.getmtime(fn))

    with open(fn, encoding="utf-8") as json_file:
        data = json.load(json_file)

    # Each helix file holds a list of video objects under "data".
    videos.extend(video_snapshot_row(v, time_stamp, "helix") for v in data["data"])

In [None]:
# Now insert it into the VideoSnapshots table.
import sqlite3
from contextlib import closing

# closing() releases the connection even when the insert raises. The inner
# `with conn` block is a transaction: it commits if the body succeeds and
# rolls back if it raises, so a failed batch leaves no partial rows.
with closing(sqlite3.connect("data/dmatech.db")) as conn:
    with conn:
        cursor = conn.cursor()
        cursor.executemany('''
    INSERT INTO VideoSnapshots (video_id, stream_id, time_stamp, json_format, json_data)
    VALUES (?, ?, ?, ?, ?)
''', videos)

# display(videos)

In [None]:
# Load an overview of IRC logs
# Reads data/irc-logs.txt (tab-separated), echoes each row as a bracketed
# list line, and writes the rows sorted by min_ts to ref-data/irc-logs.json.
irc_logs = []
json_irc_logs = []

with open("data/irc-logs.txt", encoding="utf-8") as txt_file:
    r = csv.DictReader(txt_file, delimiter='\t', quotechar='|')
    for row in r:
        # Both string fields are wrapped in a matching pair of quotes (the
        # closing quote after the checksum used to be missing).
        print(f"[{row['min_ts']:>12}, {row['max_ts']:>12}, {row['xz_size']:>12}, \"{row['xz_sha256sum']}\", \"{row['xz_path']}\"],")

        json_irc_logs.append({
            "xz_path": row["xz_path"],
            "xz_sha256sum": row["xz_sha256sum"],
            "xz_size": utils.make_int(row["xz_size"]),
            "min_ts": utils.make_int(row["min_ts"]),
            "max_ts": utils.make_int(row["max_ts"])
        })

json_irc_logs.sort(key=lambda x: x["min_ts"], reverse=False)

# ensure_ascii=False writes non-ASCII characters as-is, so the file encoding
# must be stated; the platform default may be unable to encode them.
with open("ref-data/irc-logs.json", "w", encoding="utf-8") as f:
    json.dump(json_irc_logs, f, ensure_ascii=False, sort_keys=True, indent='  ')

print(json.dumps(json_irc_logs, indent='  '))


In [None]:
# https://twitchtracker.com/dmatech/streams/39720122445
# https://sullygnome.com/channel/dmatech/365/stream/45410307996

In [None]:
# From 836245698.json:
# The encoding is named explicitly so decoding does not depend on the
# platform's default text encoding.
with open("data/chat-logs/836245698.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

v_obj = data["video"]
c_obj = data["comments"][1]
# Creation time of the comment, converted from seconds to milliseconds so it
# can be compared with the tmi-sent-ts value below.
ts = dateutil.parser.isoparse(c_obj['created_at']).timestamp()*1000

# From /home/dma/twitch-automation/logs/dmatech/rawlog-20201215.log.xz:
# >> @badge-info=;badges=vip/1;color=;display-name=littlefox_o;emotes=;flags=;id=62f36cf0-9891-4197-8736-23f1d80b07a9;mod=0;room-id=217476645;subscriber=0;tmi-sent-ts=1607886088954;turbo=0;user-id=534604064;user-type= :littlefox_o!littlefox_o@littlefox_o.tmi.twitch.tv PRIVMSG #dmatech :Hello
irc_ts = 1607886088954
print(ts, irc_ts) # These line up perfectly.

# This is close enough.
# Alternative estimate: video creation time plus the comment's offset into
# the video, both in seconds.
tmp_ts = dateutil.parser.isoparse(v_obj["created_at"]).timestamp() + float(c_obj["content_offset_seconds"])
print(tmp_ts*1000)

# The question is...  How does this relate to the timestamps in the OBS logs?  
# Where in a .mkv recording would a chat event be?
# To figure this out, I need to determine corresponding frames in the mkv and mp4 for
# a bunch of videos.


In [None]:
# Dump the video object, then the comment object, as tab-indented JSON so
# the two can be compared by eye.
for obj in (v_obj, c_obj):
    print(json.dumps(obj, indent='\t'))


In [None]:
# Read ref-data/irc-logs.json back in and report the Python type that the
# first entry's "max_ts" value has after decoding.
with open("ref-data/irc-logs.json") as json_file:
    data = json.loads(json_file.read())

first_entry = data[0]
print(type(first_entry["max_ts"]))