In [6]:
# ============================================
# Gaming LiveOps Project — Data Cleaning
# ============================================

import pandas as pd
import sqlite3
from pathlib import Path

In [None]:
# --------------------------------------------
# Project paths (all relative to the working directory)
# --------------------------------------------

# Output folder for cleaned artefacts. Created immediately, together with any
# missing parents; an already existing folder is left alone.
PROCESSED_DIR = Path("data/processed")
PROCESSED_DIR.mkdir(exist_ok=True, parents=True)

# SQLite database file, stored inside the processed-output folder.
DB_PATH = PROCESSED_DIR.joinpath("gaming_liveops.db")

# Raw-input folder (only defined here; it is not created by this cell).
RAW_DIR = Path("data/raw")

In [None]:
# --------------------------------------------
# Paths (relative to the working directory)
# --------------------------------------------
# NOTE(review): this cell repeats the path setup of the previous cell
# verbatim. The cells that follow use `auth`, `regs` and `ab`, which no cell
# visible here defines — confirm whether a data-loading cell was meant to
# live in this position.

RAW_DIR, PROCESSED_DIR = Path("data/raw"), Path("data/processed")
DB_PATH = PROCESSED_DIR.joinpath("gaming_liveops.db")

# Create the output folder (and parents) if absent; no error when it exists.
PROCESSED_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# --------------------------------------------
# Timestamp conversion
# --------------------------------------------
# For each frame: parse the timestamp column with unit="s" (values are read
# as seconds since the Unix epoch), overwrite the column with the parsed
# datetimes, then derive a calendar-date column from it.
# NOTE(review): `auth` and `regs` must already exist in the kernel; they are
# not defined in any cell visible here.

for _frame, _ts_col, _date_col in (
    (auth, "auth_ts", "auth_date"),
    (regs, "reg_ts", "reg_date"),
):
    _frame[_ts_col] = pd.to_datetime(_frame[_ts_col], unit="s")
    _frame[_date_col] = _frame[_ts_col].dt.date


In [None]:
# --------------------------------------------
# Build core tables
# --------------------------------------------

# Users: registrations with `uid` renamed to `user_id`, keeping only the
# id and the registration date.
users = regs.rename(columns={"uid": "user_id"}).loc[:, ["user_id", "reg_date"]]

# Events: authentications with the same rename, keeping only the id and
# the authentication date.
events = auth.rename(columns={"uid": "user_id"}).loc[:, ["user_id", "auth_date"]]

# Revenue: three columns taken from `ab`; missing revenue values become 0.
# `assign` returns a new frame, so `ab` itself is left untouched.
revenue = ab.loc[:, ["user_id", "revenue", "testgroup"]].assign(
    revenue=lambda frame: frame["revenue"].fillna(0)
)

# Report the (rows, columns) shape of each resulting table.
print("\nClean tables:")
for _label, _table in (("Users:", users), ("Events:", events), ("Revenue:", revenue)):
    print(_label, _table.shape)

In [None]:
# --------------------------------------------
# Export clean CSV files
# --------------------------------------------
# Each table is written into PROCESSED_DIR without its index column.

for _file_name, _table in (
    ("users_clean.csv", users),
    ("events_clean.csv", events),
    ("revenue_clean.csv", revenue),
):
    _table.to_csv(PROCESSED_DIR.joinpath(_file_name), index=False)

print("\nClean CSV files exported to:", PROCESSED_DIR)

In [None]:
# --------------------------------------------
# Export SQLite database
# --------------------------------------------
# Writes the three clean tables into the SQLite file at DB_PATH.
# if_exists="replace" makes pandas drop and recreate a table that is already
# present, so re-running this cell overwrites the previous export.

conn = sqlite3.connect(DB_PATH)
try:
    users.to_sql("users", conn, if_exists="replace", index=False)
    events.to_sql("events", conn, if_exists="replace", index=False)
    revenue.to_sql("revenue", conn, if_exists="replace", index=False)
finally:
    # Close the connection even when a write fails; otherwise the open
    # handle would linger in the kernel for the rest of the session.
    conn.close()

print("\nSQLite database created:", DB_PATH)

In [None]:
# --------------------------------------------
# Quick sanity checks
# --------------------------------------------
# Every figure below is a count of distinct user_id values, so the four
# numbers are directly comparable.

print("\nSanity checks:")
print("Unique users:", users["user_id"].nunique())
print("Event users:", events["user_id"].nunique())
print("Revenue users:", revenue["user_id"].nunique())
# Count distinct paying users rather than rows with positive revenue, so a
# user appearing on several revenue rows is counted once.
print("Paying users:", revenue.loc[revenue["revenue"] > 0, "user_id"].nunique())