In [138]:
from google.cloud import bigquery
from dotenv import load_dotenv
from pathlib import Path
import pandas as pd
import numpy as np
import os

In [139]:
# Load environment variables from the .env file in the current working directory

HOME_DIR = Path.cwd()
ENV_FILE = HOME_DIR / ".env"

# Fail with an explicit, descriptive error when the file is missing.
# (A bare `raise` outside an `except` block has no exception to re-raise and
# only produces "RuntimeError: No active exception to re-raise".)
if not ENV_FILE.exists():
    raise FileNotFoundError(f"Environment file not found: {ENV_FILE}")

load_dotenv(ENV_FILE)

# os.getenv returns None for a variable that is not set
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT_ID")
DATASET_ID = os.getenv("DATASET_NAME")

In [140]:
# One-off export (disabled): pull tables from BigQuery and save them as CSV files under data/; the cells below read those CSV files

# client = bigquery.Client()
# table_names = ["events", "event_types"]


# def get_table_from_bigquery(table_name):
#     dataset_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ID)
#     table_ref = dataset_ref.table(table_name)
#     table = client.get_table(table_ref)
#     df = client.list_rows(table).to_dataframe()
#     return df

# for table in table_names:
#     df = get_table_from_bigquery(table)
#     file_path = HOME_DIR / f"data/{table}.csv"
#     df.to_csv(file_path, index=False)
#     print(f"Saved `{table}` table to {file_path}")

In [141]:
# Load the exported CSV tables (the `created_at` column is discarded on load)
events = pd.read_csv(
    HOME_DIR / "data/events.csv",
    parse_dates=["event_timestamp"],
).drop(columns="created_at")
event_types = pd.read_csv(HOME_DIR / "data/event_types.csv")

# Attach the event-type attributes to every event, drop event type 4,
# order newest first, then derive the helper columns in one chain.
df = (
    events.merge(event_types, on="event_type_id", how="left")
    .query("event_type_id != 4")
    .sort_values(["event_timestamp", "event_index"], ascending=False)
    .assign(
        # Side pick: 4th whitespace-separated token of a team event (type 7)
        # with the double quotes removed and "TERRORIST" shortened to "T";
        # NaN for every other event type.
        team=lambda d: (
            d["description"].str.split().str[3].str.replace('"', "")
            .where(d["event_type_id"] == 7)
            .replace({"TERRORIST": "T"})
        ),
        # Weapon: last token of the description, kept only for event type 2.
        # NOTE(review): the sample output of this cell shows "Kill" rows with
        # event_type_id 5 and an empty weapon — confirm that 2 is the intended id.
        weapon=lambda d: (
            d["description"].str.split().str[-1].str.strip()
            .where(d["event_type_id"] == 2)
        ),
        # 1 when the extracted weapon is exactly "awp", otherwise 0
        is_awp_kill=lambda d: np.where(d["weapon"] == "awp", 1, 0),
    )
)

df.tail()

Unnamed: 0,event_timestamp,event_index,event_type_id,description,event,team,weapon,is_awp_kill
1037,2025-10-04 23:50:21,11,5,I killed b2 with a headshot from ak47,Kill,,,0
753,2025-10-04 23:50:20,8,5,I killed King_Kyt with a headshot from ak47,Kill,,,0
1184,2025-10-04 23:50:06,6,5,I killed Антон Рапира with ak47,Kill,,,0
1451,2025-10-04 23:49:20,3,7,"I joined team ""TERRORIST"" (Террористы)",Team,T,,0
18,2025-10-04 23:48:17,1,1,I connected to the server,Connect,,,0


In [None]:
# Session idea: a team event opens a session; a connection or disconnection
# closes it (most reliable approach).
#
# Toy prototype of the pairing logic: "a" marks a start (the most recent one
# wins), "b" closes the currently open start, anything else is ignored.

lst = ["a", "x", "x", "b", "a", "a", "x", "b"]
res = []
start = None      # position of the latest unmatched "a"
session_no = 1    # 1-based counter of closed sessions

for pos, token in enumerate(lst):
    if token == "a":
        start = pos
    elif token == "b" and start is not None:
        span = pos - start + 1  # inclusive length from start to end
        res.append(session_no ** span)
        start = None
        session_no += 1

res

[1, 8]

In [193]:
# Keep only events of type 2 — the rows for which `weapon` is extracted
df2 = df.loc[df["event_type_id"] == 2]

# Per calendar day: sum of the AWP flag and number of type-2 events
(
    df2.groupby(df2["event_timestamp"].dt.date)
    .agg(
        awp_kills=("is_awp_kill", "sum"),
        row_count=("event_timestamp", "count"),
    )
    .reset_index()
)

Unnamed: 0,event_timestamp,awp_kills,row_count
0,2025-10-04,0,1
1,2025-10-08,2,6
2,2025-10-12,7,40
3,2025-10-15,25,55
4,2025-10-16,63,138
5,2025-10-17,24,57
6,2025-10-18,14,38
7,2025-10-19,35,112
8,2025-10-26,26,74
9,2025-10-27,5,20


In [197]:
# Candidate session boundaries: connect (1), disconnect (3) and team-join (7) events
x = df.loc[df["event_type_id"].isin({1, 3, 7})]

# Oldest rows come last because df is sorted newest-first
x.tail(20)

Unnamed: 0,event_timestamp,event_index,event_type_id,description,event,team,weapon,is_awp_kill
1416,2025-10-15 22:45:37,303,7,"I joined team ""CT"" (Спецназ)",Team,CT,,0
6,2025-10-15 22:44:23,301,1,I connected to the server,Connect,,,0
518,2025-10-12 23:52:37,300,3,I left the game,Disconnect,,,0
1433,2025-10-12 23:52:08,297,7,"I joined team ""CT"" (Спецназ)",Team,CT,,0
1446,2025-10-12 23:29:52,159,7,"I joined team ""TERRORIST"" (Террористы)",Team,T,,0
1469,2025-10-12 23:18:42,105,7,"I joined team ""TERRORIST"" (Террористы)",Team,T,,0
1436,2025-10-12 23:15:35,90,7,"I joined team ""CT"" (Спецназ)",Team,CT,,0
9,2025-10-12 23:15:19,88,1,I connected to the server,Connect,,,0
514,2025-10-12 15:32:12,87,3,I left the game,Disconnect,,,0
1434,2025-10-12 15:31:19,84,7,"I joined team ""CT"" (Спецназ)",Team,CT,,0


In [143]:
# Inspect the column dtypes of `events` (e.g. to confirm `event_timestamp` was parsed as a datetime)
events.dtypes

event_timestamp    datetime64[ns]
event_index                 int64
event_type_id               int64
description                object
dtype: object