In [1]:
import json
import os
import re
import time

import cryptpandas as crp
import numpy as np
import pandas as pd

In [2]:
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium_stealth import stealth
from fake_useragent import UserAgent

# Configure Slack Bot

In [3]:
# Read the bot token from the environment when it is set, so the real secret
# does not have to be saved inside the notebook; the original placeholder
# value is kept as the fallback.
SLACK_BOT_TOKEN = os.environ.get("SLACK_BOT_TOKEN", "BOT_TOKEN")
# Channel whose history is polled for release announcements.
CHANNEL_NAME = "#the-challenge"
# Only messages whose "user" field equals this id are acted on.
AUTHORIZED_USER_ID = "ID"

# Monitor Slack channel

In [4]:
# Module-level Slack Web API client, authenticated with SLACK_BOT_TOKEN;
# get_channel_id() and monitor_channel() below call it directly.
client = WebClient(token=SLACK_BOT_TOKEN)

def get_channel_id(channel_name):
    """Look up the Slack channel id for a channel name.

    Args:
        channel_name: Channel name, with or without a leading "#".

    Returns:
        The id of the first channel whose name matches, or None when no
        channel matches or the Slack API reports an error (the error is
        printed, not raised).
    """
    try:
        target = channel_name.lstrip("#")
        cursor = None
        while True:
            # conversations.list is paginated: a single call only returns the
            # first page, so keep following next_cursor until it is empty.
            page_args = {"limit": 200}
            if cursor:
                page_args["cursor"] = cursor
            response = client.conversations_list(**page_args)
            for channel in response["channels"]:
                if channel["name"] == target:
                    return channel["id"]
            cursor = (response.get("response_metadata") or {}).get("next_cursor")
            if not cursor:
                break
    except SlackApiError as e:
        print(f"Error fetching channels: {e.response['error']}")
    return None

def extract_id_and_password(message):
    """Pull the release number and passcode out of an announcement text.

    The text must contain "release_<digits>.crypt" followed later by
    "passcode is '<passcode>'".

    Returns:
        A (file_id, password) tuple of strings, or None when the text does
        not match that pattern.
    """
    found = re.search(r"release_(\d+)\.crypt.*?passcode is '(.*?)'", message)
    if found is None:
        return None
    release_id, passcode = found.groups()
    return release_id, passcode

def monitor_channel(channel_id, latest_timestamp):
    """Fetch release announcements posted after a given timestamp.

    Args:
        channel_id: Slack channel id to read.
        latest_timestamp: Passed to Slack as `oldest`; only newer messages
            are requested (at most 100 per call).

    Returns:
        A list of (ts, file_id, password) tuples, in the reverse of the order
        Slack returned the messages. Empty when nothing matched or the API
        call failed (the error is printed, not raised).
    """
    datasets = []
    try:
        response = client.conversations_history(channel=channel_id, oldest=latest_timestamp, limit=100)
        for message in reversed(response["messages"]):
            # Some message subtypes carry no "text"; treat that as empty
            # instead of raising KeyError.
            text = message.get("text") or ""
            if message.get("user") != AUTHORIZED_USER_ID or "Data has just been released" not in text:
                continue
            # The parser returns None when the announcement does not follow
            # the expected wording; unpacking that directly would raise a
            # TypeError that the except clause below does not catch.
            parsed = extract_id_and_password(text)
            if not parsed:
                continue
            file_id, password = parsed
            if file_id and password:
                datasets.append((message["ts"], file_id, password))
    except SlackApiError as e:
        print(f"Error reading channel history: {e.response['error']}")
    return datasets

# Load data

In [8]:
def load_dataset(file_id, password):
    """Decrypt ./release_<file_id>.crypt from the working directory.

    Args:
        file_id: Release number used to build the file name.
        password: Passcode handed to cryptpandas.

    Returns:
        Whatever crp.read_encrypted returns, or None if anything in the
        loading sequence raises (the error is printed, not re-raised).
    """
    frame = None
    try:
        file_name = f"release_{file_id}.crypt"
        print(f"Loading dataset: {file_name}")
        frame = crp.read_encrypted(path=f"./{file_name}", password=password)
        # Reading .shape stays inside the try so a failure here is reported
        # the same way as a failed decrypt.
        print(f"Dataset loaded successfully. Shape: {frame.shape}")
    except Exception as e:
        print(f"Error loading dataset {file_name}: {e}")
        return None
    return frame

# Get predictions

In [9]:
def create_lagged_features(series, lag=1):
    """Build a frame of shifted copies of `series`.

    Column `lag_k` holds the series shifted down by k rows; columns are
    ordered from the largest lag to `lag_1`. The first k rows of `lag_k`
    are NaN.
    """
    offsets = list(range(lag, 0, -1))
    shifted = []
    for k in offsets:
        shifted.append(series.shift(k))
    frame = pd.concat(shifted, axis=1)
    frame.columns = [f'lag_{k}' for k in offsets]
    return frame

def compute_predictions(X):
    """Forecast the next value of every column and build the submission dict.

    For each column, a Ridge regression is fitted on `lag` lagged values of
    that same column. Alpha is chosen by GridSearchCV with TimeSeriesSplit.
    The model is then applied to the most recent `lag` observations to get a
    one-step-ahead forecast. The forecasts are turned into positions by
    `normalize_positions`.

    Args:
        X: DataFrame with one series per column (rows in time order).

    Returns:
        dict mapping each column to its position, plus the "team_name" and
        "passcode" entries.
    """
    df = X.copy()
    lag = 5
    n_splits = 3
    param_grid = {
        'alpha': [0.01, 0.1, 1.0, 10, 100]
    }
    predictions = {}

    for col in df.columns:
        series = df[col]
        lagged_features = create_lagged_features(series, lag=lag)
        lagged_features['target'] = series
        lagged_features = lagged_features.dropna()

        # Inputs for the step AFTER the last observation: the last `lag`
        # values, oldest first, which lines up with lag_5 ... lag_1. The last
        # row of the feature table cannot be used for this, because its target
        # is the final observed value, so it would re-predict a known point.
        latest_lags = series.iloc[-lag:]

        # TimeSeriesSplit needs at least n_splits + 1 rows, and the model
        # cannot predict from NaN inputs. Leave such columns as NaN (turned
        # into a zero position below) instead of aborting every column.
        if len(lagged_features) < n_splits + 1 or latest_lags.isna().any():
            print(f"Skipping {col}: not enough clean history to fit a model.")
            predictions[col] = np.nan
            continue

        X_full = lagged_features.drop('target', axis=1)
        y_full = lagged_features['target']

        # Fit on the whole history. Previously the newest 20% was split off
        # but never scored, so it only removed the most recent data from
        # training.
        scaler = StandardScaler()
        X_full_scaled = scaler.fit_transform(X_full)

        ridge_search = GridSearchCV(
            estimator=Ridge(),
            param_grid=param_grid,
            cv=TimeSeriesSplit(n_splits=n_splits),  # time-series cross-validation
            scoring='neg_mean_squared_error',
            n_jobs=-1
        )
        ridge_search.fit(X_full_scaled, y_full)

        # With the default refit=True, best_estimator_ is already a Ridge with
        # the best alpha refitted on all rows passed to fit().
        model = ridge_search.best_estimator_

        # Same column names as the training frame, so the scaler sees the
        # feature names it was fitted with.
        next_input = pd.DataFrame([latest_lags.to_numpy()], columns=X_full.columns)
        next_prediction = model.predict(scaler.transform(next_input))
        predictions[col] = next_prediction[0]

    def normalize_positions(pos_dict):
        """Scale forecasts to unit absolute sum, clip to +/-0.1, rescale again."""
        pos = pd.Series(pos_dict).replace([np.inf, -np.inf], np.nan).fillna(0)
        abs_sum = pos.abs().sum()
        if abs_sum > 0:
            pos = pos / abs_sum
        pos = pos.clip(-0.1, 0.1)
        # NOTE(review): this second rescale can push individual weights back
        # above 0.1 (always so with fewer than ten non-zero positions).
        # Confirm whether 0.1 is meant to be a hard per-position cap.
        if pos.abs().sum() > 0:
            pos = pos / pos.abs().sum()
        return pos

    # NOTE(review): the team passcode is hard-coded as a default argument here.
    def get_submission_dict(pos_dict, team_name="Jean Trading 69", passcode="JeanForTheWin"):
        """Attach the team name and passcode to the normalized positions."""
        positions = normalize_positions(pos_dict)
        return {**positions.to_dict(), "team_name": team_name, "passcode": passcode}

    return get_submission_dict(predictions)

# Fill in the Google Form automatically (final submission is manual)

In [10]:
# Google sign-in page. Not referenced by any code visible in this notebook.
LOGIN_URL = "https://accounts.google.com/signin"
# Form that submit_to_google_form() opens and fills in.
GOOGLE_FORM_URL = "https://docs.google.com/forms/d/e/1FAIpQLSeUYMkI5ce18RL2aF5C8I7mPxF7haH23VEVz7PQrvz0Do0NrQ/viewform"

In [13]:
# Start one Chrome session for the whole notebook; running this cell opens a
# browser window. The user-agent string is drawn at random by fake_useragent.
options = webdriver.ChromeOptions()
ua = UserAgent()
options.add_argument(f'user-agent={ua.random}')
driver = webdriver.Chrome(options=options)

# Apply selenium-stealth's browser-fingerprint overrides to that session.
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

def submit_to_google_form(submission_dict):
    """Open the Google Form and type the submission into its text area.

    The form is only filled in; the function does not click submit.

    Args:
        submission_dict: Mapping of column name to position, plus the
            team_name and passcode entries.
    """
    # Serialize with json.dumps rather than str(...).replace("'", '"'): the
    # string replace corrupts any value containing an apostrophe and emits
    # Python reprs (numpy scalars, True/None, non-string keys) that are not
    # valid JSON. default=float covers numeric scalar types json does not
    # know natively.
    payload = json.dumps(
        {str(key): value for key, value in submission_dict.items()},
        default=float,
        ensure_ascii=False,
    )
    driver.get(GOOGLE_FORM_URL)
    textarea = driver.find_element(By.XPATH, "/html/body/div/div[2]/form/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div[2]/textarea")
    textarea.clear()
    textarea.send_keys(payload)
    print("Form filled successfully. Please review and submit manually.")

# Workflow

In [15]:
def main():
    """Poll the Slack channel forever and process each new release once.

    Every 10 seconds the channel history is read from the last seen
    timestamp. For each release not handled before, the dataset is loaded,
    predictions are computed and the form is filled in. A release whose
    dataset fails to load is still marked as processed and is not retried.
    Returns immediately if the channel cannot be resolved.
    """
    print("Initializing monitoring...")
    channel_id = get_channel_id(CHANNEL_NAME)
    if channel_id is None:
        # Without an id every poll would fail the same way, forever.
        print(f"Channel {CHANNEL_NAME} not found; nothing to monitor.")
        return
    latest_timestamp = "0"
    processed_datasets = set()

    while True:
        datasets = monitor_channel(channel_id, latest_timestamp)
        for ts, file_id, password in datasets:
            if file_id not in processed_datasets:
                X = load_dataset(file_id, password)
                if X is not None:
                    submission = compute_predictions(X)
                    submit_to_google_form(submission)
                processed_datasets.add(file_id)
            # Advance past every announcement seen, including repeats of an
            # already-processed release, so the next poll does not fetch it
            # again.
            latest_timestamp = ts
        time.sleep(10)