In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict
from scipy.optimize import lsq_linear

# Scouted coral count columns; their sum over an alliance is the scouted total.
CORAL_COLUMNS = ['t_L1', 't_L2', 't_L3', 't_L4']
# Column holding the official teleop coral count for the alliance.
OFFICIAL_COLUMN = 'ba_teleopCoralCount'
# Every column the analysis reads from the CSV.
REQUIRED_COLUMNS = ['key', 'alliance', 'team_key', 'Scouter_Initials',
                    *CORAL_COLUMNS, OFFICIAL_COLUMN]
# Only alliances with exactly this many scouted rows are used.
ALLIANCE_SIZE = 3


def prepare_scouting_frame(raw):
    """Return a cleaned copy of the raw scouting table.

    Keeps only the columns the analysis needs, drops rows with no ``t_L1``
    entry, and converts the count columns to numbers.

    Parameters
    ----------
    raw : pandas.DataFrame
        Table containing at least ``REQUIRED_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``raw`` is not modified.
    """
    # Explicit copy so the column assignments below never write into a view
    # of the caller's frame.
    df = raw[REQUIRED_COLUMNS].dropna(subset=['t_L1']).copy()

    # A blank or unparseable scouted count is treated as zero.
    for col in CORAL_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # The official count is deliberately NOT zero-filled: a missing official
    # value is "unknown", and treating it as 0 would charge the whole scouted
    # total to the scouters as error. Such alliances are skipped later.
    df[OFFICIAL_COLUMN] = pd.to_numeric(df[OFFICIAL_COLUMN], errors='coerce')
    return df


def build_scouter_system(grouped):
    """Build the linear system ``A @ x = b`` linking scouters to count errors.

    Each usable alliance contributes one equation: the row of ``A`` has a 1
    in the column of every scouter who recorded that alliance, and the
    matching entry of ``b`` is the absolute difference between the scouted
    total and the official total.

    Parameters
    ----------
    grouped : iterable of (key, pandas.DataFrame)
        Typically ``df.groupby(['key', 'alliance'])``.

    Returns
    -------
    scouter_index : dict
        Maps scouter initials to their column in ``A``, in order of first
        appearance.
    A : numpy.ndarray
        Integer 0/1 matrix of shape ``(n_alliances, n_scouters)``.
    b : numpy.ndarray
        Absolute count differences, one per row of ``A``.
    """
    scouter_index = {}
    row_columns = []  # for each equation, the column indices set to 1
    results = []

    for _, group in grouped:
        if len(group) != ALLIANCE_SIZE:
            continue  # Skip incomplete alliances

        official_total = group[OFFICIAL_COLUMN].iloc[0]
        if pd.isna(official_total):
            continue  # No official count to compare against

        scouted_total = group[CORAL_COLUMNS].sum(axis=1).sum()
        results.append(abs(scouted_total - official_total))

        # setdefault evaluates len() before inserting, so a new scouter gets
        # the next free column while a known scouter keeps their own.
        row_columns.append([
            scouter_index.setdefault(scouter, len(scouter_index))
            for scouter in group['Scouter_Initials']
        ])

    # Allocate once at the final size instead of re-padding every earlier
    # row each time a new scouter shows up.
    A = np.zeros((len(row_columns), len(scouter_index)), dtype=int)
    for row, columns in enumerate(row_columns):
        A[row, columns] = 1
    b = np.array(results)
    return scouter_index, A, b


def solve_scouter_system(A, b):
    """Solve ``A @ x = b`` in the least-squares sense.

    Parameters
    ----------
    A : numpy.ndarray
        2-D coefficient matrix.
    b : numpy.ndarray
        Right-hand side with one entry per row of ``A``.

    Returns
    -------
    numpy.ndarray
        One estimate per column of ``A``; an empty/zero vector when the
        system has no equations.
    """
    if A.size == 0:
        # Nothing to fit; avoid handing lstsq a degenerate array.
        return np.zeros(A.shape[1] if A.ndim == 2 else 0)
    # When A is rank-deficient lstsq returns the minimum-norm solution, so
    # scouters who always appear together share the error between them.
    # NOTE(review): the estimates are unconstrained and can come out
    # negative; lsq_linear (imported above) with bounds=(0, np.inf) would
    # force them non-negative -- confirm whether that is wanted.
    return np.linalg.lstsq(A, b, rcond=None)[0]


# In a notebook or when run as a script __name__ is "__main__", so this
# still executes; the guard only keeps the helpers importable without the CSV.
if __name__ == "__main__":
    # Load CSV, keep the necessary columns and ensure numeric types
    df = prepare_scouting_frame(pd.read_csv("StormSurgematch.csv"))

    # Group by match and alliance
    grouped = df.groupby(['key', 'alliance'])

    # Build system of equations
    scouter_index, A, b = build_scouter_system(grouped)
    print(A)
    print(b)

    # Solve using least squares
    solution = solve_scouter_system(A, b)
    x = solution

    # Map results back to scouters
    scouter_variance = {scouter: x[idx] for scouter, idx in scouter_index.items()}

    # Display results
    print("Scouter Variance Estimates (lower is better):")
    for scouter, variance in sorted(scouter_variance.items(), key=lambda item: abs(item[1])):
        print(f"{scouter}: {variance:.3f}")

[[1 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [1 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[ 2.  2.  6.  6.  2.  6.  1.  9.  8.  2.  7.  7.  3.  1.  5. 10.  4.  1.
  0.  4. 11.  2.  5.  4.  0.  3. 11.  3. 12.  3.  1.  3.  3.  9.  7. 17.
  3.  1.  3.  0.  5.  0.  3.  5.  2.  1.  1.  4.  2.  0.  1.]


LinAlgError: Last 2 dimensions of the array must be square