In [16]:
import os
# Cap the OpenMP thread count for this process at 2.
# NOTE(review): presumably added to silence scikit-learn's KMeans thread /
# memory-leak warning. The variable is normally only honoured when it is set
# before numpy / scikit-learn are first imported, and this cell's execution
# count (16) is later than the modelling cells below — confirm it runs first
# on a fresh kernel.
os.environ["OMP_NUM_THREADS"] = "2"

In [4]:
import pandas as pd
import numpy as np
import re

# Game ticks per minute, used to convert per-tick deltas into per-minute rates.
TICKS_PER_MIN = 3600

df0 = pd.read_csv("merged_factorio.csv")

# 1) De-duplicate on tick. Note that GroupBy.last() keeps, for every column,
#    the last non-null value found within each tick group.
df = df0.sort_values(["tick"]).groupby("tick", as_index=False).last()

# Cumulative counter columns that will be turned into rates.
m_count_cols = [c for c in df.columns if re.match(r"^M\d+_count$", c)]
prod_cols = [c for c in df.columns if c.startswith("production__")]

# Elapsed time between consecutive samples, in ticks and in minutes.
dtick = df["tick"].diff()
dmin = dtick / TICKS_PER_MIN

# 2) Build every rate column first and attach them with a single concat
#    (this also avoids the DataFrame fragmentation warning).
new_cols = {
    **{c.replace("_count", "_per_min"): df[c].diff() / dmin for c in m_count_cols},
    **{c + "_per_min": df[c].diff() / dmin for c in prod_cols},
}

# Negative deltas are clamped to zero.
rates = pd.DataFrame(new_cols).clip(lower=0)

df = pd.concat([df, rates], axis=1)

# Collect the rate columns only now that they exist on df.
rate_cols = [c for c in df.columns if c.endswith("_per_min")]

# Missing rates (first sample, or counter not recorded) are replaced by 0 here,
# so "not measured" and "produced nothing" become indistinguishable.
df[rate_cols] = df[rate_cols].fillna(0)

print("rows after dedup:", len(df))
df[["tick", *rate_cols[:8]]].head(12)

rows after dedup: 710


  df = df0.sort_values(["tick"]).groupby("tick", as_index=False).last()


Unnamed: 0,tick,M01_per_min,M02_per_min,M03_per_min,M04_per_min,M05_per_min,M06_per_min,M07_per_min,M08_per_min
0,1026000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1029600,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1033200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1036800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1040400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1044000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,1047600,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1051200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1054800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1058400,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Number of samples with a positive M01 rate, and the peak M01 rate.
m01_rate = df["M01_per_min"]
m01_rate.gt(0).sum(), m01_rate.max()

(np.int64(45), 58.0)

In [6]:
import pandas as pd
import numpy as np

# Reload and de-duplicate on tick so the raw counters can be inspected.
df0 = pd.read_csv("merged_factorio.csv")
df = df0.sort_values("tick").groupby("tick", as_index=False).last()

# How much of the M01 counter is actually populated, and from which tick on?
s = df["M01_count"]
first_idx = s.first_valid_index()
first_tick = None if first_idx is None else df.loc[first_idx, "tick"]

print("M01_count non-null:", s.notna().sum(), "out of", len(s))
print("M01_count unique (non-null):", s.dropna().nunique())
print("first non-null tick:", first_tick)

# Counter values in the first 50 rows.
inspect_cols = ["tick", "M01_count", "M02_count", "M03_count", "logsim_power"]
df[inspect_cols].head(50)

M01_count non-null: 365 out of 710
M01_count unique (non-null): 46
first non-null tick: 2268000


  df = df0.sort_values("tick").groupby("tick", as_index=False).last()


Unnamed: 0,tick,M01_count,M02_count,M03_count,logsim_power
0,1026000,,,,
1,1029600,,,,
2,1033200,,,,
3,1036800,,,,
4,1040400,,,,
5,1044000,,,,
6,1047600,,,,
7,1051200,,,,
8,1054800,,,,
9,1058400,,,,


In [7]:
# Sample-to-sample change of the M01 counter (a raw delta, not yet per minute).
rate = df["M01_count"].diff()

# Rows where the counter actually moved; missing deltas count as "no change".
changed = rate.fillna(0).ne(0)
print("Non-zero diffs:", changed.sum())
df.loc[changed, ["tick", "M01_count"]].head(20)

Non-zero diffs: 45


Unnamed: 0,tick,M01_count
392,2437200,2353.0
393,2440800,2361.0
394,2444400,2365.0
395,2448000,2369.0
451,2649600,2373.0
452,2653200,2388.0
453,2656800,2417.0
454,2660400,2453.0
455,2664000,2489.0
456,2667600,2523.0


In [8]:
import re
import pandas as pd
import numpy as np

TICKS_PER_MIN = 3600

df0 = pd.read_csv("merged_factorio.csv")
df = df0.sort_values("tick").groupby("tick", as_index=False).last()

# Minutes elapsed between consecutive samples.
dmin = df["tick"].diff() / TICKS_PER_MIN

m_count_cols = [c for c in df.columns if re.match(r"^M\d+_count$", c)]

# Per-minute rate for every machine counter. The rate is kept only where the
# counter itself is present; everywhere else it stays NaN.
new_cols = {
    c.replace("_count", "_per_min"): (df[c].diff() / dmin).mask(df[c].isna())
    for c in m_count_cols
}

rates = pd.DataFrame(new_cols).clip(lower=0)
df = pd.concat([df, rates], axis=1)

# Example: find where the rate for one machine becomes available.
example = "M01"
count_col = f"{example}_count"
rate_col = f"{example}_per_min"

mask = df[rate_col].notna()
start_tick = df.loc[mask, "tick"].min()
print("M01 rate starts at tick:", start_tick)
df.loc[df["tick"] >= start_tick, ["tick", count_col, rate_col]].head(10)

M01 rate starts at tick: 2271600


  df = df0.sort_values("tick").groupby("tick", as_index=False).last()


Unnamed: 0,tick,M01_count,M01_per_min
346,2271600,2347.0,0.0
347,2275200,2347.0,0.0
348,2278800,2347.0,0.0
349,2282400,2347.0,0.0
350,2286000,2347.0,0.0
351,2289600,2347.0,0.0
352,2293200,2347.0,0.0
353,2296800,2347.0,0.0
354,2300400,2347.0,0.0
355,2304000,2347.0,0.0


In [9]:
import pandas as pd
import numpy as np
import re

TICKS_PER_MIN = 3600

df0 = pd.read_csv("merged_factorio.csv")
df = df0.sort_values("tick").groupby("tick", as_index=False).last().copy()

# Minutes elapsed between consecutive samples.
dmin = df["tick"].diff() / TICKS_PER_MIN

# Machine ids ("M01", "M02", ...) taken from the *_count column names.
m_ids = sorted(
    {re.findall(r"^(M\d+)_", col)[0] for col in df.columns if re.match(r"^M\d+_count$", col)}
)

# Per-minute rate per machine: NaN where the counter is missing, negative
# deltas clamped to zero.
m_rates = {
    f"{m}_per_min": (df[f"{m}_count"].diff() / dmin).mask(df[f"{m}_count"].isna()).clip(lower=0)
    for m in m_ids
}

df = pd.concat([df, pd.DataFrame(m_rates)], axis=1)

# Stable window: keep only samples from this tick onwards.
core_start = 2268000
df_core = df.loc[df["tick"].ge(core_start)].reset_index(drop=True)

# Group machine rate columns by the item each machine reports, using the
# last non-null item value seen inside the window.
item_to_ratecols = {}
for m in m_ids:
    item_col, rate_col = f"{m}_item", f"{m}_per_min"
    if item_col not in df_core.columns or rate_col not in df_core.columns:
        continue
    item_names = df_core[item_col].dropna()
    if item_names.empty:
        continue
    item_to_ratecols.setdefault(item_names.iloc[-1], []).append(rate_col)

# Row-wise total per item; min_count=1 keeps the total NaN when every
# contributing machine rate is NaN.
item_totals = {
    f"{item}_total_per_min": df_core[rate_cols_for_item].sum(axis=1, min_count=1)
    for item, rate_cols_for_item in item_to_ratecols.items()
}

df_core = pd.concat([df_core, pd.DataFrame(item_totals)], axis=1)

tot_cols = [c for c in df_core.columns if c.endswith("_total_per_min")]
ranked_totals = df_core[tot_cols].sum().sort_values(ascending=False)
print("Top items by total produced per minute (sum over time):")
print(ranked_totals.head(15))

df_core[["tick", "logsim_power", *tot_cols[:8]]].head(10)

Top items by total produced per minute (sum over time):
Wire_total_per_min                   14132.0
Gear_total_per_min                   11876.0
cirG_total_per_min                    7766.0
transport-belt_total_per_min          4541.0
Cu_total_per_min                      4232.0
Rod_total_per_min                     1870.0
Pipe_total_per_min                    1308.0
inserter_total_per_min                1057.0
underground-belt_total_per_min         948.0
Red_total_per_min                      871.0
rail_total_per_min                     857.0
Green_total_per_min                    855.0
splitter_total_per_min                 363.0
big-electric-pole_total_per_min        263.0
fast-transport-belt_total_per_min      219.0
dtype: float64


  df = df0.sort_values("tick").groupby("tick", as_index=False).last().copy()


Unnamed: 0,tick,logsim_power,Gear_total_per_min,transport-belt_total_per_min,inserter_total_per_min,cirG_total_per_min,Wire_total_per_min,rail_total_per_min,locomotive_total_per_min,train-stop_total_per_min
0,2268000,90000.0,,,,,,,,
1,2271600,90000.0,0.0,0.0,0.0,131.0,221.0,,,
2,2275200,90000.0,0.0,0.0,0.0,41.0,82.0,,,
3,2278800,86776.0,0.0,0.0,0.0,40.0,86.0,,,
4,2282400,84867.0,0.0,0.0,0.0,38.0,86.0,,,
5,2286000,84841.0,0.0,0.0,0.0,47.0,94.0,,,
6,2289600,88596.0,0.0,0.0,0.0,48.0,108.0,,,
7,2293200,84050.0,0.0,0.0,0.0,58.0,119.0,,,
8,2296800,87647.0,0.0,0.0,0.0,56.0,103.0,,,
9,2300400,89925.0,0.0,0.0,0.0,55.0,109.0,,,


In [10]:
import numpy as np
import pandas as pd

# df_core (rows with tick >= 2268000, including the *_total_per_min columns)
# must already exist. If it does not, re-run the cell that builds it first.

# Number of highest-volume items used as clustering features.
N_TOP_ITEMS = 10

# Rank the per-item totals by their sum over the window and keep the largest
# ones. Deriving the list from the data (instead of hard-coding names copied
# from an earlier cell's printed ranking) keeps it correct when the input file
# or the window changes.
item_total_cols = [c for c in df_core.columns if c.endswith("_total_per_min")]
top_items = (
    df_core[item_total_cols]
    .sum()
    .sort_values(ascending=False)
    .head(N_TOP_ITEMS)
    .index
    .tolist()
)

base_cols = ["logsim_power", "logsim_pol_a", "logsim_pol_b", "logsim_pol_c"]
feat_cols = base_cols + top_items

X = df_core[feat_cols].copy()

# Filling NaN with 0 is reasonable for the item totals: a NaN total means the
# item was absent / not measured in that minute, which can be treated as a
# rate of 0.
# NOTE(review): this fill also zero-fills any gaps in the logsim_* columns;
# confirm those columns have no NaNs inside the window.
X = X.fillna(0)

print("Rows:", len(X), "Features:", len(feat_cols))
X.head()

Rows: 365 Features: 14


Unnamed: 0,logsim_power,logsim_pol_a,logsim_pol_b,logsim_pol_c,Wire_total_per_min,Gear_total_per_min,cirG_total_per_min,transport-belt_total_per_min,Cu_total_per_min,Rod_total_per_min,Pipe_total_per_min,inserter_total_per_min,underground-belt_total_per_min,Red_total_per_min
0,90000.0,10.53,8.61,1.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,90000.0,11.19,8.53,2.66,221.0,0.0,131.0,0.0,78.0,0.0,0.0,0.0,0.0,0.0
2,90000.0,11.87,7.78,4.09,82.0,0.0,41.0,0.0,82.0,1.0,0.0,0.0,0.0,0.0
3,86776.0,11.87,7.43,4.44,86.0,0.0,40.0,0.0,86.0,12.0,0.0,0.0,0.0,0.0
4,84867.0,11.19,18.51,-7.32,86.0,0.0,38.0,0.0,85.0,7.0,0.0,0.0,0.0,0.0


In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Standardise every feature column (zero mean, unit variance).
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

# Project the standardised features onto two principal components.
pca = PCA(n_components=2, random_state=42)
Z = pca.fit_transform(Xs)

# Share of variance captured by each component, and by both together.
evr = pca.explained_variance_ratio_
print("Explained variance ratio:", evr, "sum:", evr.sum())

Explained variance ratio: [0.29820946 0.17164539] sum: 0.4698548465782935


In [12]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Silhouette score of a KMeans fit for each candidate cluster count (2..6).
candidate_ks = range(2, 7)

scores = {}
for k in candidate_ks:
    km = KMeans(n_clusters=k, random_state=42, n_init="auto").fit(Xs)
    labels = km.labels_
    sc = scores[k] = silhouette_score(Xs, labels)
scores



{2: 0.22209057301142765,
 3: 0.25885556910659135,
 4: 0.28022158579816325,
 5: 0.2934758349163916,
 6: 0.2970094288614918}

In [13]:
# Use the cluster count with the highest silhouette score.
k = max(scores.items(), key=lambda kv: kv[1])[0]
km = KMeans(n_clusters=k, random_state=42, n_init="auto").fit(Xs)
df_core["cluster"] = km.labels_

# Cluster summary: mean power and mean item rates, ordered by mean power.
summary_cols = ["logsim_power", *top_items]
summary = df_core.groupby("cluster")[summary_cols].mean()
summary = summary.sort_values("logsim_power")
summary



Unnamed: 0_level_0,logsim_power,Wire_total_per_min,Gear_total_per_min,cirG_total_per_min,transport-belt_total_per_min,Cu_total_per_min,Rod_total_per_min,Pipe_total_per_min,inserter_total_per_min,underground-belt_total_per_min,Red_total_per_min
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,67923.944056,13.161972,11.408451,6.471831,0.619718,0.830986,2.161972,1.71831,0.161972,0.169014,0.401408
3,73904.300813,29.203252,40.243902,19.211382,27.211382,0.195122,0.341463,0.0,4.170732,6.113821,0.487805
0,88183.695652,40.73913,77.608696,25.565217,9.130435,2.217391,0.695652,0.0,12.173913,1.086957,30.782609
2,89704.484848,125.878788,30.727273,55.969697,7.636364,103.69697,8.939394,9.454545,3.060606,0.0,0.0
4,99021.461538,58.615385,29.461538,13.846154,4.692308,46.538462,89.230769,6.153846,0.0,1.076923,0.0
5,118692.133333,93.933333,70.8,62.3,19.433333,0.4,1.666667,22.4,4.666667,4.433333,1.533333


In [14]:
def _population_zscore(values):
    """Return (values - mean) / population standard deviation (ddof=0)."""
    return (values - values.mean()) / values.std(ddof=0)


# z-score of power against the whole window.
df_core["power_z_global"] = _population_zscore(df_core["logsim_power"])

# z-score of power against the other samples of the same cluster.
df_core["power_z_cluster"] = df_core.groupby("cluster")["logsim_power"].transform(
    _population_zscore
)

# Samples more than 3 standard deviations away from their cluster's mean power.
is_outlier = df_core["power_z_cluster"].abs() > 3
report_cols = ["tick", "cluster", "logsim_power", "power_z_cluster", *top_items]
anoms = df_core.loc[is_outlier, report_cols].sort_values("power_z_cluster", ascending=False)

print("Anomalies within cluster (|z|>3):", len(anoms))
anoms.head(20)

Anomalies within cluster (|z|>3): 1


Unnamed: 0,tick,cluster,logsim_power,power_z_cluster,Wire_total_per_min,Gear_total_per_min,cirG_total_per_min,transport-belt_total_per_min,Cu_total_per_min,Rod_total_per_min,Pipe_total_per_min,inserter_total_per_min,underground-belt_total_per_min,Red_total_per_min
37,2401200,2,64611.0,-3.029893,0.0,5.0,0.0,0.0,114.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Rebind df_core to an independent copy of itself (deep=True is the default
# of DataFrame.copy, spelled out here for clarity).
df_core = df_core.copy(deep=True)

In [17]:
from sklearn.cluster import KMeans

# Refit KMeans with a fixed number of clusters and store the labels.
k = 6
km = KMeans(n_clusters=k, random_state=42, n_init="auto").fit(Xs)
df_core["cluster"] = km.labels_

# Mean power and mean item rates per cluster, ordered by mean power.
summary = (
    df_core.groupby("cluster")[["logsim_power", *top_items]]
    .mean()
    .sort_values("logsim_power")
)
summary



Unnamed: 0_level_0,logsim_power,Wire_total_per_min,Gear_total_per_min,cirG_total_per_min,transport-belt_total_per_min,Cu_total_per_min,Rod_total_per_min,Pipe_total_per_min,inserter_total_per_min,underground-belt_total_per_min,Red_total_per_min
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,67923.944056,13.161972,11.408451,6.471831,0.619718,0.830986,2.161972,1.71831,0.161972,0.169014,0.401408
3,73904.300813,29.203252,40.243902,19.211382,27.211382,0.195122,0.341463,0.0,4.170732,6.113821,0.487805
0,88183.695652,40.73913,77.608696,25.565217,9.130435,2.217391,0.695652,0.0,12.173913,1.086957,30.782609
2,89704.484848,125.878788,30.727273,55.969697,7.636364,103.69697,8.939394,9.454545,3.060606,0.0,0.0
4,99021.461538,58.615385,29.461538,13.846154,4.692308,46.538462,89.230769,6.153846,0.0,1.076923,0.0
5,118692.133333,93.933333,70.8,62.3,19.433333,0.4,1.666667,22.4,4.666667,4.433333,1.533333


In [18]:
# Number of samples in each cluster, listed in cluster-label order.
df_core["cluster"].value_counts(sort=False).sort_index()

cluster
0     23
1    143
2     33
3    123
4     13
5     30
Name: count, dtype: int64

In [19]:
# z-score of power relative to the samples of the same cluster
# (population standard deviation, ddof=0).
power_by_cluster = df_core.groupby("cluster")["logsim_power"]
df_core["power_z_cluster"] = power_by_cluster.transform(
    lambda grp: (grp - grp.mean()) / grp.std(ddof=0)
)

# Samples more than 3 standard deviations away from their cluster's mean power,
# largest positive deviation first.
is_outlier = df_core["power_z_cluster"].abs() > 3
report_cols = ["tick", "cluster", "logsim_power", "power_z_cluster", *top_items]
anoms = df_core.loc[is_outlier, report_cols].sort_values("power_z_cluster", ascending=False)

len(anoms), anoms.head(20)

(1,
        tick  cluster  logsim_power  power_z_cluster  Wire_total_per_min  \
 37  2401200        2       64611.0        -3.029893                 0.0   
 
     Gear_total_per_min  cirG_total_per_min  transport-belt_total_per_min  \
 37                 5.0                 0.0                           0.0   
 
     Cu_total_per_min  Rod_total_per_min  Pipe_total_per_min  \
 37             114.0                0.0                 0.0   
 
     inserter_total_per_min  underground-belt_total_per_min  Red_total_per_min  
 37                     0.0                             0.0                0.0  )