In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("dataset.csv")

# Sort by block_id to simulate time order
df_sorted = df.sort_values(by="block_id")

# Create bins to simulate time chunks
block_bins = pd.cut(df_sorted['block_id'], bins=20)
chunked = df_sorted.groupby(block_bins)

# Get label with lowest count (rarest class)
min_class = df['label'].value_counts().idxmin()

# Undersample majority classes in each time chunk
balanced_chunks = []
for _, chunk in chunked:
    class_counts = chunk['label'].value_counts()
    min_count = class_counts.min()
    sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
    balanced_chunks.append(sampled.reset_index(drop=True))

# Combine all balanced chunks
balanced_df = pd.concat(balanced_chunks).reset_index(drop=True)

# Prepare data
X_bal = balanced_df.drop(columns=["Unnamed: 0", "label"])
y_bal = balanced_df["label"]

# Normalize
scaler = StandardScaler()
X_bal_scaled = scaler.fit_transform(X_bal)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_bal_scaled, y_bal, test_size=0.3, stratify=y_bal, random_state=42)

# Train model
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))


  chunked = df_sorted.groupby(block_bins)
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != min_class else len(x)))
  sampled = chunk.groupby('label').apply(lambda x: x.sample(n=min_count if x.name != m

              precision    recall  f1-score   support

           0       1.00      0.97      0.99       472
           1       1.00      1.00      1.00       676
           2       1.00      0.99      1.00       491
           3       0.97      1.00      0.98       466

    accuracy                           0.99      2105
   macro avg       0.99      0.99      0.99      2105
weighted avg       0.99      0.99      0.99      2105

