In [None]:


import os
import gc
import pandas as pd
import numpy as np
from sklearn.metrics import (
    roc_auc_score, precision_recall_curve, roc_curve, auc,
    confusion_matrix, cohen_kappa_score, matthews_corrcoef, f1_score
)
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem
from rdkit.Chem import Descriptors, AllChem
from transformers import AutoModel, AutoTokenizer
from tqdm import tqdm
import psutil

from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve, auc, confusion_matrix, cohen_kappa_score, matthews_corrcoef, f1_score


import warnings
from rdkit import RDLogger
# Silence all RDKit log output (warnings included)
RDLogger.DisableLog('rdApp.*')  # 禁用所有 RDKit 日志
# Plotting libraries (inline figures are switched to SVG output further down)
import seaborn as sns
import matplotlib.pyplot as plt

# Matplotlib text settings (no CJK font is configured here; only the minus-sign fix is applied)
plt.rcParams['axes.unicode_minus'] = False  # 解决负号显示问题

# Render inline Matplotlib figures as SVG
%config InlineBackend.figure_format = 'svg'

# Raise the figure DPI for sharper images
plt.rcParams['figure.dpi'] = 300
warnings.filterwarnings("ignore")

# ---------------------- Memory Protection ----------------------
def memory_safe(func):
    """Decorator that tries to free memory before calling ``func``.

    Before every call the wrapper reads system memory usage through
    ``psutil.virtual_memory()``. When usage is above 80 % it runs
    ``gc.collect()``, empties the CUDA cache if CUDA is available, and prints
    a warning. ``func`` is then called with the original arguments and its
    result is returned unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapping callable; it carries the metadata (``__name__``,
        ``__doc__``, ...) of ``func``.
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    @wraps(func)  # keep the decorated function's name/docstring for debugging
    def wrapper(*args, **kwargs):
        mem = psutil.virtual_memory()
        if mem.percent > 80:
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # The reported percentage is the reading taken *before* collection.
            print(f"⚠️ Memory warning: Usage {mem.percent}%, performed garbage collection")
        return func(*args, **kwargs)
    return wrapper

# ---------------------- SMILES Feature Extraction ----------------------
class SMILESFeatureExtractor:
    """Convert a SMILES string into a fixed-length numeric feature vector.

    The vector is the concatenation of the RDKit descriptors named in
    ``desc_list`` followed by a Morgan fingerprint (radius 2) of ``fp_size``
    bits, so its length is ``len(desc_list) + fp_size``.
    """

    def __init__(self, fp_size=1024, desc_list=None):
        """Store the fingerprint size and the descriptor names to compute.

        Args:
            fp_size: Number of bits in the Morgan fingerprint.
            desc_list: Names of functions in ``rdkit.Chem.Descriptors``;
                a default set of six descriptors is used when omitted/empty.
        """
        self.fp_size = fp_size
        self.desc_list = desc_list or [
            'MolWt', 'NumHAcceptors', 'NumHDonors',
            'MolLogP', 'TPSA', 'NumRotatableBonds',
        ]

    def _nan_features(self):
        """Return the all-NaN placeholder vector used for unusable input."""
        return np.full(len(self.desc_list) + self.fp_size, np.nan)

    @memory_safe
    def smiles_to_features(self, smiles):
        """Convert SMILES to numerical features.

        Args:
            smiles: SMILES string of one molecule.

        Returns:
            1-D array of descriptor values followed by fingerprint bits, or an
            all-NaN array of the same length when the SMILES cannot be parsed
            or a descriptor/fingerprint computation fails.
        """
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                return self._nan_features()

            # Calculate descriptors
            desc_values = [getattr(Descriptors, desc)(mol) for desc in self.desc_list]

            # Calculate fingerprints
            fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=self.fp_size)
            fp_values = np.array(fp, dtype=np.float32)

            return np.concatenate([desc_values, fp_values])
        except Exception:
            # Best effort per molecule, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit stop a long extraction run.
            return self._nan_features()

# ---------------------- Data Preparation ----------------------
def prepare_features(X_smiles):
    """Build the numeric feature matrix for a collection of SMILES pairs.

    Each element of ``X_smiles`` is unpacked into two SMILES strings. Both are
    featurised with a default ``SMILESFeatureExtractor`` and the two vectors
    are concatenated into one row. NaN entries (produced for molecules that
    could not be featurised) are replaced via ``np.nan_to_num``.

    Args:
        X_smiles: Iterable of ``(drug1_smiles, drug2_smiles)`` pairs.

    Returns:
        2-D array with one row per pair.
    """
    extractor = SMILESFeatureExtractor()
    pair_rows = [
        np.concatenate([
            extractor.smiles_to_features(first),
            extractor.smiles_to_features(second),
        ])
        for first, second in tqdm(X_smiles, desc="Extracting features")
    ]
    # Stack into a matrix, then neutralise the NaN placeholders.
    return np.nan_to_num(np.stack(pair_rows))

# ---------------------- Deep Learning Components ----------------------
class DrugInteractionDataset(Dataset):
    """Dataset of tokenised SMILES pairs, optionally with a label per pair.

    Each item is a dict holding the flattened ``input_ids`` and
    ``attention_mask`` tensors of both drugs; a ``'label'`` entry (long
    tensor) is added only when labels were supplied.
    """

    def __init__(self, drug1_smiles, drug2_smiles, labels=None, tokenizer=None, max_length=128):
        """Keep references to the SMILES columns, labels and tokenizer.

        Args:
            drug1_smiles: Indexable collection of SMILES for the first drug.
            drug2_smiles: Indexable collection of SMILES for the second drug.
            labels: Optional indexable collection of labels.
            tokenizer: Callable used to tokenise a SMILES string; it is
                required by ``__getitem__`` despite the ``None`` default.
            max_length: Length every sequence is padded/truncated to.
        """
        self.drug1_smiles, self.drug2_smiles = drug1_smiles, drug2_smiles
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of pairs (length of the first SMILES column)."""
        return len(self.drug1_smiles)

    def _tokenise(self, smiles):
        """Tokenise one SMILES string to fixed-length PyTorch tensors."""
        return self.tokenizer(
            str(smiles),
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

    def __getitem__(self, idx):
        """Return the tokenised pair at ``idx`` (plus its label if available)."""
        sample = {}
        columns = (('drug1', self.drug1_smiles), ('drug2', self.drug2_smiles))
        for prefix, column in columns:
            encoded = self._tokenise(column[idx])
            # Flatten so each field is a 1-D tensor per sample.
            sample[f'{prefix}_input_ids'] = encoded['input_ids'].flatten()
            sample[f'{prefix}_attention_mask'] = encoded['attention_mask'].flatten()

        if self.labels is not None:
            sample['label'] = torch.tensor(self.labels[idx], dtype=torch.long)

        return sample


class CoAttentionModel(nn.Module):
    """Pairwise drug classifier: shared encoder + co-attention + MLP head.

    Both drugs are encoded with the same pretrained transformer; the hidden
    state at position 0 of each sequence is used as the drug embedding. A
    multi-head attention layer lets each drug attend to the other, and the two
    embeddings plus the two attended vectors are concatenated and classified
    into two classes.
    """

    def __init__(self, bert_model_name="DeepChem/ChemBERTa-77M-MLM", hidden_size=384):
        """Load the encoder and build the attention and classifier layers.

        Args:
            bert_model_name: Name/path passed to ``AutoModel.from_pretrained``.
            hidden_size: Embedding width used by the attention layer and the
                classifier; it has to equal the encoder's hidden size
                (NOTE(review): 384 is assumed to match the default
                checkpoint — confirm when changing ``bert_model_name``).
        """
        super().__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        self.co_attention = nn.MultiheadAttention(hidden_size, num_heads=8)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size*4, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, 2)
        )

    def forward(self, drug1_input_ids, drug1_attention_mask, drug2_input_ids, drug2_attention_mask):
        """Return the two-class logits for a batch of drug pairs.

        Args:
            drug1_input_ids, drug1_attention_mask: Tokenised first drug.
            drug2_input_ids, drug2_attention_mask: Tokenised second drug.

        Returns:
            Tensor of logits with one row per pair and two columns.
        """
        drug1 = self.bert(drug1_input_ids, attention_mask=drug1_attention_mask).last_hidden_state[:, 0, :]
        drug2 = self.bert(drug2_input_ids, attention_mask=drug2_attention_mask).last_hidden_state[:, 0, :]

        # Co-attention. ``self.co_attention`` is built without ``batch_first``,
        # so it expects (seq_len, batch, embed). The embeddings therefore get a
        # leading sequence axis of length 1. Putting the new axis at position 1
        # instead would make the batch axis act as the sequence, letting
        # unrelated samples of the same batch attend to each other and making
        # predictions depend on batch composition.
        seq1 = drug1.unsqueeze(0)
        seq2 = drug2.unsqueeze(0)
        attn1, _ = self.co_attention(seq1, seq2, seq2)
        attn2, _ = self.co_attention(seq2, seq1, seq1)

        combined = torch.cat([drug1, drug2, attn1.squeeze(0), attn2.squeeze(0)], dim=1)
        return self.classifier(combined)

# ---------------------- Training and Evaluation ----------------------
@memory_safe
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=10):
    """Train ``model`` and checkpoint the weights with the best validation AUC.

    After every epoch the model is evaluated on ``val_loader`` with
    ``evaluate_model``; whenever the validation AUC improves, the state dict
    is written to ``best_model.pth`` in the working directory.

    Args:
        model: Module called as ``model(**inputs)`` with every batch entry
            except ``'label'``.
        train_loader: Iterable of batch dicts containing a ``'label'`` entry.
        val_loader: Loader passed to ``evaluate_model`` after each epoch.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batch tensors are moved to.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(model, history)`` where ``model`` holds the weights of the *last*
        epoch (not necessarily the best) and ``history`` maps
        ``'train_loss'``, ``'val_loss'`` and ``'val_auc'`` to per-epoch lists.
    """
    # Start below any attainable AUC so the first evaluated epoch always
    # writes a checkpoint; callers load "best_model.pth" unconditionally and
    # must not pick up a stale file from an earlier run.
    best_val_auc = float("-inf")
    history = {'train_loss': [], 'val_loss': [], 'val_auc': []}

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

        # Count batches explicitly: tqdm refreshes its ``n`` attribute only at
        # rate-limited intervals, so it is not a reliable divisor for the
        # running mean shown in the progress bar.
        for step, batch in enumerate(progress_bar, start=1):
            optimizer.zero_grad()
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'label'}
            outputs = model(**inputs)
            loss = criterion(outputs, batch['label'].to(device))
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            progress_bar.set_postfix({'loss': train_loss / step})

        # Validation
        val_loss, val_metrics = evaluate_model(model, val_loader, criterion, device)
        print(f"\nValidation - Loss: {val_loss:.4f}, AUC: {val_metrics['AUC']:.4f}, F1: {val_metrics['F1']:.4f}")

        # Save best model
        if val_metrics['AUC'] > best_val_auc:
            best_val_auc = val_metrics['AUC']
            torch.save(model.state_dict(), "best_model.pth")
            print("✅ Saved best model")

        history['train_loss'].append(train_loss/len(train_loader))
        history['val_loss'].append(val_loss)
        history['val_auc'].append(val_metrics['AUC'])

    return model, history

def find_optimal_cutoff(tpr, fpr, thresholds):
    """Pick the threshold that maximises Youden's J statistic (TPR - FPR).

    Args:
        tpr: Array of true-positive rates, one per threshold.
        fpr: Array of false-positive rates, one per threshold.
        thresholds: Thresholds aligned with ``tpr`` / ``fpr``.

    Returns:
        The entry of ``thresholds`` at the first position where
        ``tpr - fpr`` is largest.
    """
    j_statistic = tpr - fpr
    best_index = int(np.argmax(j_statistic))  # argmax keeps the first maximum
    return thresholds[best_index]

def best_confusion_matrix(y_test, y_test_predprob):
    """Threshold scores at the Youden-optimal cutoff and count the outcomes.

    The cutoff is chosen from the ROC curve of ``y_test_predprob`` against
    ``y_test`` (positive class ``1``); scores ``>= cutoff`` are predicted as
    class 1, everything else as class 0.

    Args:
        y_test: True binary labels (0 / 1).
        y_test_predprob: Scores for the positive class, aligned with
            ``y_test``.

    Returns:
        ``(cutoff, TN, FP, FN, TP)`` — the same order in which the callers in
        this file unpack the result and in which
        ``confusion_matrix(...).ravel()`` yields the counts.
    """
    fpr, tpr, thresholds = roc_curve(y_test, y_test_predprob, pos_label=1)
    cutoff = find_optimal_cutoff(tpr, fpr, thresholds)
    y_pred = (np.asarray(y_test_predprob) >= cutoff).astype(int)
    # Fixing the label set keeps the matrix 2x2 even when only one class
    # shows up in the labels/predictions, so the 4-way unpack cannot fail.
    TN, FP, FN, TP = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return cutoff, TN, FP, FN, TP

@memory_safe
def evaluate_model(model, data_loader, criterion, device):
    """Compute the mean loss and threshold-based metrics on a labelled loader.

    The model is run in eval mode without gradients. The positive-class score
    is the softmax probability of column 1. The decision threshold is the
    Youden-optimal cutoff derived from the very data being evaluated, so the
    threshold-dependent metrics are optimistic for that data.

    Args:
        model: Module called as ``model(**inputs)`` with every batch entry
            except ``'label'``.
        data_loader: Iterable of batch dicts that contain a ``'label'`` entry.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(mean_loss, metrics)`` where ``metrics`` has the keys ``AUC``,
        ``Cutoff``, ``Sensitivity``, ``Specificity``, ``Kappa``, ``MCC`` and
        ``F1``.
    """
    model.eval()
    total_loss = 0
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'label'}
            outputs = model(**inputs)
            loss = criterion(outputs, batch['label'].to(device))
            total_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            all_labels.extend(batch['label'].cpu().numpy())
            all_probs.extend(probs)

    y_true = np.asarray(all_labels)
    y_score = np.asarray(all_probs)

    # Only the cutoff (always the first element) is taken from the helper.
    # The counts are derived here from the explicit sklearn ordering
    # (TN, FP, FN, TP) so FP and FN cannot be mixed up.
    cutoff = best_confusion_matrix(y_true, y_score)[0]
    y_pred = (y_score >= cutoff).astype(int)
    TN, FP, FN, TP = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    metrics = {
        "AUC": roc_auc_score(y_true, y_score),
        "Cutoff": cutoff,
        "Sensitivity": TP / (TP + FN),
        "Specificity": TN / (TN + FP),
        "Kappa": cohen_kappa_score(y_true, y_pred),
        "MCC": matthews_corrcoef(y_true, y_pred),
        "F1": f1_score(y_true, y_pred),
    }

    return total_loss / len(data_loader), metrics

@memory_safe
def predict(model, data_loader, device):
    """Score every batch of ``data_loader`` with ``model``.

    The model is switched to eval mode and run without gradients. Only the
    four tokenised-input entries of each batch are forwarded to the model;
    a ``'label'`` entry, when present, is collected but never passed on.

    Args:
        model: Module called with the four ``drug*_input_ids`` /
            ``drug*_attention_mask`` keyword arguments.
        data_loader: Iterable of batch dicts.
        device: Device the input tensors are moved to.

    Returns:
        ``(scores, labels)``: ``scores`` is an array of softmax probabilities
        of column 1; ``labels`` is an array of collected labels, or ``None``
        when no labels were collected.
    """
    input_keys = (
        'drug1_input_ids',
        'drug1_attention_mask',
        'drug2_input_ids',
        'drug2_attention_mask',
    )
    model.eval()
    scores = []
    seen_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Predicting"):
            # Forward only what the model signature accepts (no label).
            model_inputs = {key: batch[key].to(device) for key in input_keys}
            logits = model(**model_inputs)
            scores.extend(torch.softmax(logits, dim=1)[:, 1].cpu().numpy())

            # Keep the labels too when the batch carries them.
            if 'label' in batch:
                seen_labels.extend(batch['label'].cpu().numpy())

    labels = np.array(seen_labels) if seen_labels else None
    return np.array(scores), labels

def save_prediction_scores(model_name, smiles_pairs, probs, labels=None, cutoff=None):
    """Write per-pair prediction scores for one model to a CSV file.

    The file is named ``<model_name with spaces replaced by '_'>_prediction_scores.csv``
    and is written to the current working directory without the index.

    Args:
        model_name: Display name of the model; also used for the file name.
        smiles_pairs: 2-D array whose columns 0 and 1 hold the two SMILES.
        probs: Array of prediction scores, one per pair.
        labels: Optional true labels, stored as ``Original_Label``.
        cutoff: Optional threshold; when given, ``Predicted_Label`` is 1 where
            ``probs >= cutoff`` and 0 otherwise.
    """
    table = pd.DataFrame({
        'Drug1_SMILES': smiles_pairs[:, 0],
        'Drug2_SMILES': smiles_pairs[:, 1],
        'Prediction_Score': probs,
    })

    # Optional columns are appended in a fixed order: labels, then decisions.
    if labels is not None:
        table = table.assign(Original_Label=labels)
    if cutoff is not None:
        table = table.assign(Predicted_Label=(probs >= cutoff).astype(int))

    filename = "_".join(model_name.split(' ')) + "_prediction_scores.csv"
    table.to_csv(filename, index=False)
    print(f"✅ {model_name} prediction scores saved to {filename}")

# ---------------------- Main Execution ----------------------
# Line style per model for the ROC / PR plots; a model without an entry here
# is not drawn.
_CURVE_STYLES = {
    "ChemCoBERT": {"color": '#E41A1C', "linewidth": 3},
    "Decision Tree": {"color": '#A65628', "linewidth": 2.5, "linestyle": '--'},  # dashed
    "AdaBoost": {"color": '#4DAF4A', "linewidth": 2.5},
    "GBDT": {"color": '#FF7F00', "linewidth": 2.5},
    "K-NN": {"color": '#984EA3', "linewidth": 2.5},
    "Naive Bayes": {"color": '#377EB8', "linewidth": 2.5, "alpha": 0.4},  # semi-transparent
}


def _positive_class_scores(clf, X):
    """Return one positive-class score per row of ``X`` for a fitted ``clf``.

    Uses column 1 of ``predict_proba`` when available, otherwise the
    min-max-normalised ``decision_function`` output.
    """
    if hasattr(clf, "predict_proba"):
        return clf.predict_proba(X)[:, 1]
    raw = clf.decision_function(X)
    return (raw - raw.min()) / (raw.max() - raw.min())


def _threshold_metrics(y_true, scores):
    """Compute AUC plus metrics at the Youden-optimal cutoff of ``scores``."""
    # Only the cutoff (first element) is taken from the helper; the counts are
    # derived here from sklearn's explicit (TN, FP, FN, TP) ordering.
    cutoff = best_confusion_matrix(y_true, scores)[0]
    y_pred = (scores >= cutoff).astype(int)
    TN, FP, FN, TP = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return {
        "AUC": roc_auc_score(y_true, scores),
        "Cutoff": cutoff,
        "Sensitivity": TP / (TP + FN),
        "Specificity": TN / (TN + FP),
        "Kappa": cohen_kappa_score(y_true, y_pred),
        "MCC": matthews_corrcoef(y_true, y_pred),
        "F1": f1_score(y_true, y_pred),
    }


def _curve_summaries(y_true, scores):
    """Return ``(pr, roc)`` dicts holding the curve points and their AUC."""
    precision, recall, _ = precision_recall_curve(y_true, scores)
    fpr, tpr, _ = roc_curve(y_true, scores)
    pr = {"precision": precision, "recall": recall, "auc": auc(recall, precision)}
    roc = {"fpr": fpr, "tpr": tpr, "auc": auc(fpr, tpr)}
    return pr, roc


def _plot_curves(model_names, pr_curves, roc_curves):
    """Draw the ROC (left) and precision-recall (right) panels and show them."""
    plt.figure(figsize=(15, 6))
    panels = (
        (122, pr_curves, "recall", "precision",
         "Recall", "Precision", "Precision-Recall Curve", "lower left"),
        (121, roc_curves, "fpr", "tpr",
         "False Positive Rate", "True Positive Rate",
         "Receiver Operating Characteristic Curve", "lower right"),
    )
    for position, curves, x_key, y_key, x_label, y_label, title, legend_loc in panels:
        plt.subplot(position)
        for name in model_names:
            style = _CURVE_STYLES.get(name)
            if name not in curves or style is None:
                continue
            curve = curves[name]
            plt.plot(curve[x_key], curve[y_key],
                     label=f"{name} (AUC={curve['auc']:.4f})", **style)
        plt.xlabel(x_label, fontweight='bold')
        plt.ylabel(y_label, fontweight='bold')
        plt.title(title, fontweight='bold')
        plt.legend(loc=legend_loc)
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


def main():
    """Train all models, then either evaluate them or predict the test set.

    Reads tab-separated train/test files (first two columns: SMILES pair;
    last column: label). Every model in ``classifiers`` is trained and
    persisted. When the test file has a label column (more than two columns),
    metrics are computed, per-model score CSVs are written, and ROC / PR
    curves are plotted. Otherwise a cutoff is derived from the training data
    for each model and test predictions are written to per-model CSVs and to
    ``test_predictions_with_cutoffs.csv``.
    """
    import joblib  # kept as a function-local import, as in the original code

    # Load training data
    train_df = pd.read_csv("/kaggle/working/222222/222222/2/train.txt", sep='\t', header=None)
    X_train_smiles = train_df.iloc[:, :2].values
    y_train = train_df.iloc[:, -1].values

    # Load test data
    test_df = pd.read_csv("/kaggle/working/222222/222222/2/test.txt", sep='\t', header=None)
    X_test_smiles = test_df.iloc[:, :2].values
    y_test = test_df.iloc[:, -1].values if test_df.shape[1] > 2 else None
    has_labels = y_test is not None

    # Prepare numerical features for traditional models
    print("\nPreparing features...")
    X_train_num = prepare_features(X_train_smiles)
    X_test_num = prepare_features(X_test_smiles)

    scaler = StandardScaler()
    X_train_num = scaler.fit_transform(X_train_num)
    X_test_num = scaler.transform(X_test_num)

    # Define classifiers ("ChemCoBERT" is the deep model, built in the loop)
    classifiers = {
        "ChemCoBERT": None,
        "Decision Tree": DecisionTreeClassifier(
            max_depth=5,                # much shallower tree
            min_samples_split=20,       # higher bar for splitting a node
            min_samples_leaf=10,        # more samples required per leaf
            max_features=0.8,           # fraction of features instead of "sqrt"
            criterion="gini",           # switched back to gini as an experiment
            class_weight="balanced",
            random_state=42
        ),
        "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42),
        "GBDT": GradientBoostingClassifier(
            n_estimators=100,
            random_state=42,     # same seed as the other models
            learning_rate=0.1,
            max_depth=2,
            subsample=0.5,       # row subsampling
            max_features='sqrt'  # feature subsampling
        ),
        "K-NN": KNeighborsClassifier(n_neighbors=5),
        "Naive Bayes": GaussianNB(var_smoothing=1e-2)
    }

    # Results for the labelled case
    all_metrics = {}
    pr_curves = {}
    roc_curves = {}
    # Scores for the unlabelled case: name -> (test_scores, train_scores)
    unlabelled_scores = {}

    # Train and evaluate each model
    for name, clf in classifiers.items():
        print(f"\n{'='*50}\nTraining {name}...\n{'='*50}")

        if name == "ChemCoBERT":
            # Deep learning model setup
            tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MLM")
            train_dataset = DrugInteractionDataset(
                X_train_smiles[:, 0], X_train_smiles[:, 1], y_train, tokenizer
            )
            test_dataset = DrugInteractionDataset(
                X_test_smiles[:, 0], X_test_smiles[:, 1], y_test, tokenizer
            )

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = CoAttentionModel().to(device)
            optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
            criterion = nn.CrossEntropyLoss()

            # NOTE(review): with labels available, the test set doubles as the
            # validation set for checkpoint selection, which biases the
            # reported test metrics upwards; a separate validation split would
            # be cleaner. Without test labels validation needs *some* labelled
            # data, so the training set is used instead.
            val_dataset = test_dataset if has_labels else train_dataset

            # Train
            model, _ = train_model(
                model,
                DataLoader(train_dataset, batch_size=8, shuffle=True),
                DataLoader(val_dataset, batch_size=8),
                optimizer, criterion, device, epochs=10
            )

            # Load best model
            model.load_state_dict(torch.load("best_model.pth", map_location=device))

            test_probs, _ = predict(model, DataLoader(test_dataset, batch_size=128), device)

            if has_labels:
                _, metrics = evaluate_model(
                    model,
                    DataLoader(test_dataset, batch_size=8),
                    criterion, device
                )
            else:
                train_probs, _ = predict(model, DataLoader(train_dataset, batch_size=128), device)

            # Save model
            torch.save(model.state_dict(), f"{name.replace(' ', '_')}_model.pth")

        else:
            # Traditional ML models
            clf.fit(X_train_num, y_train)

            # Save model
            joblib.dump(clf, f"{name.replace(' ', '_')}_model.joblib")

            test_probs = _positive_class_scores(clf, X_test_num)
            if has_labels:
                # NOTE(review): the cutoff is tuned on the test labels, so the
                # threshold-dependent metrics are optimistic.
                metrics = _threshold_metrics(y_test, test_probs)
            else:
                train_probs = _positive_class_scores(clf, X_train_num)

        if has_labels:
            all_metrics[name] = metrics
            # Curves need true labels, so they are only stored in this case.
            pr_curves[name], roc_curves[name] = _curve_summaries(y_test, test_probs)

            # Save prediction scores together with the original labels
            save_prediction_scores(
                name,
                X_test_smiles,
                test_probs,
                y_test,
                cutoff=metrics["Cutoff"]
            )
        else:
            unlabelled_scores[name] = (test_probs, train_probs)

    if has_labels:
        # Print final metrics
        print("\nFinal Test Metrics:")
        for name, metrics in all_metrics.items():
            print(f"\n{name}:")
            for metric, value in metrics.items():
                print(f"{metric}: {value:.4f}")

        # Plot curves
        _plot_curves(list(classifiers), pr_curves, roc_curves)
    else:
        print("\nGenerating predictions for test set...")
        predictions = {}
        cutoffs = {}  # best threshold per model

        for name in classifiers:
            test_probs, train_probs = unlabelled_scores[name]
            predictions[name] = test_probs

            # Without test labels the threshold is chosen on the training data
            fpr, tpr, thresholds = roc_curve(y_train, train_probs)
            cutoffs[name] = find_optimal_cutoff(tpr, fpr, thresholds)

            # Save prediction scores
            save_prediction_scores(
                name,
                X_test_smiles,
                test_probs,
                None,
                cutoff=cutoffs[name]
            )

        # Save all predictions together with the thresholds
        pred_df = pd.DataFrame({
            **predictions,
            **{f"{name}_cutoff": [cutoffs[name]]*len(X_test_smiles) for name in classifiers}
        })
        pred_df.to_csv("test_predictions_with_cutoffs.csv", index=False)
        print("✅ Predictions saved to test_predictions_with_cutoffs.csv")


# Run the full pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

