In [1]:
import pandas as pd
import os
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression


def run_pipeline(raw_csv: str, out_csv: str) -> dict:
    """Fit a scaled logistic regression on one CSV and save in-sample predictions.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. The model is fitted and scored
    on the same rows, so ``accuracy`` is a training accuracy, not a held-out
    estimate.

    Args:
        raw_csv: Path of the CSV to read.
        out_csv: Path where the input rows plus a ``pred`` column are written.

    Returns:
        Dict with ``script`` (fixed identifier), ``accuracy`` (fraction of rows
        whose prediction equals the label) and ``n_rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    df = pd.read_csv(raw_csv)

    X = df.drop(columns=['Collision'])
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # .map() turns unknown labels into NaN; stop here with a readable message
    # instead of letting the estimator fail later on NaN targets.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {raw_csv} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Encode every non-numeric column, not only dtype == object: pandas
        # versions that infer a dedicated string dtype would otherwise pass
        # raw strings on to StandardScaler.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('lr', LogisticRegression(max_iter=1000))
    ])

    pipe.fit(X, y)
    # Predictions are made on the same rows the model was trained on.
    df['pred'] = pipe.predict(X)
    df.to_csv(out_csv, index=False)
    acc = (df['pred'] == y).mean()
    return {
        'script': 'script1',
        'accuracy': acc,
        'n_rows': len(df)
    }


def main():
    """Run ``run_pipeline`` over every CSV file in ``datasets_storage``.

    For each ``<name>.csv`` the predictions are written to
    ``outputs/pred_<name>.csv`` (the output folder is created when missing).
    Progress is printed per file and the collected result dicts are printed
    at the end.
    """
    input_folder = Path(r"datasets_storage")
    output_folder = Path(r"outputs")
    output_folder.mkdir(exist_ok=True)
    results = []

    # glob() yields entries in an OS-dependent order; sorting keeps the
    # processing order and the summary reproducible between runs.
    for file_path in sorted(input_folder.glob("*.csv")):
        out_file = output_folder / f"pred_{file_path.name}"
        print(f"Обрабатываем {file_path.name} ...")
        res = run_pipeline(str(file_path), str(out_file))
        print(f"Сохранено в {out_file}\n")
        results.append(res)

    print("Итоги:")
    for r in results:
        print(r)


# Run the batch only when the cell/script is executed directly.
if __name__ == "__main__":
    main()

Обрабатываем dataset_10_rows1316_feats4.csv ...
Сохранено в outputs\pred_dataset_10_rows1316_feats4.csv

Обрабатываем dataset_11_rows1016_feats9.csv ...
Сохранено в outputs\pred_dataset_11_rows1016_feats9.csv

Обрабатываем dataset_12_rows1196_feats12.csv ...
Сохранено в outputs\pred_dataset_12_rows1196_feats12.csv

Обрабатываем dataset_1_rows61_feats6.csv ...
Сохранено в outputs\pred_dataset_1_rows61_feats6.csv

Обрабатываем dataset_2_rows66_feats8.csv ...
Сохранено в outputs\pred_dataset_2_rows66_feats8.csv

Обрабатываем dataset_3_rows96_feats15.csv ...
Сохранено в outputs\pred_dataset_3_rows96_feats15.csv

Обрабатываем dataset_4_rows309_feats4.csv ...
Сохранено в outputs\pred_dataset_4_rows309_feats4.csv

Обрабатываем dataset_5_rows342_feats10.csv ...
Сохранено в outputs\pred_dataset_5_rows342_feats10.csv

Обрабатываем dataset_6_rows360_feats11.csv ...
Сохранено в outputs\pred_dataset_6_rows360_feats11.csv

Обрабатываем dataset_7_rows578_feats5.csv ...
Сохранено в outputs\pred_datase

In [2]:
import pandas as pd
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier


def run_pipeline(raw_csv: str, out_csv: str) -> dict:
    """Fit a depth-limited decision tree on one CSV and save in-sample predictions.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. The model is fitted and scored
    on the same rows, so ``accuracy`` is a training accuracy.

    Args:
        raw_csv: Path of the CSV to read.
        out_csv: Path where the input rows plus a ``pred`` column are written.

    Returns:
        Dict with ``script``, ``accuracy`` and ``n_rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    df = pd.read_csv(raw_csv)

    X = df.drop(columns=['Collision'])
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # Unknown labels become NaN after .map(); report them explicitly rather
    # than failing later inside the estimator.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {raw_csv} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Any non-numeric column is encoded; checking only dtype == object
        # would skip columns that pandas stores with a dedicated string dtype.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('dt', DecisionTreeClassifier(max_depth=5))
    ])

    pipe.fit(X, y)
    # Predictions are made on the same rows the model was trained on.
    df['pred'] = pipe.predict(X)
    df.to_csv(out_csv, index=False)
    accuracy = (df['pred'] == y).mean()
    return {'script': 'script2', 'accuracy': accuracy, 'n_rows': len(df)}


def main():
    """Run ``run_pipeline`` over every CSV file in ``datasets_storage``.

    For each ``<name>.csv`` the predictions are written to
    ``outputs/pred_<name>.csv`` (the output folder is created when missing).
    Progress is printed per file, followed by a summary of the result dicts.
    """
    input_dir = Path(r"datasets_storage")
    output_dir = Path(r"outputs")
    output_dir.mkdir(exist_ok=True)

    results = []
    # Sort the matches: glob() order depends on the OS/filesystem, and an
    # unsorted loop makes the log and summary differ between machines.
    for file_path in sorted(input_dir.glob("*.csv")):
        out_file = output_dir / f"pred_{file_path.name}"
        print(f"Processing {file_path.name} ...")
        res = run_pipeline(str(file_path), str(out_file))
        print(f"Saved predictions to {out_file}\n")
        results.append(res)

    print("Summary:")
    for r in results:
        print(r)


# Run the batch only when the cell/script is executed directly.
if __name__ == '__main__':
    main()

Processing dataset_10_rows1316_feats4.csv ...
Saved predictions to outputs\pred_dataset_10_rows1316_feats4.csv

Processing dataset_11_rows1016_feats9.csv ...
Saved predictions to outputs\pred_dataset_11_rows1016_feats9.csv

Processing dataset_12_rows1196_feats12.csv ...
Saved predictions to outputs\pred_dataset_12_rows1196_feats12.csv

Processing dataset_1_rows61_feats6.csv ...
Saved predictions to outputs\pred_dataset_1_rows61_feats6.csv

Processing dataset_2_rows66_feats8.csv ...
Saved predictions to outputs\pred_dataset_2_rows66_feats8.csv

Processing dataset_3_rows96_feats15.csv ...
Saved predictions to outputs\pred_dataset_3_rows96_feats15.csv

Processing dataset_4_rows309_feats4.csv ...
Saved predictions to outputs\pred_dataset_4_rows309_feats4.csv

Processing dataset_5_rows342_feats10.csv ...
Saved predictions to outputs\pred_dataset_5_rows342_feats10.csv

Processing dataset_6_rows360_feats11.csv ...
Saved predictions to outputs\pred_dataset_6_rows360_feats11.csv

Processing dat

In [3]:
import pandas as pd
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier


def run_pipeline(raw_csv: str, out_csv: str) -> dict:
    """Fit a scaled 5-nearest-neighbours classifier on one CSV and save predictions.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. The model is fitted and scored
    on the same rows, so ``accuracy`` is a training accuracy.

    Args:
        raw_csv: Path of the CSV to read.
        out_csv: Path where the input rows plus a ``pred`` column are written.

    Returns:
        Dict with ``script``, ``accuracy`` and ``n_rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    df = pd.read_csv(raw_csv)

    X = df.drop(columns=['Collision'])
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # Labels missing from the mapping come back as NaN; surface them now
    # with the offending values instead of a late estimator error.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {raw_csv} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Test for "not numeric" rather than dtype == object so that string
        # columns stored with a dedicated string dtype are encoded as well.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=5))
    ])

    pipe.fit(X, y)
    # Predictions are made on the same rows the model was trained on.
    df['pred'] = pipe.predict(X)
    df.to_csv(out_csv, index=False)
    accuracy = (df['pred'] == y).mean()
    return {'script': 'script3', 'accuracy': accuracy, 'n_rows': len(df)}


def main():
    """Run ``run_pipeline`` over every CSV file in ``datasets_storage``.

    For each ``<name>.csv`` the predictions are written to
    ``outputs/pred_<name>.csv`` (the output folder is created when missing).
    Progress is printed per file, followed by a summary of the result dicts.
    """
    input_dir = Path(r"datasets_storage")
    output_dir = Path(r"outputs")
    output_dir.mkdir(exist_ok=True)

    results = []
    # glob() makes no ordering promise; iterate in sorted order so repeated
    # runs process files and print the summary identically.
    for file_path in sorted(input_dir.glob("*.csv")):
        out_file = output_dir / f"pred_{file_path.name}"
        print(f"Processing {file_path.name} ...")
        res = run_pipeline(str(file_path), str(out_file))
        print(f"Saved predictions to {out_file}\n")
        results.append(res)

    print("Summary:")
    for r in results:
        print(r)


# Run the batch only when the cell/script is executed directly.
if __name__ == '__main__':
    main()

Processing dataset_10_rows1316_feats4.csv ...
Saved predictions to outputs\pred_dataset_10_rows1316_feats4.csv

Processing dataset_11_rows1016_feats9.csv ...
Saved predictions to outputs\pred_dataset_11_rows1016_feats9.csv

Processing dataset_12_rows1196_feats12.csv ...
Saved predictions to outputs\pred_dataset_12_rows1196_feats12.csv

Processing dataset_1_rows61_feats6.csv ...
Saved predictions to outputs\pred_dataset_1_rows61_feats6.csv

Processing dataset_2_rows66_feats8.csv ...
Saved predictions to outputs\pred_dataset_2_rows66_feats8.csv

Processing dataset_3_rows96_feats15.csv ...
Saved predictions to outputs\pred_dataset_3_rows96_feats15.csv

Processing dataset_4_rows309_feats4.csv ...
Saved predictions to outputs\pred_dataset_4_rows309_feats4.csv

Processing dataset_5_rows342_feats10.csv ...
Saved predictions to outputs\pred_dataset_5_rows342_feats10.csv

Processing dataset_6_rows360_feats11.csv ...
Saved predictions to outputs\pred_dataset_6_rows360_feats11.csv

Processing dat

In [4]:
import pandas as pd
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC


def run_pipeline(raw_csv: str, out_csv: str) -> dict:
    """Fit a scaled RBF-kernel SVC on one CSV and save in-sample predictions.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. The model is fitted and scored
    on the same rows, so ``accuracy`` is a training accuracy.

    Args:
        raw_csv: Path of the CSV to read.
        out_csv: Path where the input rows plus a ``pred`` column are written.

    Returns:
        Dict with ``script``, ``accuracy`` and ``n_rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    df = pd.read_csv(raw_csv)

    X = df.drop(columns=['Collision'])
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # A label outside the mapping yields NaN; raise with the actual values
    # so the data problem is visible before any model code runs.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {raw_csv} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Encode all non-numeric columns; a dtype == object test would miss
        # string columns that pandas stores with a dedicated string dtype.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC(kernel='rbf', C=1.0, gamma='scale'))
    ])

    pipe.fit(X, y)
    # Predictions are made on the same rows the model was trained on.
    df['pred'] = pipe.predict(X)
    df.to_csv(out_csv, index=False)
    accuracy = (df['pred'] == y).mean()
    return {'script': 'script4', 'accuracy': accuracy, 'n_rows': len(df)}


def main():
    """Run ``run_pipeline`` over every CSV file in ``datasets_storage``.

    For each ``<name>.csv`` the predictions are written to
    ``outputs/pred_<name>.csv`` (the output folder is created when missing).
    Progress is printed per file, followed by a summary of the result dicts.
    """
    input_dir = Path(r"datasets_storage")
    output_dir = Path(r"outputs")
    output_dir.mkdir(exist_ok=True)

    results = []
    # Iterate over a sorted list: the raw glob() order is filesystem-dependent,
    # which would make the log and summary order vary between environments.
    for file_path in sorted(input_dir.glob("*.csv")):
        out_file = output_dir / f"pred_{file_path.name}"
        print(f"Processing {file_path.name} ...")
        res = run_pipeline(str(file_path), str(out_file))
        print(f"Saved predictions to {out_file}\n")
        results.append(res)

    print("Summary:")
    for r in results:
        print(r)


# Run the batch only when the cell/script is executed directly.
if __name__ == '__main__':
    main()

Processing dataset_10_rows1316_feats4.csv ...
Saved predictions to outputs\pred_dataset_10_rows1316_feats4.csv

Processing dataset_11_rows1016_feats9.csv ...
Saved predictions to outputs\pred_dataset_11_rows1016_feats9.csv

Processing dataset_12_rows1196_feats12.csv ...
Saved predictions to outputs\pred_dataset_12_rows1196_feats12.csv

Processing dataset_1_rows61_feats6.csv ...
Saved predictions to outputs\pred_dataset_1_rows61_feats6.csv

Processing dataset_2_rows66_feats8.csv ...
Saved predictions to outputs\pred_dataset_2_rows66_feats8.csv

Processing dataset_3_rows96_feats15.csv ...
Saved predictions to outputs\pred_dataset_3_rows96_feats15.csv

Processing dataset_4_rows309_feats4.csv ...
Saved predictions to outputs\pred_dataset_4_rows309_feats4.csv

Processing dataset_5_rows342_feats10.csv ...
Saved predictions to outputs\pred_dataset_5_rows342_feats10.csv

Processing dataset_6_rows360_feats11.csv ...
Saved predictions to outputs\pred_dataset_6_rows360_feats11.csv

Processing dat

In [5]:
import pandas as pd
import pickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder


def run_pipeline(raw_csv: str, out_csv: str, model_path: "str | None" = None) -> dict:
    """Refit a pickled estimator on one CSV and save in-sample predictions.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. The unpickled object is used as
    the final pipeline step and ``pipe.fit`` is called on it again with this
    file's data. The model is scored on the rows it was fitted on, so
    ``accuracy`` is a training accuracy.

    Args:
        raw_csv: Path of the CSV to read.
        out_csv: Path where the input rows plus a ``pred`` column are written.
        model_path: Pickle file holding the estimator. Defaults to
            ``saved_models/optimized_lr_ga.pkl`` relative to the working
            directory.

    Returns:
        Dict with ``script``, ``accuracy`` and ``n_rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    import os  # local import: this cell does not import os at the top

    df = pd.read_csv(raw_csv)

    X = df.drop('Collision', axis=1)
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # Unknown labels become NaN after .map(); report them explicitly rather
    # than failing later inside the estimator.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {raw_csv} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Encode every non-numeric column, not only dtype == object, so string
        # columns stored with a dedicated string dtype are handled too.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    if model_path is None:
        # os.path.join keeps the default usable on non-Windows systems, where
        # a backslash is not a path separator.
        model_path = os.path.join("saved_models", "optimized_lr_ga.pkl")

    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load model files from a trusted source.
    with open(model_path, 'rb') as f:
        best_lr = pickle.load(f)

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('lr', best_lr)
    ])

    # NOTE(review): presumably the pickle holds a scikit-learn estimator whose
    # hyperparameters were tuned elsewhere; it is fitted again here.
    pipe.fit(X, y)
    df['pred'] = pipe.predict(X)
    df.to_csv(out_csv, index=False)
    accuracy = (df['pred'] == y).mean()
    return {'script': 'script5', 'accuracy': accuracy, 'n_rows': len(df)}


# Entry point: run the pipeline on one fixed dataset and print the result dict.
if __name__ == '__main__':
    import os
    import sys  # NOTE(review): not used in this cell; kept in case later cells rely on it

    # Build the paths with os.path.join so they also resolve on non-Windows
    # systems (a literal backslash is a path separator only on Windows).
    input_file = os.path.join("datasets_storage", "dataset_1_rows61_feats6.csv")
    output_file = os.path.join("outputs", "script5_output.csv")
    result = run_pipeline(input_file, output_file)
    print(result)

{'script': 'script5', 'accuracy': np.float64(0.8032786885245902), 'n_rows': 61}


In [6]:
import pandas as pd
import pickle
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC


def train_and_save_model(input_path: str, save_path: str) -> dict:
    """Fit a scaled RBF-kernel SVC on one CSV and pickle the fitted SVC step.

    The input must contain a ``Collision`` column holding ``'Да'`` / ``'Нет'``;
    all remaining columns are used as features. Non-numeric feature columns
    are integer-encoded with ``LabelEncoder``. Only the ``svc`` step of the
    pipeline is written to ``save_path``; the fitted scaler is not saved. The
    accuracy is computed on the training rows.

    Args:
        input_path: Path of the CSV to read.
        save_path: Destination of the pickle file; its parent directory is
            created when missing.

    Returns:
        Dict with ``script``, ``accuracy`` and ``rows``.

    Raises:
        ValueError: If ``Collision`` contains anything other than 'Да'/'Нет'
            (missing values included).
    """
    import os  # local import: this cell does not import os at the top

    df = pd.read_csv(input_path)
    X = df.drop(columns=['Collision'])
    y = df['Collision'].map({'Да': 1, 'Нет': 0})

    # Unknown labels become NaN after .map(); report them explicitly rather
    # than failing later inside the estimator.
    unmapped = y.isna()
    if unmapped.any():
        bad = df.loc[unmapped, 'Collision'].unique().tolist()
        raise ValueError(
            f"Column 'Collision' in {input_path} contains values other than "
            f"'Да'/'Нет': {bad}"
        )

    for col in X.columns:
        # Encode every non-numeric column, not only dtype == object, so string
        # columns stored with a dedicated string dtype are handled too.
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col])

    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('svc', SVC(kernel='rbf', C=1.0, gamma='scale'))
    ])

    pipe.fit(X, y)

    # open(..., 'wb') does not create directories; make sure the target exists.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    with open(save_path, 'wb') as f:
        # Only the classifier step is persisted, without the scaler.
        pickle.dump(pipe.named_steps['svc'], f)

    preds = pipe.predict(X)
    accuracy = (preds == y).mean()
    print(f"Модель сохранена в {save_path}")
    return {'script': 'script6', 'accuracy': accuracy, 'rows': len(df)}


# Entry point: train on one fixed dataset, save the model and print the result.
if __name__ == "__main__":
    import os

    # Build the paths with os.path.join so they also resolve on non-Windows
    # systems (a literal backslash is a path separator only on Windows).
    input_file = os.path.join("datasets_storage", "dataset_1_rows61_feats6.csv")
    save_file = os.path.join("saved_models", "optimized_svc_pso.pkl")
    results = train_and_save_model(input_file, save_file)
    print(results)

Модель сохранена в saved_models\optimized_svc_pso.pkl
{'script': 'script6', 'accuracy': np.float64(0.8688524590163934), 'rows': 61}
