In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from catboost import Pool, CatBoostClassifier, cv, CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score, f1_score
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, AdaBoostRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import RobustScaler
from scipy.interpolate import interp1d
from scipy.fft import fft
from tqdm import tqdm
from joblib import dump, load

import pickle

In [5]:
# Input CSV files, given as relative paths (resolved against the working directory).
# Training data for the regression task.
TRAIN_REG_LINK = "mars-train-regr.csv"
# Training data for the classification task.
TRAIN_CLASS_LINK = "mars-train-class.csv"
# Public classification test set; also stacked into the regression training frame below.
PUBLIC_CLASS_LINK = "mars-public_test-class.csv"
# Private test sets for which the submission files are produced.
PRIVAT_REG_LINK = "mars-private_test-reg.csv"
PRIVAT_CLASS_LINK = "mars-private_test-class.csv"

In [6]:
class Ensemble:
    """Averaging ensemble over a list of ``(model, name)`` pairs.

    Every frame passed in gets three derived columns (``x_coord``, ``y_coord``,
    ``z_coord``) computed from range, azimuth and elevation. For the ``'regr'``
    task the ensemble output is the mean of the model predictions; for the
    ``'class'`` task the mean is thresholded at ``line`` into 0.0 / 1.0 labels.

    Parameters
    ----------
    models : list of (model, str)
        Estimators paired with a name. Models whose name starts with ``'cb'``
        are fitted on a CatBoost ``Pool``; all others on ``(X_train, y_train)``.
    train : pandas.DataFrame
        Training data containing the target column of the chosen task.
        The frame is copied; the caller's object is not modified.
    top_5_least_important : list
        Stored on the instance; not used by any method of this class.
    line : float, default 1
        Decision threshold for the ``'class'`` task (mean > line -> 1.0).
        Ignored for ``'regr'``.
    task : {'regr', 'class'}, default 'regr'
        Selects the target column and the scoring/aggregation rules.
    random_state : int or None, default None
        Forwarded to ``train_test_split``; ``None`` keeps the original
        unseeded (non-reproducible) 90/10 split.

    Raises
    ------
    ValueError
        If ``task`` is not ``'regr'`` or ``'class'``.
    """

    # Target column for each supported task.
    _TARGETS = {'regr': 'Доля сигнала в ВП', 'class': 'Тип марсианина'}

    def __init__(self, models, train, top_5_least_important, line=1, task="regr",
                 random_state=None):
        if task not in self._TARGETS:
            raise ValueError(
                f"task must be one of {sorted(self._TARGETS)}, got {task!r}"
            )

        # Derive features on a copy so the caller's frame keeps its columns.
        train = self._with_cartesian_coords(train)

        self.train = train
        self.models = models
        self.top_5_least_important = top_5_least_important
        self.task = task
        self.line = line

        target = self._TARGETS[task]
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            train.drop(target, axis=1),
            train[target],
            test_size=0.1,
            shuffle=True,
            random_state=random_state,
        )
        self.train_cb = Pool(data=self.X_train, label=self.y_train)
        self.test_cb = Pool(data=self.X_test, label=self.y_test)

    @staticmethod
    def _with_cartesian_coords(frame):
        """Return a copy of ``frame`` with x/y/z columns derived from range and angles."""
        out = frame.copy()
        distance = out['Дальность (м)']
        azimuth = out['Азимут']
        elevation = out['У.М.']
        # NOTE(review): np.sin/np.cos interpret their input as radians, while the
        # azimuth/elevation values displayed in this notebook (e.g. -25.9, 68.9)
        # look like degrees - confirm. The formulas are deliberately left as they
        # were: the saved models presumably expect features computed this way.
        out['x_coord'] = distance * np.sin(azimuth) * np.cos(elevation)
        out['y_coord'] = distance * np.cos(azimuth) * np.cos(elevation)
        out['z_coord'] = distance * np.sin(elevation)
        return out

    def _combine(self, predictions):
        """Average per-model predictions; threshold into 0.0/1.0 for the 'class' task."""
        # Cast to float first: in-place division of integer predictions would fail.
        stacked = np.asarray(predictions, dtype=float)
        averaged = stacked.sum(axis=0) / len(predictions)
        if self.task == 'class':
            # A single comparison is correct for any threshold, including negative ones.
            return (averaged > self.line).astype(float)
        return averaged

    def _require_models(self):
        """Fail early with a clear message instead of dividing by zero later."""
        if not self.models:
            raise ValueError("Ensemble has no models")

    def fit(self):
        """Fit every model on the training split and score on the held-out split.

        Returns
        -------
        scores : list of dict
            One ``{name: score}`` per model: ``r2 * 70`` for ``'regr'``,
            mean squared error of the raw predictions for ``'class'``.
        total_score : float
            Score of the combined prediction: ``r2 * 70`` for ``'regr'``,
            ``f1 * 30`` for ``'class'``. The 70/30 multipliers are kept from the
            original code (presumably task weights - confirm).

        Raises
        ------
        ValueError
            If the ensemble contains no models.
        """
        self._require_models()
        y_true = np.array(self.y_test)
        scores = []
        collected = []
        for model, name in tqdm(self.models, desc='Обучение моделей', ncols=100):
            if name.startswith('cb'):
                model.fit(self.train_cb)
            else:
                model.fit(self.X_train, self.y_train)
            preds = model.predict(self.X_test)
            if self.task == 'regr':
                score = r2_score(y_true, preds) * 70
            else:
                score = mean_squared_error(y_true, preds)
            scores.append({name: score})
            collected.append(preds)

        combined = self._combine(collected)
        if self.task == 'class':
            total_score = f1_score(y_true, combined) * 30
        else:
            total_score = r2_score(y_true, combined) * 70
        return scores, total_score

    def predict(self, test):
        """Predict with every model and combine the results.

        Parameters
        ----------
        test : pandas.DataFrame
            Feature frame with the range/azimuth/elevation columns. It is not
            modified; the derived coordinates are added to an internal copy.

        Returns
        -------
        preds : list of (str, array)
            Raw predictions of each model, paired with the model name.
        sums : numpy.ndarray
            Mean prediction (``'regr'``) or 0.0/1.0 labels (``'class'``).

        Raises
        ------
        ValueError
            If the ensemble contains no models.
        """
        self._require_models()
        features = self._with_cartesian_coords(test)
        preds = [(name, model.predict(features)) for model, name in self.models]
        sums = self._combine([model_preds for _, model_preds in preds])
        return preds, sums
            
        

In [7]:
# Joblib dump holding the list of (model, name) pairs for the regression ensemble.
MODELS_REG_LINK = "models_reg.joblib"
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load dumps you produced yourself. Presumably the dump should also be read
# with the same scikit-learn version that wrote it - verify.
models_reg = load(MODELS_REG_LINK)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
# Add the three saved CatBoost regressors (files 'cb_1'..'cb_3') to the ensemble list.
# Names already present are skipped, so re-running this cell does not append the
# same models again and give them extra weight in the ensemble average.
already_loaded = {name for _, name in models_reg}
for i in range(1, 4):
    cb_name = f'cb_{i}'
    if cb_name in already_loaded:
        continue
    model = CatBoostRegressor()
    model.load_model(cb_name)
    models_reg.append((model, cb_name))
models_reg

[(RandomForestRegressor(max_depth=19, n_estimators=1000, n_jobs=-1,
                        random_state=1010),
  'rf_1'),
 (RandomForestRegressor(max_depth=21, n_estimators=1000, n_jobs=-1,
                        random_state=1010),
  'rf_2'),
 (RandomForestRegressor(max_depth=20, n_estimators=1000, n_jobs=-1,
                        random_state=1010),
  'rf_3'),
 (<catboost.core.CatBoostRegressor at 0x7fe626953f70>, 'cb_1'),
 (<catboost.core.CatBoostRegressor at 0x7fe6ca356170>, 'cb_2'),
 (<catboost.core.CatBoostRegressor at 0x7fe734561c90>, 'cb_3')]

In [9]:
# Regression inference: build the ensemble and write the private-test submission.
train = pd.read_csv(TRAIN_REG_LINK)
privat_reg_tests = pd.read_csv(PRIVAT_REG_LINK)
train_b = pd.read_csv(TRAIN_CLASS_LINK)
test_b = pd.read_csv(PUBLIC_CLASS_LINK)

# Restrict the classification frames to the regression columns, then stack all three.
features = train.columns
train_b = train_b[features]
test_b = test_b[features]
train_plus = pd.concat([train, train_b, test_b], axis=0, ignore_index=True)

# `task` is passed by keyword: the fourth positional parameter of Ensemble is
# `line`, so a positional "regr" would land in the threshold, not in the task.
ensemble_reg = Ensemble(models_reg, train_plus, [], task="regr")

# Predict on a copy so the submission frame keeps exactly the columns read from
# disk, whether or not predict() adds feature columns to its argument.
models_preds, preds_reg = ensemble_reg.predict(privat_reg_tests.copy())
privat_reg_tests['Доля сигнала в ВП'] = preds_reg
privat_reg_tests.to_csv('submit_reg.csv')
privat_reg_tests

Unnamed: 0,№ испытания,Модуль сигнала,Тип_измерения,Количество импульсов,Фаза Hor,Фаза Ver,Уровень шума,Азимут,У.М.,Секунда,Дальность (м),Доля сигнала в ВП
0,1,1871.18,1,1,1.570796,1.570796,82.898627,-25.933488,12.710121,1.870874e+03,3021.302195,0.809009
1,1,439.49,1,1,2.689454,2.348535,78.416870,5.661776,10.063751,1.870958e+03,4561.907406,0.528414
2,1,582.74,1,1,1.666902,0.039996,88.691199,5.477397,10.037053,1.871125e+03,4565.950292,0.357070
3,1,5077.17,1,2,0.973110,-0.650704,100.990251,29.508055,12.298665,1.871139e+03,2860.434106,0.921425
4,1,2352.11,1,1,1.570796,1.570796,88.746725,-25.650543,12.796875,1.871293e+03,3008.581395,0.770235
...,...,...,...,...,...,...,...,...,...,...,...,...
4485,3,1115.39,4,1,0.597828,0.959746,81.411318,-39.735145,68.914096,1.476154e+06,990.272386,0.834532
4486,3,1098.99,4,1,-2.363914,-1.960116,80.193708,-39.750193,68.915504,1.476154e+06,990.262177,0.792775
4487,3,1100.36,4,1,-3.116986,-2.937334,80.013248,-39.765084,68.916933,1.476154e+06,990.251968,0.782360
4488,3,861.69,1,2,-0.094091,0.062716,88.064546,21.025967,21.140037,1.476154e+06,2130.172412,0.892452


In [10]:
# Load the three saved CatBoost models used for the classification task.
# They are loaded as regressors; the ensemble thresholds their averaged output.
models_class = []
for class_model_name in (f'cb_class_{i}' for i in range(1, 4)):
    model = CatBoostRegressor()
    model.load_model(class_model_name)
    models_class.append((model, class_model_name))
models_class

[(<catboost.core.CatBoostRegressor at 0x7fe626951900>, 'cb_class_1'),
 (<catboost.core.CatBoostRegressor at 0x7fe6269505b0>, 'cb_class_2'),
 (<catboost.core.CatBoostRegressor at 0x7fe734563850>, 'cb_class_3')]

In [11]:
# Classification inference: build the ensemble and write the private-test submission.
train = pd.read_csv(TRAIN_CLASS_LINK)
privat_class_tests = pd.read_csv(PRIVAT_CLASS_LINK)

# line=0.5 is the threshold applied to the averaged model outputs.
ensemble_class = Ensemble(models_class, train, [], line=0.5, task='class')

# Predict on a copy so the submission frame keeps exactly the columns read from
# disk (no second read of the CSV needed), whether or not predict() adds
# feature columns to its argument.
models_preds, preds_class = ensemble_class.predict(privat_class_tests.copy())
privat_class_tests['Тип марсианина'] = preds_class
privat_class_tests.to_csv('submit_class.csv')
privat_class_tests

Unnamed: 0,№ испытания,Модуль сигнала,Тип_измерения,Количество импульсов,Фаза Hor,Фаза Ver,Уровень шума,Азимут,У.М.,Секунда,Дальность (м),Доля сигнала в ВП,Тип марсианина
0,1,3034.42,4,1,1.570796,1.570796,82.289822,-21.796051,14.470102,1.876713e+03,2846.712794,0.702408,0.0
1,1,3007.74,4,1,-1.177580,0.042901,99.758760,-21.793055,14.471338,1.876713e+03,2846.600492,0.777598,0.0
2,1,3324.61,4,1,2.008799,3.019872,80.054893,-21.789997,14.472575,1.876713e+03,2846.477980,0.815011,0.0
3,1,3182.46,4,1,-0.762080,0.362214,78.393073,-21.787002,14.473810,1.876727e+03,2846.365678,0.889261,0.0
4,1,3174.68,4,1,-3.002787,-1.869907,93.309789,-21.783943,14.475048,1.876727e+03,2846.253375,0.761554,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2550,3,691.73,4,1,0.090151,0.881038,76.344553,-21.593132,65.897805,1.476151e+06,1010.507238,0.552768,1.0
2551,3,825.93,4,1,-0.098148,0.608125,95.308732,-21.604149,65.900535,1.476151e+06,1010.486819,0.371986,1.0
2552,3,983.40,4,1,0.521566,1.706500,84.191594,-21.615169,65.903265,1.476151e+06,1010.466401,0.320904,1.0
2553,3,950.55,4,1,1.879662,2.728574,84.120203,-21.637076,65.908734,1.476151e+06,1010.425563,0.356461,1.0
