## Home assignment 05: Bagging and OOB score

Пожалуйста, заполните строки кода ниже.
Это упрощённая версия `BaggingRegressor` из sklearn. Обратите внимание, что API `sklearn` **не сохранён**.

Ваш алгоритм должен уметь обучать различные экземпляры одного и того же класса модели на бутстрап-выборках (bootstrap samples) и вычислять [OOB score](https://en.wikipedia.org/wiki/Out-of-bag_error) для обучающего набора.

Модель следует передавать как класс модели без явных параметров и круглых скобок.

Example:
```
import numpy as np
from sklearn.linear_model import LinearRegression

bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
bagging_regressor.fit(LinearRegression, X, y)

```

In [2]:
import numpy as np
import random

In [None]:
class SimplifiedBaggingRegressor:
    """Simplified bagging regressor with an out-of-bag (OOB) error estimate.

    Every bag is a bootstrap sample (drawn with replacement, same size as the
    training set) on which a fresh instance of the supplied model class is
    fitted. The ensemble prediction is the mean of the per-model predictions.

    NOTE: the API intentionally differs from scikit-learn: the model *class*
    is passed to ``fit`` instead of an instance being passed to the
    constructor. Single-output (1-D) regression targets are assumed.
    """

    def __init__(self, num_bags, oob=False):
        """Store the ensemble configuration.

        Args:
            num_bags: number of bootstrap samples (and therefore models).
            oob: when true, ``fit`` keeps the training data so that
                ``OOB_score`` can be computed afterwards.
        """
        self.num_bags = num_bags
        self.oob = oob

    def _generate_splits(self, data: np.ndarray):
        """Draw bootstrap indices for every bag into ``self.indices_list``.

        Each entry is an integer array of length ``len(data)`` sampled with
        replacement from ``range(len(data))``.
        """
        data_length = len(data)
        # The upper bound of np.random.randint is exclusive, so every index
        # is a valid row number in [0, data_length).
        self.indices_list = [
            np.random.randint(0, data_length, size=data_length)
            for _ in range(self.num_bags)
        ]

    def fit(self, model_constructor, data, target):
        """Fit one freshly constructed model on every bag.

        The model constructor is passed without parameters and without ``()``.

        Example:
            bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
            bagging_regressor.fit(LinearRegression, X, y)

        Args:
            model_constructor: callable returning an unfitted model exposing
                ``fit(X, y)`` and ``predict(X)``.
            data: array-like of training objects, one row per object.
            target: array-like of training targets, same length as ``data``.

        Raises:
            ValueError: if ``num_bags`` is not positive, the dataset is empty,
                or ``data`` and ``target`` have different lengths.
        """
        data = np.asarray(data)
        target = np.asarray(target)
        if self.num_bags < 1:
            raise ValueError('num_bags must be a positive integer')
        if len(data) == 0:
            raise ValueError('Cannot fit on an empty dataset')
        if len(data) != len(target):
            raise ValueError('data and target must have the same length')

        # Drop anything remembered from a previous fit before refitting.
        self.data = None
        self.target = None
        self._generate_splits(data)
        assert len(set(list(map(len, self.indices_list)))) == 1, 'All bags should be of the same length!'
        assert list(map(len, self.indices_list))[0] == len(data), 'All bags should contain `len(data)` number of elements!'

        self.models_list = []
        for bag_indices in self.indices_list:
            model = model_constructor()
            model.fit(data[bag_indices], target[bag_indices])
            # Keep the model object itself rather than the return value of
            # ``fit``: not every estimator returns ``self`` from ``fit``.
            self.models_list.append(model)

        if self.oob:
            # The training set is only needed for the OOB estimate.
            self.data = data
            self.target = target

    def predict(self, data):
        """Return the prediction averaged over all fitted models.

        Args:
            data: array-like of objects, one row per object.

        Returns:
            Array with one averaged prediction per object.
        """
        predictions = [model.predict(data) for model in self.models_list]
        # axis=0 averages across models and keeps one value per object.
        return np.mean(predictions, axis=0)

    def _get_oob_predictions_from_every_model(self):
        """Collect, for every training object, predictions of the models that never saw it.

        Fills ``self.list_of_predictions_lists``: a 1-D object array whose
        i-th element is the list of predictions for ``self.data[i]`` made by
        the models whose bag does not contain object ``i``.

        Raises:
            RuntimeError: if the training data was not stored, i.e. the
                regressor was created with ``oob=False`` or is not fitted.
        """
        if getattr(self, 'data', None) is None:
            raise RuntimeError(
                'OOB predictions require a regressor created with oob=True and fitted.'
            )

        n_objects = len(self.data)
        predictions_per_object = [[] for _ in range(n_objects)]
        for bag_indices, model in zip(self.indices_list, self.models_list):
            # Objects whose index never occurs in this bag are out-of-bag.
            is_oob = np.ones(n_objects, dtype=bool)
            is_oob[bag_indices] = False
            oob_indices = np.flatnonzero(is_oob)
            if oob_indices.size == 0:
                continue
            # One batched call per model instead of one call per object.
            oob_predictions = model.predict(self.data[oob_indices])
            for obj_index, prediction in zip(oob_indices, oob_predictions):
                predictions_per_object[obj_index].append(prediction)

        # Fill element by element: np.array(..., dtype=object) would silently
        # build a 2-D array whenever all inner lists share the same length.
        collected = np.empty(n_objects, dtype=object)
        for obj_index, obj_predictions in enumerate(predictions_per_object):
            collected[obj_index] = obj_predictions
        self.list_of_predictions_lists = collected

    def _get_averaged_oob_predictions(self):
        """Average the OOB predictions for every training object.

        Fills ``self.oob_predictions``: an object array holding the mean OOB
        prediction per object, or ``None`` for objects that were used in all
        bags and therefore have no OOB prediction.
        """
        self._get_oob_predictions_from_every_model()
        self.oob_predictions = np.array(
            [
                np.mean(obj_predictions) if len(obj_predictions) else None
                for obj_predictions in self.list_of_predictions_lists
            ],
            dtype=object,
        )

    def OOB_score(self):
        """Compute the mean squared error over objects with at least one OOB prediction.

        Returns:
            The OOB mean squared error, or ``nan`` when no object has an OOB
            prediction.

        Raises:
            RuntimeError: if the training data was not stored (``oob=False``
                or the regressor is not fitted).
        """
        self._get_averaged_oob_predictions()
        has_prediction = np.array(
            [prediction is not None for prediction in self.oob_predictions],
            dtype=bool,
        )
        if not has_prediction.any():
            return float('nan')
        oob_predictions = self.oob_predictions[has_prediction].astype(float)
        oob_targets = np.asarray(self.target)[has_prediction]
        return np.mean((oob_predictions - oob_targets) ** 2)

### Local tests:

In [None]:
from sklearn.linear_model import LinearRegression
from tqdm.auto import tqdm

#### Simple tests:

In [None]:
# Simple tests: the target is an exact linear function of the features, so
# both the train error and the OOB error are expected to be close to zero.
for _ in tqdm(range(100)):
    X = np.random.randn(2000, 10)
    y = X.mean(axis=1)

    bagging_regressor = SimplifiedBaggingRegressor(num_bags=10, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)

    predictions = bagging_regressor.predict(X)
    assert np.square(predictions - y).mean() < 1e-6, 'Linear dependency should be fitted with almost zero error!'
    assert bagging_regressor.oob, 'OOB feature must be turned on'

    oob_score = bagging_regressor.OOB_score()
    assert oob_score < 1e-6, 'OOB error for linear dependency should be also close to zero!'

    # Average number of stored predictions per object, as a share of the
    # number of bags, compared against 1/e.
    assert abs(
        np.mean([len(votes) for votes in bagging_regressor.list_of_predictions_lists])
        / bagging_regressor.num_bags
        - 1 / np.exp(1)
    ) < 0.1, 'Probability of missing a bag should be close to theoretical value!'

print('Simple tests done!')

#### Medium tests

In [None]:
# Medium tests: random targets with 150 features for 200 objects; the test
# expects the OOB error to exceed the train error.
for _ in tqdm(range(10)):
    X = np.random.randn(200, 150)
    y = np.random.randn(len(X))

    bagging_regressor = SimplifiedBaggingRegressor(num_bags=20, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)

    predictions = bagging_regressor.predict(X)
    average_train_error = np.square(predictions - y).mean()
    assert bagging_regressor.oob, 'OOB feature must be turned on'

    oob_score = bagging_regressor.OOB_score()
    assert oob_score > average_train_error, 'OOB error must be higher than train error due to overfitting!'

    # Average number of stored predictions per object, as a share of the
    # number of bags, compared against 1/e.
    assert abs(
        np.mean([len(votes) for votes in bagging_regressor.list_of_predictions_lists])
        / bagging_regressor.num_bags
        - 1 / np.exp(1)
    ) < 0.1, 'Probability of missing a bag should be close to theoretical value!'

print('Medium tests done!')

#### Complex tests:

In [None]:
# Complex tests: with 100 bags the share of stored predictions per object
# must match 1/e much more tightly (tolerance 1e-2).
for _ in tqdm(range(10)):
    X = np.random.randn(2000, 15)
    y = np.random.randn(len(X))

    bagging_regressor = SimplifiedBaggingRegressor(num_bags=100, oob=True)
    bagging_regressor.fit(LinearRegression, X, y)

    predictions = bagging_regressor.predict(X)
    oob_score = bagging_regressor.OOB_score()

    assert abs(
        np.mean([len(votes) for votes in bagging_regressor.list_of_predictions_lists])
        / bagging_regressor.num_bags
        - 1 / np.exp(1)
    ) < 1e-2, 'Probability of missing a bag should be close to theoretical value!'

print('Complex tests done!')

In [None]:
# Deviation of the average share of stored predictions per object from 1/e,
# computed for the regressor left over from the last executed test loop.
np.mean(
    [len(votes) for votes in bagging_regressor.list_of_predictions_lists]
) / bagging_regressor.num_bags - 1 / np.exp(1)

Great job! Please, save `SimplifiedBaggingRegressor` to  `bagging.py` and submit your solution to the grading system!