In [1]:
import pandas as pd
import numpy as np
import re
import os
import logging
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger()


class TrainingDataPreprocessor(BaseEstimator, TransformerMixin):
    """
    Класс для подготовки обучающих данных.
    Создает еженедельные "снимки" с обнулением будущих недель
    и формированием целевого признака risk_status.
    """
    def __init__(
        self,
        target_prefix='risk_status',
        test_size=0.25,
        random_state=42
    ):
        self.target_prefix = target_prefix
        self.test_size = test_size
        self.random_state = random_state

    @staticmethod
    def _get_week_from_col(col_name: str) -> int:
        """Извлекает номер недели из имени столбца."""
        match = re.search(r'_(\d+)_week$', col_name)
        return int(match.group(1)) if match else -1

    def _create_weekly_snapshots(self, df: pd.DataFrame) -> pd.DataFrame:
        """Создает датасет из еженедельных срезов с обнулением будущих недель."""
        all_records = []
        risk_status_cols = [
            col for col in df.columns if col.startswith(self.target_prefix)
        ]
        id_cols = ['user_id', 'course_id']
        original_feature_cols = [
            col for col in df.columns
            if col not in id_cols and not col.startswith(self.target_prefix)
        ]

        for week_num in range(1, 14):
            target_col = f'{self.target_prefix}_{week_num + 1}_week'
            if target_col not in df.columns:
                continue

            # Формируем DataFrame для текущей недели
            week_df = df.copy()
            week_df['risk_status'] = week_df[target_col]
            week_df['week'] = week_num

            # Обнуляем будущие недели одним проходом
            week_cols = [
                col for col in risk_status_cols
                if self._get_week_from_col(col) > week_num
            ]
            week_df[week_cols] = 0

            all_records.append(week_df)

        if not all_records:
            logger.warning("Не удалось создать еженедельные срезы данных.")
            return pd.DataFrame()

        combined_df = pd.concat(all_records, ignore_index=True)
        combined_df = combined_df.drop(columns=risk_status_cols, errors='ignore')

        final_order = (
            ['week'] + id_cols + original_feature_cols + ['risk_status']
        )
        final_order_existing = [
            col for col in final_order if col in combined_df.columns
        ]
        return combined_df[final_order_existing]

    def process(self, raw_data_paths: list) -> (pd.DataFrame, pd.DataFrame):
        logger.info("Загрузка и объединение данных...")
        df_list = [pd.read_csv(path) for path in raw_data_paths]
        if len(df_list) > 2:
            df_list[2]['course_id'] += 8000
        df_combined = pd.concat(df_list, ignore_index=True)

        logger.info("Разделение на обучающую и тестовую выборки...")
        train_df, test_df = train_test_split(
            df_combined,
            test_size=self.test_size,
            random_state=self.random_state
        )

        logger.info("Формирование обучающего набора данных...")
        train_combined_df = self._create_weekly_snapshots(train_df)

        logger.info("Формирование тестового набора данных...")
        test_combined_df = self._create_weekly_snapshots(test_df)

        logger.info("Предобработка завершена.")
        return train_combined_df, test_combined_df


In [2]:
# from training_data_preprocessor import TrainingDataPreprocessor  # Импортируйте ваш класс

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger()

if __name__ == '__main__':
    RAW_DATA_PATHS = [
        './original_dataset/13_week_data1.csv',
        './original_dataset/13_week_data2.csv',
        './original_dataset/13_week_bas.csv'
    ]
    SAVE_DIR = './saved_datasets'
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Загружаем все датасеты
    df1 = pd.read_csv(RAW_DATA_PATHS[0])
    df2 = pd.read_csv(RAW_DATA_PATHS[1])
    df3 = pd.read_csv(RAW_DATA_PATHS[2])

    # Корректируем course_id только для БАС
    df3['course_id'] += 8000

    # Собираем список датафреймов для препроцессора
    df_list = [df1, df2, df3]

    # Сохраняем временные файлы
    temp_paths = []
    for i, df in enumerate(df_list):
        temp_path = os.path.join(SAVE_DIR, f'temp_input_{i}.csv')
        df.to_csv(temp_path, index=False)
        temp_paths.append(temp_path)

    # Запускаем препроцессор
    preprocessor = TrainingDataPreprocessor()
    train_combined_df, test_combined_df = preprocessor.process(temp_paths)

    # Сохраняем результаты
    train_combined_df.to_csv(
        os.path.join(SAVE_DIR, 'train_combined.csv'), index=False
    )
    test_combined_df.to_csv(
        os.path.join(SAVE_DIR, 'test_combined.csv'), index=False
    )

    logger.info("Данные сохранены.")

    # (Опционально) Удаляем временные файлы
    for path in temp_paths:
        os.remove(path)


2025-07-04 15:34:07,287 - INFO - Загрузка и объединение данных...
2025-07-04 15:34:07,600 - INFO - Разделение на обучающую и тестовую выборки...
2025-07-04 15:34:07,638 - INFO - Формирование обучающего набора данных...
2025-07-04 15:34:07,887 - INFO - Формирование тестового набора данных...
2025-07-04 15:34:07,989 - INFO - Предобработка завершена.
2025-07-04 15:34:19,252 - INFO - Данные сохранены.


In [6]:
def sum_of_digits(num):
    cnt = 0
    while num > 0:
        last_num = num % 10
        cnt += last_num
        num //= 10
    return cnt

numbers = [int(num) for num in input().split()]
print(*sorted(numbers, key=sum_of_digits))

12 111 4 14 123 7 45 90 79


In [14]:
z = [[1,   2,   3,   4],
     [5,   6,   7,   8],
     [9,   10,  11,  12],
     [13,  14,  15]]

In [32]:
all([True, False])	

all([False, False])	

all([True, True])	

all([10, 100, 1000])	

all([10, 100, 0, 1000])	

all(['Python', 'C#'])	

all(['school', '', 'language'])	

all([(1, 2, 3), []])	

all([])	

all([[], []])	

all({0: 'Monday', 1: 'Tuesday', 2: 'Wednesday'})	

all({'name': 'Timur', 'age': 28})	

all({'': 'None', 'age': 28})	

False

In [72]:
def check_ip(s):
    ip = s.split('.')
    return (
        len(ip) == 4 and
        all(part.isdigit() and 0 <= int(part) <= 255 for part in ip)
    )


print(check_ip('10.0.0.0'))

True


In [86]:
num = int('1000001', 2)
print(num)

65


In [88]:
def generate_letter(mail, name, date, time, place, teacher='Тимур Гуев', number=17):
    letter = (
        f"To: {mail}\n"
        f"Приветствую, {name}!\n"
        f"Вам назначен экзамен, который пройдет {date}, в {time}.\n"
        f"По адресу: {place}.\n"
        f"Экзамен будет проводить {teacher} в кабинете {number}.\n"
        f"Желаем удачи на экзамене!"
    )
    return letter

print(generate_letter('lara@yandex.ru', 'Лариса', '10 декабря', '12:00', 'Часова 23, корпус 2'))
print()
print(generate_letter('lara@yandex.ru', 'Лариса', '10 декабря', '12:00', 
                      'Часова 23, корпус 2', 'Василь Ярошевич', 23))

To: lara@yandex.ru
Приветствую,Лариса!
Вам назначен экзамен, который пройдет 10 декабря, в 12:00.
По адресу: Часова 23, корпус 2.
Экзамен будет проводить Тимур Гуев в кабинете 17.
Желаем удачи на экзамене!

To: lara@yandex.ru
Приветствую,Лариса!
Вам назначен экзамен, который пройдет 10 декабря, в 12:00.
По адресу: Часова 23, корпус 2.
Экзамен будет проводить Василь Ярошевич в кабинете 23.
Желаем удачи на экзамене!


In [104]:
def pretty_print(data, side='-', delimiter='|'):
    content = f' {delimiter} ' + f' {delimiter} '.join(str(x) for x in data) + f' {delimiter} '
    frame = side * len(content)
    print(frame)
    print(content)
    print(frame)


In [113]:
from functools import reduce
import operator

def flatten(data):
    return reduce(operator.concat, data, [])

result = flatten([[1, 2], [3, 4], [], [5]])

print(result)

[1, 2, 3, 4, 5]


In [118]:
def concat(*args, sep=' '):
    return sep.join(map(str, args))


print(concat('hello', 'python', 'and', 'stepik'))
print(concat('hello', 'python', 'and', 'stepik', sep='*'))
print(concat('hello', 'python', sep='()()()'))
print(concat('hello', sep='()'))
print(concat(1, 2, 3, 4, 5, 6, 7, 8, 9, sep='$$'))

hello python and stepik
hello*python*and*stepik
hello()()()python
hello
1$$2$$3$$4$$5$$6$$7$$8$$9


In [129]:
from operator import *

def arithmetic_operation(operation):
    operations = {
        '+': add,
        '-': sub,
        '*': mul,
        '/': truediv
    }
    
    return lambda x, y: operations[operation](x, y)

add = arithmetic_operation('+')
div = arithmetic_operation('/')
print(add(10, 20))
print(div(20, 5))


30
4.0


In [140]:
def check_numbers(a, b):
    result = []
    for num in range(a, b + 1):
        s = str(num)
        if '0' in s:
            continue  # пропускаем числа с нулём
        if all(num % int(d) == 0 for d in s):
            result.append(num)
    return result



print(*check_numbers(input().split()))

22 24


In [151]:
with open(r'c:\Users\user\Downloads\nums.txt', encoding='utf-8') as file:
    text = file.read()
    
    total = 0
    num = ''
    for char in text:
        if char.isdigit():
            num += char
        else:
            if num:
                total += int(num)
                num = ''
    if num:
        total += int(num)

    print(total)


124410


In [157]:

with open(r'c:\Users\user\Downloads\file.txt', encoding='utf-8') as file:
    lines = 0
    words = 0
    letters = 0
    for line in file:
        lines += 1
        words += len(line.split())
        letters += sum(1 for c in line if c.isalpha())


    print('Input file contains:')
    print(letters, 'letters')
    print(words, 'words')
    print(lines, 'lines')

Input file contains:
1069 letters
229 words
12 lines


In [174]:
from random import choice
with open(r'c:\Users\user\Downloads\first_names.txt', encoding='utf-8') as first_names, open(r'c:\Users\user\Downloads\last_names.txt') as last_names:
    first_names = [name for name in first_names]
    last_names = [name for name in last_names]

    for i in range(3):
        print(choice(first_names).strip(), choice(last_names).strip())

David Trahan
Freddy Perryman
Erasmo Roldan


In [196]:
with open(r'c:\Users\user\Downloads\population.txt', encoding='utf-8') as data:
    d = {}
    for line in data:
        country, population = line.strip().split('\t')
        population = int(population)
        d[country] = population

    result = {country: pop for country, pop in d.items() if country.startswith('G') and pop >= 500_000}
    print(*result.keys(), sep='\n')


Germany
Ghana
Guatemala
Greece
Guinea
Georgia
Guinea-Bissau
Gabon
Guyana


In [197]:
filepath = r'c:\Users\user\Downloads\data.csv'

In [None]:
def read_csv():
    FILEPATH = r'c:\Users\user\Downloads\data.csv'
    with open(FILEPATH) as csv:
        lst = []
        d = {}
        lines = [line.split(',') for line in csv.readlines()]

        for i in range(1, len(lines)):
            for j in range (len(lines[0])):
                d[lines[0][j].strip()] = lines[i][j].strip()
            lst.append(d)

        return(lst)

read_csv()
    

[{'policyID': '142071',
  'statecode': 'FL',
  'county': 'CLAY COUNTY',
  'eq_site_limit': '705600',
  'hu_site_limit': '705600',
  'fl_site_limit': '705600',
  'fr_site_limit': '705600',
  'tiv_2011': '705600',
  'tiv_2012': '1010842.56',
  'eq_site_deductible': '14112',
  'hu_site_deductible': '35280',
  'fl_site_deductible': '0',
  'fr_site_deductible': '0',
  'point_latitude': '30.100628',
  'point_longitude': '-81.703751',
  'line': 'Residential',
  'construction': 'Masonry',
  'point_granularity': '1'},
 {'policyID': '142071',
  'statecode': 'FL',
  'county': 'CLAY COUNTY',
  'eq_site_limit': '705600',
  'hu_site_limit': '705600',
  'fl_site_limit': '705600',
  'fr_site_limit': '705600',
  'tiv_2011': '705600',
  'tiv_2012': '1010842.56',
  'eq_site_deductible': '14112',
  'hu_site_deductible': '35280',
  'fl_site_deductible': '0',
  'fr_site_deductible': '0',
  'point_latitude': '30.100628',
  'point_longitude': '-81.703751',
  'line': 'Residential',
  'construction': 'Masonry',

In [199]:
import pandas as pd
df = pd.read_csv(filepath)
df

Unnamed: 0,policyID,statecode,county,eq_site_limit,hu_site_limit,fl_site_limit,fr_site_limit,tiv_2011,tiv_2012,eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible,point_latitude,point_longitude,line,construction,point_granularity
0,119736,FL,CLAY COUNTY,498960.0,498960.0,498960.0,498960.0,498960.0,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1
1,448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0.0,0,0,30.063936,-81.707664,Residential,Masonry,3
2,206893,FL,CLAY COUNTY,190724.4,190724.4,190724.4,190724.4,190724.4,192476.78,0,0.0,0,0,30.089579,-81.700455,Residential,Wood,1
3,333743,FL,CLAY COUNTY,0.0,79520.76,0.0,0.0,79520.76,86854.48,0,0.0,0,0,30.063236,-81.707703,Residential,Wood,3
4,172534,FL,CLAY COUNTY,0.0,254281.5,0.0,254281.5,254281.5,246144.49,0,0.0,0,0,30.060614,-81.702675,Residential,Wood,1
5,785275,FL,CLAY COUNTY,0.0,515035.62,0.0,0.0,515035.62,884419.17,0,0.0,0,0,30.063236,-81.707703,Residential,Masonry,3
6,995932,FL,CLAY COUNTY,0.0,19260000.0,0.0,0.0,19260000.0,20610000.0,0,0.0,0,0,30.102226,-81.713882,Commercial,Reinforced Concrete,1
7,223488,FL,CLAY COUNTY,328500.0,328500.0,328500.0,328500.0,328500.0,348374.25,0,16425.0,0,0,30.102217,-81.707146,Residential,Wood,1
8,433512,FL,CLAY COUNTY,315000.0,315000.0,315000.0,315000.0,315000.0,265821.57,0,15750.0,0,0,30.118774,-81.704613,Residential,Wood,1
9,142071,FL,CLAY COUNTY,705600.0,705600.0,705600.0,705600.0,705600.0,1010842.56,14112,35280.0,0,0,30.100628,-81.703751,Residential,Masonry,1


In [5]:
square = lambda x: x ** 2
complex_numbers = [complex(i, j) for i in range(11) for j in range(1, 11)]

dct = {}
for i in complex_numbers:
    dct[i] = square(i)
print(dct)

{1j: (-1+0j), 2j: (-4+0j), 3j: (-9+0j), 4j: (-16+0j), 5j: (-25+0j), 6j: (-36+0j), 7j: (-49+0j), 8j: (-64+0j), 9j: (-81+0j), 10j: (-100+0j), (1+1j): 2j, (1+2j): (-3+4j), (1+3j): (-8+6j), (1+4j): (-15+8j), (1+5j): (-24+10j), (1+6j): (-35+12j), (1+7j): (-48+14j), (1+8j): (-63+16j), (1+9j): (-80+18j), (1+10j): (-99+20j), (2+1j): (3+4j), (2+2j): 8j, (2+3j): (-5+12j), (2+4j): (-12+16j), (2+5j): (-21+20j), (2+6j): (-32+24j), (2+7j): (-45+28j), (2+8j): (-60+32j), (2+9j): (-77+36j), (2+10j): (-96+40j), (3+1j): (8+6j), (3+2j): (5+12j), (3+3j): 18j, (3+4j): (-7+24j), (3+5j): (-16+30j), (3+6j): (-27+36j), (3+7j): (-40+42j), (3+8j): (-55+48j), (3+9j): (-72+54j), (3+10j): (-91+60j), (4+1j): (15+8j), (4+2j): (12+16j), (4+3j): (7+24j), (4+4j): 32j, (4+5j): (-9+40j), (4+6j): (-20+48j), (4+7j): (-33+56j), (4+8j): (-48+64j), (4+9j): (-65+72j), (4+10j): (-84+80j), (5+1j): (24+10j), (5+2j): (21+20j), (5+3j): (16+30j), (5+4j): (9+40j), (5+5j): 50j, (5+6j): (-11+60j), (5+7j): (-24+70j), (5+8j): (-39+80j), (5

In [22]:
result = 1
for i in range(2, 100001):
    result += 1 / i ** 2

print(result)

1.6449240668982423


In [None]:
with open('input.txt', 'r', encoding='utf-8') as file, open ('output.txt', 'w', encoding='utf-8') as out:
    text = [word for word in file.read().split()]

    for i, j in enumerate(text, 1):
        print (f'{i}) {j}', file=out)

In [None]:
with open(r'c:/users/user/downloads/class_scores.txt', 'r', encoding='utf-8') as file, open (r'c:/users/user/downloads/new_scores.txt', 'w', encoding='utf-8') as new_scores:
    scores = list(map(lambda lst: lst.split(), file.readlines()))
    gift_scores = list(map(lambda lst: int(lst[1]) + 5, scores))


In [67]:
with open(r'c:/users/user/downloads/class_scores.txt', 'r', encoding='utf-8') as file, \
     open(r'c:/users/user/downloads/new_scores.txt', 'w', encoding='utf-8') as new_scores:

    scores = list(map(lambda line: line.strip().split(), file))

    gift_scores = list(map(lambda lst: [lst[0], int(lst[1]) + 5 if int(lst[1]) <= 95 else 100], scores))

    for name, score in gift_scores:
        new_scores.write(f'{name} {score}\n')
