In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [88]:
diabetes = pd.read_csv('data/diabetes_data.csv')
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Gender
0,6,98,58,33,190,34.0,0.43,43,0,Female
1,2,112,75,32,0,35.7,0.148,21,0,Female
2,2,108,64,0,0,30.8,0.158,21,0,Female
3,8,107,80,0,0,24.6,0.856,34,0,Female
4,7,136,90,0,0,29.9,0.21,50,0,Female


In [89]:
dupl_mask = diabetes.duplicated()
dupl_mask.shape[0]
diabetes = diabetes.drop_duplicates()

In [90]:
diabetes.count()

Pregnancies                 768
Glucose                     768
BloodPressure               768
SkinThickness               768
Insulin                     768
BMI                         768
DiabetesPedigreeFunction    768
Age                         768
Outcome                     768
Gender                      768
dtype: int64

In [91]:
low_info = []
for col in diabetes.columns:
    top_freq = diabetes[col].value_counts(normalize=True).max()
    nunique_rate = diabetes[col].nunique() / diabetes[col].count()
    if top_freq > 0.95:
        low_info.append(col)
        print(f'{col}: {round(top_freq*100, 2)}% одинаковых значений')
    if nunique_rate > 0.95:
        low_info.append(col)
        print(f'{col}: {round(nunique_rate*100, 2)}% уникальных значений')

Gender: 100.0% одинаковых значений


In [92]:
diabetes.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Gender
0,6,98,58,33,190,34.0,0.43,43,0,Female
1,2,112,75,32,0,35.7,0.148,21,0,Female
2,2,108,64,0,0,30.8,0.158,21,0,Female
3,8,107,80,0,0,24.6,0.856,34,0,Female
4,7,136,90,0,0,29.9,0.21,50,0,Female


In [93]:
diabetes.isnull()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Gender
0,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
763,False,False,False,False,False,False,False,False,False,False
764,False,False,False,False,False,False,False,False,False,False
765,False,False,False,False,False,False,False,False,False,False
766,False,False,False,False,False,False,False,False,False,False


In [94]:
diabetes[diabetes['Insulin'] == 0].count() / diabetes.count()

Pregnancies                 0.486979
Glucose                     0.486979
BloodPressure               0.486979
SkinThickness               0.486979
Insulin                     0.486979
BMI                         0.486979
DiabetesPedigreeFunction    0.486979
Age                         0.486979
Outcome                     0.486979
Gender                      0.486979
dtype: float64

In [95]:
import numpy as np

In [96]:

diabetes["Glucose"] = diabetes["Glucose"].apply(lambda x: np.nan if x == 0 else x)
diabetes["BloodPressure"] = diabetes["BloodPressure"].apply(lambda x: np.nan if x == 0 else x)
diabetes["SkinThickness"] = diabetes["SkinThickness"].apply(lambda x: np.nan if x == 0 else x)
diabetes["Insulin"] = diabetes["Insulin"].apply(lambda x: np.nan if x == 0 else x)
diabetes["BMI"] = diabetes["BMI"].apply(lambda x: np.nan if x == 0 else x)

In [97]:
tresh = diabetes.shape[0] * 0.7

In [98]:
print(tresh)

537.5999999999999


In [99]:
diabetes = diabetes.dropna(axis=1, thresh=tresh)

In [100]:
colls_null_proc = diabetes.isnull().mean() * 100

In [101]:
print(colls_null_proc)

Pregnancies                  0.000000
Glucose                      0.651042
BloodPressure                4.557292
SkinThickness               29.557292
BMI                          1.432292
DiabetesPedigreeFunction     0.000000
Age                          0.000000
Outcome                      0.000000
Gender                       0.000000
dtype: float64


In [102]:
A = len(diabetes.columns)


In [103]:
print(A)

9


In [104]:
diabetes = diabetes.dropna(axis=0, thresh=6)

In [105]:
diabetes.shape[0]

768

In [106]:
colls_null_proc = diabetes.isnull().mean() * 100
colls_null_contain = colls_null_proc[colls_null_proc > 0]
print(colls_null_contain)

Glucose           0.651042
BloodPressure     4.557292
SkinThickness    29.557292
BMI               1.432292
dtype: float64


In [107]:
fillna_dict = {'Glucose': diabetes['Glucose'].median(),
               'BloodPressure': diabetes['BloodPressure'].median(),
               'SkinThickness': diabetes['SkinThickness'].median(),
               'BMI': diabetes['BMI'].median()}

In [108]:
diabetes = diabetes.fillna(fillna_dict)

In [109]:
diabetes['SkinThickness'].describe()

count    768.000000
mean      29.108073
std        8.791221
min        7.000000
25%       25.000000
50%       29.000000
75%       32.000000
max       99.000000
Name: SkinThickness, dtype: float64

In [110]:
def outliers_iqr_mod(data, feature, n_iqr=1.5):
    x = data[feature]
    quartile_1, quartile_3 = x.quantile(0.25), x.quantile(0.75),
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - (iqr * n_iqr)
    upper_bound = quartile_3 + (iqr * n_iqr)
    outliers = data[(x < lower_bound) | (x > upper_bound)]
    cleaned = data[(x >= lower_bound) & (x <= upper_bound)]
    return outliers, cleaned


In [111]:
outliers, cleaned = outliers_iqr_mod(diabetes, feature='SkinThickness')
print(f'Выбросов по итогу {outliers.shape[0]}')

Выбросов по итогу 87


In [112]:
def outliers_z_score_mod(data, feature, log_scale=False, left=3, right=3):
    if log_scale:
        x = np.log(data[feature]+1)
    else:
        x = data[feature]
    mu = x.mean()
    sigma = x.std()
    lower_bound = mu - left * sigma
    upper_bound = mu + right * sigma
    outliers = data[(x < lower_bound) | (x > upper_bound)]
    cleaned = data[(x >= lower_bound) & (x <= upper_bound)]
    return outliers, cleaned

In [113]:
diabetes.shape[0]

768

In [114]:
outliers, cleaned = outliers_z_score_mod(data=diabetes, feature='SkinThickness')
print(f'Выбросов по итогу {outliers.shape[0]}')

Выбросов по итогу 4


In [116]:
outliers, cleaned = outliers_z_score_mod(data=diabetes, feature='DiabetesPedigreeFunction')
print(f'Выбросов по итогу {outliers.shape[0]}')

Выбросов по итогу 11


In [117]:
outliers, cleaned = outliers_z_score_mod(data=diabetes, feature='DiabetesPedigreeFunction', log_scale=True)
print(f'Выбросов по итогу {outliers.shape[0]}')

Выбросов по итогу 9


In [119]:
f = turtle

NameError: name 'turtle' is not defined

In [None]:
class SalesReport():
    

In [14]:
class DepartmentReport():

    def __init__(self, company_name):
        """
        Метод инициализации класса. 
        Создаёт атрибуты revenues и company
        """
        #ваш код здесь
        
        self.revenues = []
        self.company = company_name
    
    def add_revenue(self, amount):
        """
        Метод для добавления выручки отдела в список revenues.
        Если атрибута revenues ещё не существует, метод должен создавать пустой список перед добавлением выручки.
        """
        #ваш код здесь
        self.revenues.append(amount)
    
    def average_revenue(self):
        """
        Вычисляет average_revenue — среднюю выручку по отделам — округляя до целого.
        Метод возвращает строку в формате:
        'Average department revenue for <company>: <average_revenue>'
        """
        #ваш код здесь
        print('Average department revenue for')

In [16]:

res = DepartmentReport('Danon')
res.add_revenue(500000)
res.add_revenue(150000)
res.add_revenue(1500)


print(res.average_revenue())


Average department revenue for
None


In [None]:
class IntDataFrame():
    def __init__(self, list, counter=0):
        self.list = list
        self.column = []
        self.counter = counter
        self.uni_list = []
        self.list_to_int()
    def list_to_int(self):
        for i in self.list:
            i = int(i)
            self.column.append(i)
    def count(self):
        for i in self.column:
            if not i == 0:
                self.counter += 1
            return self.counter
    def unique(self):
        for i in self.column:
            if not i in self.uni_list:
                self.uni_list.append(i)
            return len(self.uni_list)

In [23]:
s = {"info": None, "warning": None, "error": None, "all": None}
a = s['info'] = ['fa', 'ffg']
print(a[-1])

ffg


In [46]:
class OwnLogger():
    def __init__(self):
        self.logs = {"info": None, "warning": None, "error": None, "all": None}
    def log(self, message, level):
        if self.logs['all'] == None:
            self.logs['all'] = [message]
        else:
            self.logs['all'] = self.logs['all'].append(message)
        if self.logs[level] == None:
            self.logs[level] = [message]
        else:
            self.logs[level] = self.logs[level].append(message)
    def show_last(self, level='all'):
        if level == 'all' and self.logs['all'] == None:
            return None
        elif level == 'all':
            return self.logs['all'][-1]
        elif self.logs[level] == None:
            return None
        else:
            return self.logs[level][-1] 
    def show_log(self):
        print(self.logs)

In [49]:
class OwnLogger():
   def __init__(self):
       self.logs = {"info": None, "warning": None, "error": None, "all": None}
   def log(self, message, level):
       self.logs[level] = message
       self.logs['all'] = message
   def show_last(self, level='all'):
       return self.logs[level]

In [51]:
res = OwnLogger()
res.log(message='System started', level='info')
res.log(message='System started 2', level='info')
res.log(message='Warning message', level='warning')




print(res.show_last(level='info'))

System started 2


In [48]:
print(res.show_log())

None
