In [None]:
import pandas as pd
import numpy as np

##### Будем работать с датасетом по оттоку клиентов из банка https://www.kaggle.com/datasets/shubh0799/churn-modelling, но датасет из себя будет представлять две таблицы:

1. Личные данные клиента

  A. CustomerId - Уникальный идентификатор клиента

  B. Surname - Фамилия клиента

  C. Geography - Из какой страны клиент

  D. Gender - Пол клиента

  E. Age - Возраст клиента

  F. EstimatedSalary - Предположительная зарплата клиента

2. Данные по поведению клиента в банке

  A. CustomerId - Уникальный идентификатор клиента

  B. CreditScore - Кредитный рейтинг клиента

  C. Tenure - Сколько лет человек является клиентом банка

  D. Balance - Баланс счета

  E. NumOfProducts - Количество открытых продуктов

  F. HasCrCard - Есть ли у клиента кредитная карта

  G. IsActiveMember - Является ли клиент активным участником
  
  H. Exited - Уйдет ли человек в отток

In [None]:
# Read the source churn table from the local CSV file.
# (The previous chained assignment `df = pd.readf = ...` also attached the
# frame to the pandas module as a stray `pd.readf` attribute.)
df = pd.read_csv('Churn_Modelling.csv')
df

In [None]:
# Personal-data table. `.copy()` makes `users` an independent frame, so the
# column assignments in later cells modify it directly rather than a selection
# taken from `df` (which raises SettingWithCopyWarning).
user_columns = ['CustomerId', 'Surname', 'Geography', 'Gender', 'Age', 'EstimatedSalary']
users = df[user_columns].copy()
users

In [None]:
# Bank-behaviour table. CreditScore is included because a later cell filters
# on merged_right['CreditScore']; `.copy()` detaches the result from `df`.
bank_columns = ['CustomerId', 'CreditScore', 'Tenure', 'Balance', 'NumOfProducts',
                'HasCrCard', 'IsActiveMember', 'Exited']
bank = df[bank_columns].copy()
bank

In [None]:
#users = pd.read_csv('users.csv', sep=';')
#users.head()

In [None]:
users.shape

#### Создание новых признаков

In [None]:
users['new_feature'] = 0
users.head()

In [None]:
users['Age (days)'] = users['Age'] * 365
users.head()

In [None]:
for i, row in users.iloc[:2].iterrows():
    print(row)
    print('__' * 30)

In [None]:
# Row-by-row conversion of age (years) to days, walking the frame with iterrows().
age_days = [record['Age'] * 365 for _, record in users.iterrows()]

age_days[:10]

In [None]:
users['Age (days) 2'] = age_days
users.head()

In [None]:
def age_to_days(x):
    """Convert an age expressed in years to days, counting 365 days per year."""
    days_in_year = 365
    return x * days_in_year

users['Age (days) 3'] = users['Age'].apply(age_to_days)
users.head()

In [None]:
import time
from tqdm import tqdm
tqdm.pandas()


In [None]:
# `time`, `tqdm` and `tqdm.pandas()` are set up in the previous cell.
# The delayed variant gets its own name so that it does not shadow the fast
# `age_to_days` defined earlier in the notebook.
def age_to_days_slow(x):
    """Convert years to days, sleeping 1 ms per call so the progress bar is visible."""
    time.sleep(0.001)
    return x * 365

users['Age'].progress_apply(age_to_days_slow)

#### Удаление признаков

In [None]:
# Demonstration: drop() returns a new DataFrame. The result is not assigned
# here (and inplace=True is not passed), so `users` still has 'new_feature'.
users.drop(columns='new_feature')
users.head()

In [None]:
users = users.drop(columns='new_feature')
users.head()

In [None]:
users['new_feature'] = 0

In [None]:
users.drop(columns='new_feature', inplace=True)
users.head()

In [None]:
users.drop(columns=['Age (days)', 'Age (days) 2', 'Age (days) 3'], inplace=True)
users.head()

#### Изменение существующих признаков
#### .loc

In [None]:
users['target'] = 0
users.head()

In [None]:
users.loc[users['Geography'] == 'France']

In [None]:
users.loc[users['Geography'] == 'France', 'target']

In [None]:
# Anti-pattern kept for demonstration: chained indexing assigns into the
# intermediate object produced by the boolean filter, so `users` is not
# reliably updated (pandas warns with SettingWithCopyWarning).
# The single .loc[...] assignment in the next cell is the correct form.
users[users['Geography'] == 'France']['target'] = 1
users.head()

In [None]:
users.loc[users['Geography'] == 'France', 'target'] = 1
users.head()

##### .replace

In [None]:
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the in-place form operates on an intermediate Series and
# does not update `users` when pandas Copy-on-Write is active.
users['Gender'] = users['Gender'].replace({'Female': 'F', 'Male': 'M'})
users.head()

### Методы агрегации

In [None]:
users['Age'].agg(['min', 'max'])

In [None]:
users.agg({
    'Age': ['min', 'max'],
    'EstimatedSalary': 'mean'
})

In [None]:
users.agg(
    min_age=('Age', 'min'),
    max_age=('Age', 'max'),
    mean_salary=('EstimatedSalary', 'mean')
)

#### Методы объединения

In [None]:
#bank = pd.read_csv('bank.csv', sep=';')
#bank.head()

In [None]:
bank.shape

In [None]:
# Join personal and bank data on the shared CustomerId key (default inner join).
merged = users.merge(bank, on='CustomerId')
merged.head()

In [None]:
users_id = users.set_index('CustomerId')
users_id.head()

In [None]:
bank_id = bank.set_index('CustomerId')
bank_id.head()

In [None]:
bank_id.join(users_id).head()

In [None]:
bank_id.join(users_id).reset_index().head()

In [None]:
bank.shape

### Атрибут how


In [None]:
toy_df1 = pd.DataFrame({
    'col_1': [1, 2, 3],
    'col_2': [9, 9, 9]
})

toy_df2 = pd.DataFrame({
    'col_1': [3, 4],
    'col_3': [0, 0]
})

display(toy_df1, toy_df2)

In [None]:
toy_df1.merge(toy_df2, how='left')

In [None]:
toy_df1.merge(toy_df2, how='right')

In [None]:
toy_df1.merge(toy_df2, how='inner')

In [None]:
toy_df1.merge(toy_df2, how='outer')

In [None]:
merged_left = bank.merge(users, on='CustomerId', how='left')
merged_left.shape

In [None]:
merged_left.isna().sum()

In [None]:
merged_left[merged_left['Age'].isna()]

In [None]:
users[users['CustomerId'] == 15682355]

#### right

In [None]:
merged_right = bank.merge(users, on='CustomerId', how='right')
merged_right.shape

In [None]:
merged_right.isna().sum()

In [None]:
merged_right[merged_right['CreditScore'].isna()]

In [None]:
bank[bank['CustomerId'] == 15611325]

#### inner

In [None]:
merged_inner = bank.merge(users, on='CustomerId', how='inner')
merged_inner.shape

In [None]:
merged_inner.isna().sum()

#### outer

In [None]:
merged_outer = bank.merge(users, on='CustomerId', how='outer')
merged_outer.shape

In [None]:
merged_outer.isna().sum()

#### Методы группировок
##### groupby

In [None]:
toy_df = pd.DataFrame({
    'client_id': [1, 2, 2, 3, 1, 1],
    'item': ['chocolate', 'cheese', 'ham', 'candy', 'chair', 'book'],
    'price': [68, 280, 302, 39, 2099, 1089]
})

toy_df

In [None]:
grouped = toy_df.groupby('client_id')
grouped

In [None]:
grouped.groups

In [None]:
grouped.agg({'price': ['sum', 'min', 'max']})

In [None]:
users.groupby('Geography').agg({'Age': ['mean'], 'EstimatedSalary': ['min']})

#### pivot_table

In [None]:
toy_df.pivot_table(index='client_id',
                   values='price',
                   aggfunc='sum')

In [None]:
users.pivot_table(index='Geography',
                  aggfunc={'Age': ['mean'], 'EstimatedSalary': 'min'})

In [None]:
users.pivot_table(index='Geography',
                  columns='Gender', 
                  values='EstimatedSalary',
                  aggfunc='mean',
                  margins=True,
                  margins_name='Total')

#### crosstab

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'])

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            values=users['EstimatedSalary'],
            aggfunc='mean')

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='all')

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='index')

In [None]:
pd.crosstab(index=users['Geography'],
            columns=users['Gender'],
            normalize='columns')

#### Встроенные визуализации

In [None]:
import matplotlib.pyplot as plt # some imports to set up plotting
import seaborn as sns # pip install seaborn

import warnings
warnings.filterwarnings('ignore')


In [None]:
import matplotlib

In [None]:
users['Age'].hist()

In [None]:
data = users.groupby('Gender').count()['Age']
data.name = 'Gender'
data

In [None]:
data.plot.pie(y='Gender')

In [None]:
users.iloc[:100].plot.scatter(x='Age', y='EstimatedSalary')

In [None]:
data = bank.groupby('Tenure').count()['Balance']
data.name = 'num_clients'
data

In [None]:
data.plot.bar(width=0.8)

#### Seminar3

In [4]:
import pandas as pd
df = pd.read_csv('./laptop_price.csv',encoding='latin1')
df.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


### 1.1 Создать новый признак Cpu_Company, который будет содержать только название фирмы, которая произвела CPU


In [5]:
# The CPU vendor is the first space-separated token of the 'Cpu' string.
# The vectorized .str accessor replaces the per-row lambda and leaves
# missing values as NaN instead of raising.
df['Cpu_Company'] = df['Cpu'].str.split(' ').str[0]
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Cpu_Company
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel


1.2 Создать новый признак Memory_Amount, который будет содержать только количество Gb памяти без указания типа носителя


In [6]:
def convert_to_gb(x):
    """Return the capacity of the first drive in a memory description, in whole GB.

    Only the first space-separated token is inspected, so for a combined
    description such as '256GB SSD +  1TB HDD' the result is 256.

    Parameters:
        x: memory description whose first token is a number followed by
           'GB' or 'TB' (e.g. '128GB SSD', '1.0TB Hybrid').

    Returns:
        int: capacity in gigabytes (1 TB is counted as 1024 GB); fractional
        values are truncated.

    Raises:
        ValueError: if the first token does not end with 'GB' or 'TB', or
        its numeric part cannot be parsed.
    """
    memory = x.split(' ')[0]
    if memory.endswith('GB'):
        # float() first so that values like '1.0GB' parse the same way as '1.0TB'.
        res = float(memory[:-2])
    elif memory.endswith('TB'):
        res = float(memory[:-2]) * 1024
    else:
        # Previously this path fell through to `return int(res)` with `res` unbound.
        raise ValueError(f"unsupported memory size {memory!r} in {x!r}")
    return int(res)

In [7]:
df['Memory_Amount'] = df['Memory'].apply(convert_to_gb)
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel,128
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel,128
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel,256
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel,512
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel,256


1.3 Создать новый признак Memory_Type, который будет содержать только тип носителя (HDD/SSD/др.)


In [8]:
def conver_to_type(x):
    """Extract the storage type(s) from a memory description string.

    The string is cut at every 'B' (the end of the 'GB'/'TB' units). Without
    a '+' the text after the last 'B' is returned. With a '+' the text
    between the first 'B' and the '+' is joined by a space with the text
    after the last 'B', e.g. '128GB SSD +  1TB HDD' -> 'SSD HDD'.
    """
    has_plus = '+' in x
    pieces = x.split('B')
    last_type = pieces[-1].strip()
    if not has_plus:
        return last_type
    # Second piece holds the first drive's type followed by '+' and the next size.
    middle = pieces[1]
    first_type = middle[:middle.find('+')].strip()
    return f'{first_type} {last_type}'

df['Memory_Type'] = df['Memory'].apply(conver_to_type)
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel,128,SSD
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel,128,Flash Storage
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel,256,SSD
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel,512,SSD
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel,256,SSD


1.4 Удалите признаки Memory и ScreenResolution


In [9]:
df.drop(columns=['Memory', 'ScreenResolution'], inplace=True)
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type
0,1,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel,128,SSD
1,2,Apple,Macbook Air,Ultrabook,13.3,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel,128,Flash Storage
2,3,HP,250 G6,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel,256,SSD
3,4,Apple,MacBook Pro,Ultrabook,15.4,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel,512,SSD
4,5,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel,256,SSD


2.1 Создайте признак SSD, который изначально равен 0


In [10]:
df['SSD'] = 0
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type,SSD
0,1,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel,128,SSD,0
1,2,Apple,Macbook Air,Ultrabook,13.3,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel,128,Flash Storage,0
2,3,HP,250 G6,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel,256,SSD,0
3,4,Apple,MacBook Pro,Ultrabook,15.4,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel,512,SSD,0
4,5,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel,256,SSD,0


2.2 Поставьте в признаке SSD 1, если ноутбук действительно с типом носителя SSD


In [11]:
df.loc[df['Memory_Type'] == 'SSD', 'SSD'] = 1
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type,SSD
0,1,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Intel,128,SSD,1
1,2,Apple,Macbook Air,Ultrabook,13.3,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94,Intel,128,Flash Storage,0
2,3,HP,250 G6,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86kg,575.0,Intel,256,SSD,1
3,4,Apple,MacBook Pro,Ultrabook,15.4,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Intel,512,SSD,1
4,5,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Intel,256,SSD,1


2.3 Уберите в признаке Weight значения 'kg' и поменяйте его тип данных на вещественный


In [12]:
# Strip the literal 'kg' suffix (regex=False: plain substring match, not a
# pattern) and store the weight as a float column.
df['Weight'] = df['Weight'].str.replace('kg', '', regex=False).astype('float')
# Only the last expression of a cell is displayed, so the former mid-cell
# `df.head()` had no effect and was removed.
df['Weight'].dtype


dtype('float64')

3.1 Присоедините к таблице clients данные по ноутбукам через метод join
Это нужно, чтобы понимать, какие ноутбуки покупались клиентами

laptop_id - это индексы датафрейма с ноутбуками


In [14]:
clients = pd.DataFrame({
    'client_id': [45, 32, 67, 33, 43],
    'laptop_id': [506, 398, 710, 120, 1999]
})


In [15]:
clients

Unnamed: 0,client_id,laptop_id
0,45,506
1,32,398
2,67,710
3,33,120
4,43,1999


In [16]:
clints_lap_id = clients.set_index('laptop_id')
clints_lap_id


Unnamed: 0_level_0,client_id
laptop_id,Unnamed: 1_level_1
506,45
398,32
710,67
120,33
1999,43


In [17]:
joined = clints_lap_id.join(df)
joined

Unnamed: 0_level_0,client_id,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type,SSD
laptop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
506,45,513.0,Asus,ZenBook UX510UX-CN211T,Notebook,15.6,Intel Core i7 7500U 2.7GHz,8GB,Intel HD Graphics 620,Windows 10,2.0,1224.0,Intel,256.0,SSD HDD,0.0
398,32,405.0,Dell,Precision M5520,Workstation,15.6,Intel Core i7 7700HQ 2.8GHz,8GB,Nvidia Quadro M1200,Windows 10,1.78,2712.0,Intel,256.0,SSD,1.0
710,67,718.0,Lenovo,Legion Y520-15IKBN,Gaming,15.6,Intel Core i7 7700HQ 2.8GHz,8GB,Nvidia GeForce GTX 1050 Ti,Windows 10,2.5,1249.0,Intel,128.0,SSD HDD,0.0
120,33,123.0,Acer,Spin 3,Notebook,15.6,Intel Core i3 7100U 2.4GHz,6GB,Intel HD Graphics 620,Windows 10,2.1,479.0,Intel,1024.0,HDD,0.0
1999,43,,,,,,,,,,,,,,,


3.2 Присоедините к таблице clients данные по ноутбукам через метод merge
Это нужно, чтобы понимать, какие ноутбуки покупались клиентами

laptop_id - это индексы датафрейма с ноутбуками


In [18]:
clints_lap_id = clients
clints_lap_id


Unnamed: 0,client_id,laptop_id
0,45,506
1,32,398
2,67,710
3,33,120
4,43,1999


In [19]:
df['laptop_id'] = df.index
df.head()


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type,SSD,laptop_id
0,1,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,Intel,128,SSD,1,0
1,2,Apple,Macbook Air,Ultrabook,13.3,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34,898.94,Intel,128,Flash Storage,0,1
2,3,HP,250 G6,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86,575.0,Intel,256,SSD,1,2
3,4,Apple,MacBook Pro,Ultrabook,15.4,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83,2537.45,Intel,512,SSD,1,3
4,5,Apple,MacBook Pro,Ultrabook,13.3,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,Intel,256,SSD,1,4


In [20]:
merged = clints_lap_id.merge(df, on='laptop_id')
merged

Unnamed: 0,client_id,laptop_id,laptop_ID,Company,Product,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Cpu_Company,Memory_Amount,Memory_Type,SSD
0,45,506,513,Asus,ZenBook UX510UX-CN211T,Notebook,15.6,Intel Core i7 7500U 2.7GHz,8GB,Intel HD Graphics 620,Windows 10,2.0,1224.0,Intel,256,SSD HDD,0
1,32,398,405,Dell,Precision M5520,Workstation,15.6,Intel Core i7 7700HQ 2.8GHz,8GB,Nvidia Quadro M1200,Windows 10,1.78,2712.0,Intel,256,SSD,1
2,67,710,718,Lenovo,Legion Y520-15IKBN,Gaming,15.6,Intel Core i7 7700HQ 2.8GHz,8GB,Nvidia GeForce GTX 1050 Ti,Windows 10,2.5,1249.0,Intel,128,SSD HDD,0
3,33,120,123,Acer,Spin 3,Notebook,15.6,Intel Core i3 7100U 2.4GHz,6GB,Intel HD Graphics 620,Windows 10,2.1,479.0,Intel,1024,HDD,0


4.1 Найдите среднюю стоимость ноутбуков в зависимости от компании производителя
Отсортируйте от меньшей стоимости к большей


In [21]:
df.groupby('Company').agg({'Price_euros': 'mean'}).sort_values('Price_euros')


Unnamed: 0_level_0,Price_euros
Company,Unnamed: 1_level_1
Vero,217.425
Mediacom,295.0
Chuwi,314.296667
Acer,626.775825
Fujitsu,729.0
HP,1067.774854
Lenovo,1086.384444
Asus,1104.169367
Xiaomi,1133.4625
Dell,1186.06899


4.2 Найдите минимальную, среднюю и максимальную стоимости ноутбуков в зависимости от производителя процессора


In [22]:
df.groupby('Cpu_Company').agg({'Price_euros': ['min', 'mean', 'max']})


Unnamed: 0_level_0,Price_euros,Price_euros,Price_euros
Unnamed: 0_level_1,min,mean,max
Cpu_Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
AMD,199.0,560.638871,2199.0
Intel,174.0,1152.214145,6099.0
Samsung,659.0,659.0,659.0


4.3 Постройте таблицу с подсчетом количества ноутбуков в данных в зависимости от производителя CPU и ОЗУ


In [23]:
df.pivot_table(index='Cpu_Company', columns='Ram', aggfunc='count', values='Price_euros', fill_value=0)


Ram,12GB,16GB,24GB,2GB,32GB,4GB,64GB,6GB,8GB
Cpu_Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AMD,2,3,0,1,0,31,0,13,12
Intel,23,197,3,21,17,343,1,28,607
Samsung,0,0,0,0,0,1,0,0,0


4.4 Постройте таблицу с подсчетом средней стоимости ноутбуков в данных в зависимости от операционной системы и GB памяти


In [24]:
df.pivot_table(index='OpSys', columns='Memory_Amount', aggfunc='mean', values='Price_euros', fill_value='-')


Memory_Amount,8,16,32,64,128,180,240,256,500,508,512,1024,2048
OpSys,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Android,-,-,-,434.0,-,-,-,-,-,-,-,-,-
Chrome OS,-,305.38,412.454545,774.333333,1275.0,-,-,1559.0,-,-,2199.0,-,-
Linux,-,-,-,-,742.25,-,-,811.638125,389.056364,-,-,581.40129,-
Mac OS X,-,-,-,-,1099.0,-,-,1311.994,-,-,1222.0,-,-
No OS,-,-,-,-,562.14,-,-,782.989286,404.675385,-,1098.5,540.5396,594.0
Windows 10,2249.0,-,270.001471,499.716,1026.489167,1073.5,3100.0,1334.456872,664.717647,-,1911.985285,904.827906,666.9475
Windows 10 S,-,-,-,308.995,1039.0,-,-,1668.95,-,-,2589.0,-,-
Windows 7,-,-,-,-,1320.323333,1199.0,-,1846.4668,924.048333,1002.0,2235.396667,1539.666667,-
macOS,-,-,-,-,1119.315,-,-,1600.37,-,-,2180.87,-,-


5.1 Ноутбуков каких компаний и с каким процессором больше?


In [25]:
pd.crosstab(index=df['Company'], columns=df['Cpu_Company'])


Cpu_Company,AMD,Intel,Samsung
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Acer,10,93,0
Apple,0,21,0
Asus,11,147,0
Chuwi,0,3,0
Dell,0,297,0
Fujitsu,0,3,0
Google,0,3,0
HP,25,249,0
Huawei,0,2,0
LG,0,3,0


5.2 С каким типом памяти и с каким объемом памяти больше ноутбуков?


In [26]:
pd.crosstab(index=df['Memory_Type'], columns=df['Memory_Amount'])


Memory_Amount,8,16,32,64,128,180,240,256,500,508,512,1024,2048
Memory_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Flash Storage,0,7,38,15,4,0,0,8,0,0,2,0,0
Flash Storage HDD,0,0,0,1,0,0,0,0,0,0,0,0,0
HDD,0,0,1,0,1,0,0,0,132,0,0,224,16
HDD HDD,0,0,0,0,0,0,0,0,0,0,0,1,0
Hybrid,0,0,0,0,0,0,0,0,0,1,0,9,0
SSD,1,3,6,1,76,5,1,412,0,0,118,14,0
SSD HDD,0,0,0,0,96,0,0,85,0,0,17,2,0
SSD Hybrid,0,0,0,0,0,0,0,1,0,0,1,0,0
SSD SSD,0,0,0,0,0,0,0,2,0,0,2,0,0
