# Chapter 2: The Basics of Python and R

## Working With the Data

In [2]:
import pandas as pd
import numpy as np

bank_train = pd.read_csv("bank_marketing_training")

pd.crosstab(bank_train["response"], bank_train["previous_outcome"])

previous_outcome,failure,nonexistent,success
response,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,2390,21176,320
yes,385,2034,569


In [7]:
crosstab_01 = pd.crosstab(
    bank_train["response"], bank_train["previous_outcome"], margins=True
)

In [8]:
crosstab_01

previous_outcome,failure,nonexistent,success,All
response,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
no,2390,21176,320,23886
yes,385,2034,569,2988
All,2775,23210,889,26874


In [10]:
first_nine = bank_train.head(9)
first_nine

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,days_since_previous,previous,previous_outcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,response
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
2,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
3,25,services,single,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
4,29,blue-collar,single,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
5,57,housemaid,divorced,basic.4y,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
6,35,blue-collar,married,basic.6y,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
7,39,management,single,basic.9y,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
8,30,unemployed,married,high.school,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no


In [11]:
two_cols = bank_train[["age", "marital"]]
two_cols

Unnamed: 0,age,marital
0,56,married
1,57,married
2,41,married
3,25,single
4,29,single
...,...,...
26869,36,married
26870,37,married
26871,29,single
26872,73,married


In [25]:
# first_three = bank_train[["age", "marital"]].head(3)
# https://www.activestate.com/resources/quick-reads/how-to-slice-a-dataframe-in-pandas/
first_three = bank_train.iloc[:5, [0, 2]]
first_three

Unnamed: 0,age,marital,default
0,56,married,no
1,57,married,unknown
2,41,married,unknown
3,25,single,no
4,29,single,no


## Hands-On Analysis

In [81]:
from sklearn.tree import DecisionTreeClassifier

adult = pd.read_csv("adult_ch3_training")

In [80]:
# 27
table01 = pd.crosstab(adult["workclass"], adult["sex"])
table01

sex,Female,Male
workclass,Unnamed: 1_level_1,Unnamed: 2_level_1
?,377,452
Federal-gov,149,305
Local-gov,377,592
Never-worked,1,4
Private,3574,6707
Self-emp-inc,54,444
Self-emp-not-inc,178,992
State-gov,201,385
Without-pay,1,4


In [29]:
# 28
table02 = pd.crosstab(adult["sex"], adult["marital-status"])
table02

marital-status,Divorced,Married-AF-spouse,Married-civ-spouse,Married-spouse-absent,Never-married,Separated,Widowed
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Female,1219,7,761,95,2160,290,380
Male,795,4,6010,104,2717,182,73


In [79]:
# 29
adult.iloc[0].loc[["sex", "workclass"]]

# 991 others

sex                      Male
workclass    Self-emp-not-inc
Name: 0, dtype: object

In [74]:
# 30
adult.loc[6:10, ["sex", "marital-status"]]

Unnamed: 0,sex,marital-status
6,Male,Married-civ-spouse
7,Male,Married-civ-spouse
8,Male,Married-civ-spouse
9,Male,Divorced
10,Male,Married-civ-spouse


In [77]:
# 31
adult_married = adult[adult["marital-status"] == "Married-civ-spouse"]
adult_married

Unnamed: 0,age,workclass,education,marital-status,occupation,sex,capital-gain,capital-loss,income
0,50,Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Male,0,0,<=50K
3,52,Self-emp-not-inc,9,Married-civ-spouse,Exec-managerial,Male,0,0,>50K
5,40,Private,11,Married-civ-spouse,Craft-repair,Male,0,0,>50K
6,38,Private,7,Married-civ-spouse,Sales,Male,0,0,<=50K
7,43,Private,7,Married-civ-spouse,Transport-moving,Male,0,2042,<=50K
...,...,...,...,...,...,...,...,...,...
14782,32,Private,6,Married-civ-spouse,Transport-moving,Male,0,0,<=50K
14784,34,Private,16,Married-civ-spouse,Prof-specialty,Male,0,0,>50K
14788,39,Local-gov,12,Married-civ-spouse,Adm-clerical,Female,0,0,>50K
14792,43,Self-emp-not-inc,10,Married-civ-spouse,Craft-repair,Male,0,0,<=50K


In [82]:
# 32
table03 = pd.crosstab(adult_married["workclass"], adult_married["sex"])
table03

sex,Female,Male
workclass,Unnamed: 1_level_1,Unnamed: 2_level_1
?,67,224
Federal-gov,20,203
Local-gov,68,411
Never-worked,1,0
Private,491,3883
Self-emp-inc,24,347
Self-emp-not-inc,57,703
State-gov,33,237
Without-pay,0,2


In [83]:
# 33
adult_over_40 = adult[adult["age"] > 40]
adult_over_40

Unnamed: 0,age,workclass,education,marital-status,occupation,sex,capital-gain,capital-loss,income
0,50,Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Male,0,0,<=50K
2,49,Private,5,Married-spouse-absent,Other-service,Female,0,0,<=50K
3,52,Self-emp-not-inc,9,Married-civ-spouse,Exec-managerial,Male,0,0,>50K
7,43,Private,7,Married-civ-spouse,Transport-moving,Male,0,2042,<=50K
8,54,?,10,Married-civ-spouse,?,Male,0,0,>50K
...,...,...,...,...,...,...,...,...,...
14787,45,Local-gov,12,Divorced,Prof-specialty,Female,0,0,<=50K
14790,65,Self-emp-not-inc,15,Never-married,Prof-specialty,Male,1086,0,<=50K
14791,43,State-gov,10,Divorced,Adm-clerical,Female,0,0,<=50K
14792,43,Self-emp-not-inc,10,Married-civ-spouse,Craft-repair,Male,0,0,<=50K


In [84]:
# 34
table04 = pd.crosstab(adult_over_40["sex"], adult_over_40["marital-status"])
table04

marital-status,Divorced,Married-civ-spouse,Married-spouse-absent,Never-married,Separated,Widowed
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,720,305,38,256,133,354
Male,448,3383,47,315,74,71


In [85]:
table02

marital-status,Divorced,Married-AF-spouse,Married-civ-spouse,Married-spouse-absent,Never-married,Separated,Widowed
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Female,1219,7,761,95,2160,290,380
Male,795,4,6010,104,2717,182,73
