In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('csv/small_customers.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   CustomerID        100 non-null    object 
 1   Age               100 non-null    int64  
 2   gender            100 non-null    object 
 3   SeniorCitizen     100 non-null    int64  
 4   Partner           100 non-null    object 
 5   Dependents        100 non-null    object 
 6   Tenure            100 non-null    int64  
 7   PhoneService      100 non-null    object 
 8   MultipleLines     100 non-null    object 
 9   InternetService   100 non-null    object 
 10  DeviceProtection  100 non-null    object 
 11  PaperlessBilling  100 non-null    object 
 12  PaymentMethod     100 non-null    object 
 13  TotalCharges      100 non-null    float64
dtypes: float64(1), int64(3), object(10)
memory usage: 11.1+ KB


In [4]:
df.describe()

Unnamed: 0,Age,SeniorCitizen,Tenure,TotalCharges
count,100.0,100.0,100.0,100.0
mean,49.13,0.56,35.32,108.704
std,23.80122,0.498888,20.716889,8.839755
min,4.0,0.0,0.0,65.85
25%,28.0,0.0,18.0,103.075
50%,48.5,1.0,32.5,110.425
75%,69.0,1.0,52.5,115.025
max,100.0,1.0,72.0,119.7


In [5]:
kids = df[df.Age < 13]
teenagers = df[(df.Age >= 13) & (df.Age < 18)]
adults = df[df.Age >= 18]

In [6]:
kids.TotalCharges.mean()

np.float64(104.26)

In [7]:
adults.TotalCharges.mean()

np.float64(108.92340425531914)

In [8]:
kids.TotalCharges.median()

np.float64(104.525)

In [11]:
adults.TotalCharges.median()

np.float64(110.765)

In [10]:
adults.TotalCharges.agg(['mean', 'median'])

mean      108.923404
median    110.765000
Name: TotalCharges, dtype: float64

In [12]:
kids.TotalCharges.agg(['mean', 'median'])

mean      104.260
median    104.525
Name: TotalCharges, dtype: float64

In [13]:
age_groups = [kids, teenagers, adults]

for group in age_groups:
    stats = group.TotalCharges.agg(['mean','median'])
    print(stats)

mean      104.260
median    104.525
Name: TotalCharges, dtype: float64
mean      107.28
median    107.28
Name: TotalCharges, dtype: float64
mean      108.923404
median    110.765000
Name: TotalCharges, dtype: float64


In [15]:
age_groups = {
    'kids':kids, 
    'teenagers': teenagers, 
    'adults': adults,
}

for name,group in age_groups.items():
    stats = group.TotalCharges.agg(['mean','median'])
    print(name)
    print(stats)
    print('-------')

kids
mean      104.260
median    104.525
Name: TotalCharges, dtype: float64
-------
teenagers
mean      107.28
median    107.28
Name: TotalCharges, dtype: float64
-------
adults
mean      108.923404
median    110.765000
Name: TotalCharges, dtype: float64
-------


In [16]:
age_groups = {
    'kids':kids, 
    'teenagers': teenagers,
    'young adults': df[(df.Age >= 18) & (df.Age < 28)],
    'adults': df[df.Age >= 28],
}

for name,group in age_groups.items():
    stats = group.TotalCharges.agg(['mean','median'])
    print(name)
    print(stats)
    print('-------')

kids
mean      104.260
median    104.525
Name: TotalCharges, dtype: float64
-------
teenagers
mean      107.28
median    107.28
Name: TotalCharges, dtype: float64
-------
young adults
mean      109.556667
median    110.775000
Name: TotalCharges, dtype: float64
-------
adults
mean      108.773421
median    110.665000
Name: TotalCharges, dtype: float64
-------


In [17]:
age_groups = {
    'kids':kids, 
    'teenagers': teenagers,
    'young adults': df[(df.Age >= 18) & (df.Age < 28)],
    'adults': df[df.Age >= 28],
}

for name,group in age_groups.items():
    stats = group.TotalCharges.agg(['mean','sum'])
    print(name)
    print(stats)
    print('-------')

kids
mean    104.26
sum     417.04
Name: TotalCharges, dtype: float64
-------
teenagers
mean    107.28
sum     214.56
Name: TotalCharges, dtype: float64
-------
young adults
mean     109.556667
sum     1972.020000
Name: TotalCharges, dtype: float64
-------
adults
mean     108.773421
sum     8266.780000
Name: TotalCharges, dtype: float64
-------


In [19]:
groups = age_groups['kids']

(group.TotalCharges >= 100).sum()

np.int64(72)

In [24]:
group_size = group.TotalCharges.count()
n_over_100 = (group.TotalCharges >= 100).sum()
percent_over_100 = n_over_100 / group_size

percent_over_100

np.float64(0.9473684210526315)

In [26]:
def percent_purchased_at_least(group, amount):
    group_size = group['TotalCharges'].count()
    n_over_100 = (group['TotalCharges'] >= amount).sum()
    percent_over_100 = n_over_100 / group_size

    return percent_over_100

In [27]:
percent_purchased_at_least(age_groups['kids'], 100)

np.float64(1.0)

In [28]:
age_groups = {
    'kids':kids, 
    'teenagers': teenagers,
    'young adults': df[(df.Age >= 18) & (df.Age < 28)],
    'adults': df[df.Age >= 28],
}

for name,group in age_groups.items():
    stats = percent_purchased_at_least(group, 100)
    print(name)
    print(stats)
    print('-------')

kids
1.0
-------
teenagers
1.0
-------
young adults
1.0
-------
adults
0.9473684210526315
-------
