In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the raw churn table
churn = pd.read_csv('churn.csv')

# Labels for the two index levels; each list needs one entry per row of churn
states = ['California', 'California', 'New York', 'Ohio']
cities = ['Los Angeles', 'San Francisco', 'New York', 'Cleveland']
new_index = [states, cities]

# Build a two-level (state, city) MultiIndex from the label arrays
churn_new = pd.MultiIndex.from_arrays(new_index, names=['state', 'city'])

# Replace the row index of churn with the MultiIndex
churn.index = churn_new

# Stack: move the column labels into a new innermost row-index level
churn_stack = churn.stack()

# Show the stacked result as plain text
print(churn_stack)

state       city                            
California  Los Angeles    Area code            408
                           total_day_calls      116
                           total_day_minutes    204
            San Francisco  Area code            408
                           total_day_calls      109
                           total_day_minutes    287
New York    New York       Area code            415
                           total_day_calls       84
                           total_day_minutes     84
Ohio        Cleveland      Area code            510
                           total_day_calls       67
                           total_day_minutes     50
dtype: int64


In [3]:
# edited/added
# Rebuild the wide table from the long-format file: rows keyed by
# (state, city), columns keyed by (period, metric). reset_index() turns
# state/city back into ordinary columns so they can be set as index below.
churn = (
    pd.read_csv('churn_long.csv')
    .pivot_table(index=['state', 'city'], columns=['period', 'metric'], values='value')
    .reset_index()
)
# Drop the 'period'/'metric' names from the two column levels
churn.columns.names = (None, None)

# Set state and city as the row index. The result is reassigned instead of
# using inplace=True, so the new frame is explicit and the call stays chainable.
churn = churn.set_index(['state', 'city'])

# Print churn
print(churn)


                                 day                     night              
                         total calls total minutes total calls total minutes
state      city                                                             
California Los Angeles            85           107         116           204
           San Francisco          90           167         109           287
New York   New York               75            90          84            84
Ohio       Cleveland              67           110          67            50


In [4]:

# edited/added
# Rebuild the wide table from the long-format file: rows keyed by
# (state, city), columns keyed by (period, metric). reset_index() turns
# state/city back into ordinary columns so they can be set as index below.
churn = (
    pd.read_csv('churn_long.csv')
    .pivot_table(index=['state', 'city'], columns=['period', 'metric'], values='value')
    .reset_index()
)
# Drop the 'period'/'metric' names from the two column levels
churn.columns.names = (None, None)

# Set state and city as the row index. The result is reassigned instead of
# using inplace=True, so the new frame is explicit and the call stays chainable.
churn = churn.set_index(['state', 'city'])

# Reshape by stacking the second column level (position 1) into the rows
churn_stack = churn.stack(level=1)

# Print churn_stack
print(churn_stack)

                                        day  night
state      city                                   
California Los Angeles   total calls     85    116
                         total minutes  107    204
           San Francisco total calls     90    109
                         total minutes  167    287
New York   New York      total calls     75     84
                         total minutes   90     84
Ohio       Cleveland     total calls     67     67
                         total minutes  110     50


In [5]:
# edited/added
# Load the long-format file and pivot it wide: (state, city) rows,
# (time, feature) column levels
churn = (
    pd.read_csv('churn_long_v1.csv')
    .pivot_table(index=['state', 'city'], columns=['time', 'feature'], values='value')
)

# Move the 'time' column level into the row index
churn_time = churn.stack('time')

# Show the result as plain text
print(churn_time)


feature                         text_messages  total_GB
state      city          time                          
California Los Angeles   day               20         5
                         night             30        10
           San Francisco day               40         5
                         night            100         5
New York   New York      day               50         2
                         night             20         9
Ohio       Cleveland     day              100         3
                         night             40         6


In [6]:

# Move the 'feature' column level into the row index
churn_feature = churn.stack('feature')

# Show the result as plain text
print(churn_feature)

time                                    day  night
state      city          feature                  
California Los Angeles   text_messages   20     30
                         total_GB         5     10
           San Francisco text_messages   40    100
                         total_GB         5      5
New York   New York      text_messages   50     20
                         total_GB         2      9
Ohio       Cleveland     text_messages  100     40
                         total_GB         3      6


In [7]:
# Display the pivoted churn table (state/city rows, time/feature column levels)
churn

Unnamed: 0_level_0,time,day,day,night,night
Unnamed: 0_level_1,feature,text_messages,total_GB,text_messages,total_GB
state,city,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
California,Los Angeles,20,5,30,10
California,San Francisco,40,5,100,5
New York,New York,50,2,20,9
Ohio,Cleveland,100,3,40,6


In [8]:
# Unstack the innermost row level (the default, level=-1) into the columns
churn_unstack = churn.unstack(level=-1)

# Display churn_unstack
churn_unstack


time,day,day,day,day,day,day,day,day,night,night,night,night,night,night,night,night
feature,text_messages,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB,total_GB,text_messages,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB,total_GB
city,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco
state,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
California,,20.0,,40.0,,5.0,,5.0,,30.0,,100.0,,10.0,,5.0
New York,,,50.0,,,,2.0,,,,20.0,,,,9.0,
Ohio,100.0,,,,3.0,,,,40.0,,,,6.0,,,


In [9]:

# Unstack the first row level (position 0) into the columns
churn_first = churn.unstack(0)

# Display churn_first
churn_first


time,day,day,day,day,day,day,night,night,night,night,night,night
feature,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB
state,California,New York,Ohio,California,New York,Ohio,California,New York,Ohio,California,New York,Ohio
city,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
Cleveland,,,100.0,,,3.0,,,40.0,,,6.0
Los Angeles,20.0,,,5.0,,,30.0,,,10.0,,
New York,,50.0,,,2.0,,,20.0,,,9.0,
San Francisco,40.0,,,5.0,,,100.0,,,5.0,,


In [10]:

# Unstack the second row level (position 1) into the columns
churn_second = churn.unstack(1)

# Display churn_second
churn_second

time,day,day,day,day,day,day,day,day,night,night,night,night,night,night,night,night
feature,text_messages,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB,total_GB,text_messages,text_messages,text_messages,text_messages,total_GB,total_GB,total_GB,total_GB
city,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco,Cleveland,Los Angeles,New York,San Francisco
state,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
California,,20.0,,40.0,,5.0,,5.0,,30.0,,100.0,,10.0,,5.0
New York,,,50.0,,,,2.0,,,,20.0,,,,9.0,
Ohio,100.0,,,,3.0,,,,40.0,,,,6.0,,,


In [11]:
# edited/added
# Load the long-format file and pivot it wide: (time, type, exited) rows,
# one column per metric
churn = (
    pd.read_csv('churn_long_v2.csv')
    .pivot_table(index=['time', 'type', 'exited'], columns=['metric'], values='value')
)
# Remove the 'metric' name from the column index
churn.columns.names = [None]

# Move the 'time' row level into the columns
churn_time = churn.unstack('time')

# Display churn_time
churn_time



Unnamed: 0_level_0,Unnamed: 1_level_0,calls,calls,calls,charge,charge,charge,minutes,minutes,minutes
Unnamed: 0_level_1,time,day,eve,night,day,eve,night,day,eve,night
type,exited,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
International,churn,97.0,,67.0,31.37,,56.59,184.5,,332.9
International,no churn,,117.0,,,20.28,,,119.3,
National,churn,137.0,,,21.95,,,129.1,,
National,no churn,,88.0,103.0,,23.31,18.77,,137.1,110.4


In [12]:
# Move the 'time' row level into the columns ...
churn_time = churn.unstack('time')
# ... then order the remaining row index in descending order
churn_time = churn_time.sort_index(ascending=False)

# Display churn_time
churn_time

Unnamed: 0_level_0,Unnamed: 1_level_0,calls,calls,calls,charge,charge,charge,minutes,minutes,minutes
Unnamed: 0_level_1,time,day,eve,night,day,eve,night,day,eve,night
type,exited,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
National,no churn,,88.0,103.0,,23.31,18.77,,137.1,110.4
National,churn,137.0,,,21.95,,,129.1,,
International,no churn,,117.0,,,20.28,,,119.3,
International,churn,97.0,,67.0,31.37,,56.59,184.5,,332.9


In [13]:
# Move the 'type' row level into the columns
churn_type = churn.unstack('type')

# Stack the first column level (position 0) of churn_type back into the rows
churn_final = churn_type.stack(0)

# Display churn_final
churn_final

Unnamed: 0_level_0,Unnamed: 1_level_0,type,International,National
time,exited,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
day,churn,calls,97.0,137.0
day,churn,charge,31.37,21.95
day,churn,minutes,184.5,129.1
eve,no churn,calls,117.0,88.0
eve,no churn,charge,20.28,23.31
eve,no churn,minutes,119.3,137.1
night,churn,calls,67.0,
night,churn,charge,56.59,
night,churn,minutes,332.9,
night,no churn,calls,,103.0


In [14]:
# Exchange the row index levels at positions 0 and 2 (first and third)
churn_swap = churn.swaplevel(i=0, j=2)

# Show the result as plain text
print(churn_swap)



                              calls  charge  minutes
exited   type          time                         
churn    International day     97.0   31.37    184.5
         National      day    137.0   21.95    129.1
no churn International eve    117.0   20.28    119.3
         National      eve     88.0   23.31    137.1
churn    International night   67.0   56.59    332.9
no churn National      night  103.0   18.77    110.4


In [15]:
# Exchange the row index levels at positions 0 and 2 (first and third)
churn_swap = churn.swaplevel(i=0, j=2)

# Unstack the innermost row level (the default, level=-1) into the columns
churn_unstack = churn_swap.unstack(level=-1)

# Display churn_unstack
churn_unstack

Unnamed: 0_level_0,Unnamed: 1_level_0,calls,calls,calls,charge,charge,charge,minutes,minutes,minutes
Unnamed: 0_level_1,time,day,eve,night,day,eve,night,day,eve,night
exited,type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
churn,International,97.0,,67.0,31.37,,56.59,184.5,,332.9
churn,National,137.0,,,21.95,,,129.1,,
no churn,International,,117.0,,,20.28,,,119.3,
no churn,National,,88.0,103.0,,23.31,18.77,,137.1,110.4


In [17]:
# edited/added
# Load the long-format file and pivot it wide: (exited, state, city) rows,
# (year, plan) column levels
churn = (
    pd.read_csv('churn_long_v3.csv')
    .pivot_table(index=['exited', 'state', 'city'], columns=['year', 'plan'], values='value')
)

# Move the first two row levels (positions 0 and 1) into the columns
first_two_levels = [0, 1]
churn_unstack = churn.unstack(level=first_two_levels)

# Display churn_unstack
churn_unstack


year,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,...,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020
plan,data,data,data,data,minutes,minutes,minutes,minutes,voicemail,voicemail,...,data,data,minutes,minutes,minutes,minutes,voicemail,voicemail,voicemail,voicemail
exited,churn,churn,no_churn,no_churn,churn,churn,no_churn,no_churn,churn,churn,...,no_churn,no_churn,churn,churn,no_churn,no_churn,churn,churn,no_churn,no_churn
state,California,New York,California,New York,California,New York,California,New York,California,New York,...,California,New York,California,New York,California,New York,California,New York,California,New York
city,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
Los Angeles,2.0,,3.0,,0.0,,0.0,,1.0,,...,2.0,,1.0,,1.0,,1.0,,0.0,
New York,,5.0,,4.0,,1.0,,1.0,,0.0,...,,6.0,,0.0,,1.0,,1.0,,0.0


In [19]:

# Move the first two row levels (positions 0 and 1) into the columns
churn_unstack = churn.unstack([0, 1])

# Bring the 'plan' and 'year' column levels back into the rows, in that order
churn_py = churn_unstack.stack(level=['plan', 'year'])

# Display churn_py
churn_py


Unnamed: 0_level_0,Unnamed: 1_level_0,exited,churn,churn,no_churn,no_churn
Unnamed: 0_level_1,Unnamed: 1_level_1,state,California,New York,California,New York
city,plan,year,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Los Angeles,data,2019,2.0,,3.0,
Los Angeles,data,2020,5.0,,2.0,
Los Angeles,minutes,2019,0.0,,0.0,
Los Angeles,minutes,2020,1.0,,1.0,
Los Angeles,voicemail,2019,1.0,,1.0,
Los Angeles,voicemail,2020,1.0,,0.0,
New York,data,2019,,5.0,,4.0
New York,data,2020,,2.0,,6.0
New York,minutes,2019,,1.0,,1.0
New York,minutes,2020,,0.0,,1.0


In [20]:

# Move the first two row levels (positions 0 and 1) into the columns
churn_unstack = churn.unstack([0, 1])

# Bring the 'plan' and 'year' column levels back into the rows, in that order
churn_py = churn_unstack.stack(level=['plan', 'year'])

# Exchange the column levels at positions 0 and 1
churn_switch = churn_py.swaplevel(i=0, j=1, axis='columns')

# Display churn_switch
churn_switch

Unnamed: 0_level_0,Unnamed: 1_level_0,state,California,New York,California,New York
Unnamed: 0_level_1,Unnamed: 1_level_1,exited,churn,churn,no_churn,no_churn
city,plan,year,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Los Angeles,data,2019,2.0,,3.0,
Los Angeles,data,2020,5.0,,2.0,
Los Angeles,minutes,2019,0.0,,0.0,
Los Angeles,minutes,2020,1.0,,1.0,
Los Angeles,voicemail,2019,1.0,,1.0,
Los Angeles,voicemail,2020,1.0,,0.0,
New York,data,2019,,5.0,,4.0
New York,data,2020,,2.0,,6.0
New York,minutes,2019,,1.0,,1.0
New York,minutes,2020,,0.0,,1.0


In [None]:
# edited/added
# Load the long-format churn data; the pivot in the following cell reads its
# state, international_plan, voice_mail_plan, churn, variable and value columns.
churn = pd.read_csv('churn_long_v4.csv')
# Display the loaded frame (the rendered output also shows an 'Unnamed: 0' column)
churn

Unnamed: 0,state,international_plan,voice_mail_plan,churn,variable,value
0,LA,No,No,False,total_day_calls,106.818
1,LA,No,No,False,total_night_calls,96.909
2,LA,No,No,True,total_day_calls,100.0
3,LA,No,No,True,total_night_calls,119.0
4,LA,No,Yes,False,total_day_calls,100.0
5,LA,No,Yes,False,total_night_calls,84.25
6,NY,No,No,False,total_day_calls,90.9
7,NY,No,No,False,total_night_calls,100.8
8,NY,No,No,True,total_day_calls,95.0
9,NY,No,No,True,total_night_calls,101.5


In [None]:
# Pivot the long table wide: one row per
# (state, international_plan, voice_mail_plan, churn), one column per variable.
# NOTE: this rebinds `churn` from the long table to the pivoted table, so the
# cell expects `churn` to still hold the long-format frame when it runs.
churn = churn.pivot_table(
    index=['state', 'international_plan', 'voice_mail_plan', 'churn'],
    columns=['variable'],
    values='value',
)

# Remove the 'variable' name from the column index
churn.columns.names = [None]

# Display the pivoted frame
churn

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,total_day_calls,total_night_calls
state,international_plan,voice_mail_plan,churn,Unnamed: 4_level_1,Unnamed: 5_level_1
LA,No,No,False,106.818,96.909
LA,No,No,True,100.0,119.0
LA,No,Yes,False,100.0,84.25
LA,Yes,No,False,78.0,90.0
LA,Yes,TRUE,False,69.0,104.0
LA,Yes,Yes,False,71.0,101.0
NY,No,No,False,90.9,100.8
NY,No,No,True,95.0,101.5
NY,No,Yes,False,115.0,121.0
NY,Yes,No,False,109.0,99.0


In [None]:


# Unstack the 'churn' row level into the columns and fill the combinations
# that have no data with zero. The result is bound to a new name instead of
# overwriting `churn`: after an overwrite `churn` would no longer have a
# 'churn' row level, so re-running this cell would fail.
churn_unstacked = churn.unstack(level='churn', fill_value=0)

# Sort by descending voice mail plan and ascending international plan
churn_sorted = churn_unstacked.sort_index(
    level=['voice_mail_plan', 'international_plan'],
    ascending=[False, True],
)

# Display final DataFrame and observe pattern
churn_sorted

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_day_calls,total_day_calls,total_night_calls,total_night_calls
Unnamed: 0_level_1,Unnamed: 1_level_1,churn,False,True,False,True
state,international_plan,voice_mail_plan,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
LA,No,Yes,100.0,0.0,84.25,0.0
NY,No,Yes,115.0,0.0,121.0,0.0
LA,Yes,Yes,71.0,0.0,101.0,0.0
NY,Yes,Yes,120.0,0.0,78.0,0.0
LA,Yes,TRUE,69.0,0.0,104.0,0.0
LA,No,No,106.818,100.0,96.909,119.0
NY,No,No,90.9,95.0,100.8,101.5
LA,Yes,No,78.0,0.0,90.0,0.0
NY,Yes,No,109.0,87.0,99.0,113.0


In [None]:
# edited/added
# Load the long-format file and pivot it wide: one row per location,
# (type, scope) column levels
churn = (
    pd.read_csv('churn_long_v5.csv')
    .pivot_table(index=['location'], columns=['type', 'scope'], values='value')
)
# Remove the 'location' name from the row index
churn.index.names = [None]

# Move the 'type' column level into the row index
churn_stack = churn.stack('type')

# Replace the missing values produced by the reshape with zero
churn_fill = churn_stack.fillna(value=0)

# Show the result as plain text
print(churn_fill)


scope                 International  National
   type                                      
CA Total Day Calls              8.0       0.0
   Total Night Calls           34.0      24.0
LA Total Day Calls             23.0       0.0
   Total Night Calls           30.0       0.0
NY Total Day Calls              8.0       0.0
   Total Night Calls           34.0      24.0


In [None]:

# Move the 'scope' column level into the row index, keeping rows whose values
# are all missing (dropna=False).
# NOTE(review): newer pandas releases deprecate the `dropna` argument of
# stack() in favour of a new implementation - confirm against the installed version.
churn_stack = churn.stack('scope', dropna=False)

# Replace the missing values with zero
churn_fill = churn_stack.fillna(value=0)

# Show the result as plain text
print(churn_fill)

type              Total Day Calls  Total Night Calls
   scope                                            
CA International              8.0               34.0
   National                   0.0               24.0
LA International             23.0               30.0
   National                   0.0                0.0
NY International              8.0               34.0
   National                   0.0               24.0
