<a href="https://colab.research.google.com/github/gabrielborja/parc_de_montjuic/blob/main/survival_scripts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Survival EDA

## Importing libraries

In [None]:
# Updating libraries version
!pip install matplotlib --upgrade
!pip install plotly --upgrade

In [1]:
# Importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from ipywidgets import interact

## Loading Data

In [None]:
# Loading data from local drive
# Colab-only step: `google.colab` is not available in a plain Jupyter kernel.
# The object returned by `files.upload()` is indexed by file name further down
# to obtain the content of the uploaded CSV.
from google.colab import files
uploaded1 = files.upload()

In [3]:
# Storing loaded data from csv to a pandas dataframe
import io

# `uploaded1` is indexed by uploaded file name and yields the file's raw bytes.
# The notebook was written against this specific export, so prefer it when present.
csv_name = 'Script_202209071134.csv'
if csv_name not in uploaded1:
  # The expected name is missing (e.g. the export was renamed before upload).
  # A single uploaded file is unambiguous, so use it; anything else is an error.
  if len(uploaded1) != 1:
    raise KeyError(
        f'Expected an uploaded file named {csv_name!r}, got: {sorted(uploaded1)}')
  csv_name = next(iter(uploaded1))

# The export is pipe-delimited.
df1 = pd.read_csv(io.BytesIO(uploaded1[csv_name]), sep='|', engine='python')

In [None]:
# Checking the dataframe info
# Summary of the raw frame (columns, non-null counts, dtypes) before any cleaning;
# the positional column indices used in the cells below refer to this layout.
df1.info()

## Feature Engineering

In [5]:
# Filling NA with 0 in binary columns
# The columns at positions 1-9 are treated as binary flags: a missing value
# is replaced by 0 in each of them.
values_bi = dict.fromkeys(df1.columns[1:10], 0)
# Rebind the result instead of using inplace=True: the cell stays safe to
# re-run and the frame is never modified behind an earlier reference.
df1 = df1.fillna(value=values_bi)

In [6]:
# Filling NA with a placeholder label in categorical columns
# Columns are addressed by position; each one gets its own "missing" label.
values_cat = {
    df1.columns[14]: 'No data usage',
    df1.columns[18]: 'No data usage',
    df1.columns[20]: 'Unknown',
    df1.columns[21]: 'No voice usage',
}
# Rebind the result instead of using inplace=True so the cell stays safe to
# re-run and the frame is never modified behind an earlier reference.
df1 = df1.fillna(value=values_cat)

In [None]:
# Deriving 0/1 indicator columns from numeric columns
# Each new flag is 1 where the source column (given by position) is strictly
# positive and 0 otherwise; missing values compare as not positive, hence 0.
df1 = df1.assign(**{
    flag_name: np.where(df1.iloc[:, source_pos] > 0, 1, 0)
    for flag_name, source_pos in (
        ('has_mb_to_l3m', 11),
        ('has_topup_cu_mo', 12),
        ('has_topup_pv_mo', 13),
    )
})

In [8]:
# Tidying the county name: keep the first 15 characters, then title-case them
# NOTE(review): assumes the column at position 20 is the county-name column,
# so that this assignment overwrites it rather than appending a new column - confirm.
df1 = df1.assign(
    TOP_CUSTOMER_COUNTY_NAME=df1.iloc[:, 20].str.slice(stop=15).str.title()
)

In [9]:
# Building the analysis frame from selected columns, in this order:
#   positions 1-9   -> binary columns filled above
#   positions 23-25 -> the derived has_* indicator columns
#   positions 14-22 -> the remaining selected columns
df1_a = df1.iloc[:, [*range(1, 10), *range(23, 26), *range(14, 23)]]

In [10]:
# Checking the cleaned dataframe info
# Confirms the column selection and that the selected columns have no missing
# values left after the fill steps above.
df1_a.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51413 entries, 0 to 51412
Data columns (total 21 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   BUYOUT_COMMITMENT          51413 non-null  float64
 1   CONTRACT_COMMITMENT        51413 non-null  float64
 2   SUBSCRIBER_ONNET           51413 non-null  float64
 3   SUBSCRIBER_VOLTE_DATA      51413 non-null  float64
 4   SUBSCRIBER_VOLTE_DATA_L3M  51413 non-null  float64
 5   SUBSCRIBER_VOLTE_VOICE     51413 non-null  float64
 6   SUBSCRIBER_VOWIFI_DATA     51413 non-null  float64
 7   SUBSCRIBER_VOWIFI_VOICE    51413 non-null  float64
 8   VOLTE_ENABLED              51413 non-null  float64
 9   has_mb_to_l3m              51413 non-null  int64  
 10  has_topup_cu_mo            51413 non-null  int64  
 11  has_topup_pv_mo            51413 non-null  int64  
 12  DATA_USAGE                 51413 non-null  object 
 13  DISCOUNT_CATEGORY          51413 non-null  obj

## Exploring Data

In [None]:
# Interactive value counts for any column of df1 except the first one
@interact(Column_name=df1.columns[1:], Percentage=[True, False])
def explore_value_counts(Column_name, Percentage):
  """Return the value counts of one column of ``df1``.

  Column_name: column to tabulate, picked from the widget.
  Percentage: when True the counts are normalized to relative frequencies
    instead of absolute counts.

  Missing values are kept as their own category (``dropna=False``).
  """
  return df1.value_counts(
      subset=[Column_name],
      normalize=Percentage,
      dropna=False,
  )

In [None]:
# Interactive summary statistics for a selected column of the cleaned frame
# (every column of df1_a is offered, not only the numeric ones)
# NOTE(review): this function reuses the name `explore_value_counts` from the
# value-counts cell above and shadows it once this cell runs - consider renaming.
@interact(Column_name=df1_a.columns)
def explore_value_counts(Column_name):
  """Return ``describe()`` for the single chosen column of ``df1_a``."""
  chosen = df1_a[[Column_name]]
  return chosen.describe()

In [None]:
# Number of distinct values in every column of the cleaned frame
# (missing values, if any, count as one distinct value)
for col_name in df1_a.columns:
  n_distinct = df1_a[col_name].nunique(dropna=False)
  print(f'{col_name} => {n_distinct}')