# In [3]:
def ready_data(dataname):

    """Load the customer Excel sheet and encode its categorical columns as integers.

    Parameters:
        dataname: path (or any other input accepted by ``pandas.read_excel``)
            of the Excel file. The first column of the sheet is used as the index.

    Returns:
        The loaded DataFrame in which the columns "region", "marital", "retire",
        "gender", "voice", "internet", "forward", "churn", "ed" and "custcat"
        have their text labels replaced by the integer codes listed below.
        Labels that are not listed are left untouched.

    Raises:
        KeyError: if one of the columns listed above is missing from the sheet."""

    from pandas import read_excel

    yes_no = {"Yes": 1, "No": 0}
    encodings = {
        "region": {"Zone 3": 2, "Zone 2": 1, "Zone 1": 0},
        "marital": {"Married": 1, "Unmarried": 0},
        "retire": yes_no,
        "gender": {"Male": 0, "Female": 1},
        "voice": yes_no,
        "internet": yes_no,
        "forward": yes_no,
        "churn": yes_no,
        "ed": {
            "Post-undergraduate degree": 4,
            "College degree": 3,
            "Some college": 2,
            "High school degree": 1,
            "Did not complete high school": 0,
        },
        "custcat": {"Total service": 3, "Plus service": 2, "E-service": 1, "Basic service": 0},
    }

    dataframe = read_excel(dataname, index_col=0)

    for column, mapping in encodings.items():
        # Assign the result back instead of calling replace(..., inplace=True)
        # on the column selection: that chained in-place form is deprecated and
        # does not update the frame when pandas copy-on-write is active.
        # infer_objects() makes sure a fully encoded column ends up with an
        # integer dtype rather than staying an object column.
        dataframe[column] = dataframe[column].replace(mapping).infer_objects()

    return dataframe

def best_model(dataframe, duration="tenure", event="churn", fitters=None):

    """Fit several survival regression models and return the one with the lowest AIC.

    Parameters:
        dataframe: DataFrame holding the covariates plus the duration and event columns.
        duration: name of the duration column (optional, default "tenure").
        event: name of the event indicator column (optional, default "churn").
        fitters: iterable of unfitted model objects to compare (optional). Each must
            offer ``fit(dataframe, duration_col=..., event_col=...)`` and expose
            ``AIC_`` (or ``AIC_partial_``) afterwards. When omitted, the candidates
            are LogNormalAFTFitter(), WeibullAFTFitter() and LogLogisticAFTFitter().

    Returns:
        The candidate with the smallest AIC, already fitted on ``dataframe``.
        On ties the earliest candidate wins.

    Raises:
        ValueError: if no candidate produced an AIC below infinity (for example
            when ``fitters`` is empty or every AIC is NaN)."""

    if fitters is None:
        # Imported lazily so that callers supplying their own candidates do not
        # need lifelines to be importable.
        from lifelines import LogNormalAFTFitter, WeibullAFTFitter, LogLogisticAFTFitter

        fitters = [LogNormalAFTFitter(), WeibullAFTFitter(), LogLogisticAFTFitter()]

    least_aic = float('inf')
    least_fitter = None
    for fitter in fitters:
        fitter.fit(dataframe, duration_col=duration, event_col=event)
        try:
            aic = fitter.AIC_
        except AttributeError:
            # Fallback for models that only expose a partial AIC.
            # NOTE(review): a partial AIC is presumably not on the same scale as
            # a full-likelihood AIC -- confirm before mixing such models in.
            aic = fitter.AIC_partial_

        if aic < least_aic:
            least_aic = aic
            least_fitter = fitter

    if least_fitter is None:
        raise ValueError("no candidate model produced a finite AIC")
    return least_fitter



def calc_CLV(dataframe, duration="tenure", event="churn"):

    """Estimate customer lifetime value (CLV) from the best-fitting survival model.

    Parameters:
        dataframe: DataFrame holding the covariates plus the duration and event columns.
        duration: name of the duration column (optional, default "tenure").
        event: name of the event indicator column (optional, default "churn").

    The model chosen by best_model() predicts a survival curve for every row of
    ``dataframe``. For each subject the survival probabilities of the kept time
    points are discounted (annual rate 0.1, compounded monthly, first period
    undiscounted), summed, and multiplied by a margin of 1000 per period.

    Returns:
        The ``describe()`` summary (count, mean, std, quartiles, ...) of the
        resulting CLV values, as a Series named "CLV"."""

    monthly_margin = 1000
    annual_rate = 0.1

    # best_model() hands back the winning fitter already fitted on `dataframe`,
    # so no second fit is needed here.
    fitter = best_model(dataframe, duration, event)

    # lifelines returns one row per time point and one column per subject.
    # NOTE(review): only the first 24 time points and the first 5 subjects are
    # kept, as in the original slice -- confirm whether all subjects should be
    # included in the summary.
    survival = fitter.predict_survival_function(dataframe).iloc[0:24, 0:5]

    # Discount along the time axis. Writing into the rows yielded by iterrows()
    # only modifies copies, and the exponent has to follow the period (row
    # position), not the subject's column label.
    monthly_rate = annual_rate / 12
    discount_factors = [(1 + monthly_rate) ** period for period in range(len(survival))]
    discounted = survival.div(discount_factors, axis=0)

    # One CLV per subject: sum over time (down each column).
    clv = monthly_margin * discounted.sum(axis=0)
    clv.name = "CLV"
    return clv.describe()
    
def vis_CLV(dataframe, compare, duration="tenure", event="churn"):

    """Plot how varying one covariate changes the predicted outcome of the best model.

    Parameters:
        dataframe: DataFrame holding the covariates plus the duration and event columns.
        compare: name of the column whose partial effect on the outcome is plotted.
        duration: name of the duration column (optional, default "tenure").
        event: name of the event indicator column (optional, default "churn").

    The values tried for ``compare`` are a fixed grid for "age", "address" and
    "income"; for any other column every integer from the column's minimum to
    its maximum is used (so that column must hold integer values).

    The function draws a single 20x10 figure with the partial-effects-on-outcome
    plot of the model selected by best_model(), shows it and closes it.
    Nothing is returned."""

    from matplotlib.pyplot import figure, show, close

    preset_grids = {
        "age": range(20, 80, 5),
        "address": range(0, 55, 5),
        "income": range(0, 1670, 50),
    }

    if compare in preset_grids:
        value = preset_grids[compare]
    else:
        column = dataframe[compare]
        value = range(min(column), max(column) + 1)

    # best_model() returns the winning fitter already fitted on `dataframe`,
    # so it can be used for plotting right away.
    fitter = best_model(dataframe, duration, event)

    figure(figsize=(20,10))
    fitter.plot_partial_effects_on_outcome(compare, value)
    show()
    close()

def hyp_test_segment(dataframe, compare, alpha=None, duration="tenure", event="churn"):

    """Test, segment by segment, whether the coefficient of one covariate is significant.

    Parameters:
        dataframe: DataFrame holding the covariates plus the duration and event columns.
        compare: name of the covariate whose p-value is inspected.
        alpha: significance level (optional). Defaults to the ``alpha`` attribute
            of the model selected by best_model().
        duration: name of the duration column (optional, default "tenure").
        event: name of the event indicator column (optional, default "churn").

    Every column with at most 10 distinct values (except "retire") is treated as
    categorical. For each of its values the selected model is refitted on the
    rows having that value, and the p-value of ``compare`` is read from the
    model summary. When it is below ``alpha`` the line
    "Reject Null Hypothesis for <column> <value>" is printed.

    Segments on which the fit fails, or for which no p-value for ``compare`` is
    available, are skipped. If no segment leads to a rejection, a single message
    saying so is printed. Nothing is returned."""

    import warnings

    rejected_any = False

    # Silence fitting warnings only for the duration of this call instead of
    # changing the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        fitter = best_model(dataframe, duration, event)
        if alpha is None:
            alpha = fitter.alpha

        for column in dataframe.columns:
            levels = dataframe[column].unique()
            if len(levels) > 10 or column == "retire":
                continue

            for level in levels:
                segment = dataframe[dataframe[column] == level]
                try:
                    fitter.fit(segment, duration_col=duration, event_col=event)
                    # The summary is indexed by (parameter, covariate). The
                    # parameter name differs between models ("mu_" exists only
                    # for the log-normal one), so select by covariate and take
                    # the first parameter that carries it.
                    p_values = fitter.summary["p"]
                    p = float(p_values.xs(compare, level=1).iloc[0])
                except Exception:
                    # Best effort: a segment that cannot be fitted or has no
                    # coefficient for `compare` is skipped.
                    continue

                # A NaN p-value compares False and is therefore never reported.
                if p < alpha:
                    rejected_any = True
                    print("Reject Null Hypothesis for", column, level)

    if not rejected_any:
        print("Null Hypothesis not rejected for any segment")

