In [1]:
from random import randint, seed

import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype


# 1. Generating sample dataset


Here we generate a sample dataset with columns:  
<li> <b> ids </b> - user IDs.
<li> <b> ad_shown </b> - 1 if ad was shown for this user in this session or 0 otherwise.
<li> <b> ad_clicked </b> - 1 if ad link was clicked by this user in this session or 0 otherwise.
<li> <b> page_time </b> - time spent by user on page in seconds.
<li> <b> item_bought </b> - 1 if item was bought by this user in this session or 0 otherwise.    
<li> <b> quantity </b> - quantity of items bought by user.
<li> <b> price </b> - price per item. 

In [2]:
N = 100  # Number of rows
SEED = 42  # Fixed seed so that "Restart & Run All" regenerates the same sample
seed(SEED)

# Each list below is derived from the previous one, so that an ad can only be
# clicked if it was shown, page time and purchases only exist after a click,
# and quantity/price are only non-zero for sessions that ended in a purchase.
ids = [randint(100, 140) for _ in range(N)]
ad_shown = [randint(0, 1) for _ in range(N)]
ad_clicked = [randint(0, 1) if shown == 1 else 0 for shown in ad_shown]
page_time = [randint(1, 100) if clicked == 1 else 0 for clicked in ad_clicked]
item_bought = [randint(0, 1) if clicked == 1 else 0 for clicked in ad_clicked]
quantity = [randint(1, 5) if bought == 1 else 0 for bought in item_bought]
price = [5 if bought == 1 else 0 for bought in item_bought]


In [3]:
# Assemble the generated lists into one integer DataFrame, one column per list.
df = pd.DataFrame(
    {
        "ids": ids,
        "ad_shown": ad_shown,
        "ad_clicked": ad_clicked,
        "page_time": page_time,
        "item_bought": item_bought,
        "quantity": quantity,
        "price": price,
    },
    dtype=int,
)

In [4]:
# All-zero frame with the same columns as `df`, used to exercise the
# edge-case guards of the metric functions.
columns = ["ids", "ad_shown", "ad_clicked", "page_time", "item_bought", "quantity", "price"]
df_zero = pd.DataFrame(np.zeros(shape=(N, len(columns)), dtype=int), columns=columns)
df_zero.head(2)

Unnamed: 0,ids,ad_shown,ad_clicked,page_time,item_bought,quantity,price
0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0


In [5]:
# Preview the first ten generated sessions.
df.iloc[:10]

Unnamed: 0,ids,ad_shown,ad_clicked,page_time,item_bought,quantity,price
0,132,0,0,0,0,0,0
1,122,1,0,0,0,0,0
2,116,0,0,0,0,0,0
3,117,1,1,91,0,0,0
4,112,1,1,33,1,3,5
5,101,0,0,0,0,0,0
6,101,1,0,0,0,0,0
7,110,0,0,0,0,0,0
8,105,1,1,62,0,0,0
9,105,1,0,0,0,0,0



# 2. Implementing functions.  

## 2.1 Click-Through Rate


<p><center><b> Click-Through Rate (CTR) </b> </center></p>
<b> Click-Through Rate (CTR) </b> is calculated as: $$ CTR = \frac {\text{Total Measured Clicks}}{\text{Total Measured Ad Impressions} } \times 100 \% $$  
<p>
<li><b>Total Measured Clicks </b> - is the total amount of clicks on an ad   
<li><b>Total Measured Ad Impressions </b> - is the number of times an ad was loaded on a page  
</p>
So CTR function should take a list of users for whom the ad was shown and their actions and output a single value in the range from 0 to 100.  

Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data contains digits except 0 and 1
<li> TMAI is zero 
<li> TMC > TMAI </li>
Let`s implement CTR function:

In [6]:
def CTR(X, ad_shown, id_actions):
    """Compute the Click-Through Rate: total clicks / total impressions * 100.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        two columns.
    ad_shown : column label
        Column holding 1 when the ad was shown in the session, 0 otherwise.
    id_actions : column label
        Column holding 1 when the ad was clicked in the session, 0 otherwise.

    Returns
    -------
    number or str
        The CTR rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Shape guards: reject 1-D or empty input first, then too-narrow frames.
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 2:
        return "Data should have 2 colums!"

    if not is_numeric_dtype(X[ad_shown]) or not is_numeric_dtype(X[id_actions]):
        return "Data should numeric!"

    # Both flag columns may only contain 0 and 1.
    if any(set(X[label].unique()) - {0, 1} for label in (id_actions, ad_shown)):
        return("The values expected to be 0 or 1!")

    clicks = X[id_actions].astype(int).sum()
    impressions = X[ad_shown].astype(int).sum()

    # No impressions, or more clicks than impressions, cannot yield a valid rate.
    if impressions <= 0 or clicks > impressions:
        return "Something is wrong with the data!"

    return round(clicks / impressions * 100, 2)
    

Lets check the function on expected data and edge cases:

In [7]:
# Happy path first, then one call per guarded edge case.
print("Expected output:")
print(CTR(df, "ad_shown", "ad_clicked"))
print("===========================================")
print("Edge cases:")
for ctr_args in (
    (df, "ids", "ad_clicked"),                  # impressions column is not 0/1
    (df.iloc[:0, :], "ad_shown", "ad_clicked"),  # no rows
    (df.iloc[:, :0], "ad_shown", "ad_clicked"),  # no columns
    (df_zero, "ad_shown", "ad_clicked"),         # zero impressions
):
    print(CTR(*ctr_args))

Expected output:
53.06
Edge cases:
The values expected to be 0 or 1!
2D array expected!
Data should have 2 colums!
Something is wrong with the data!



## 2.2 Return on Investment (ROI)


<p><center><b> Return on Investment (ROI) </b> </center></p>
<b> Return on Investment (ROI) </b> is calculated as: $$ ROI = \frac {\text{Amount Gained – Amount Spent}}{\text{Amount Spent} } \times 100 \% $$  
<p>
<li><b>Amount Gained </b> - is the amount of income that has been generated by an investment.   
<li><b>Amount Spent </b> - is the total amount spent on an investment.   
</p>
<p>
ROI stands for Return on Investment and means the amount of money you get back relative to the amount of money you put into something. It is different to profit, which is simply the amount spent subtracted from the amount earned. ROI goes a step further and works out profit per the amount spent. This answers the question – how much profit can I earn per pound/dollar/euro etc spent. </p>

So the ROI function should take a list of users for whom the ad was shown, check whether they clicked on the ad and, if so, add the sum they spent to Amount Gained, then subtract the Amount Spent on the ad campaign.  
  
  
In our case Amount Gained is: $\sum^k_{i=1}\, \text{item_bought}\times\text{quantity}\times\text{price}$  
Amount Spent is: $\sum^k_{i=1}\, \text{price_per_ad }$   

Output: any real number.
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for item_bought, id_actions, ad_shown contains digits except 0 and 1
<li> Amount Spent is zero 

Let`s implement ROI function:

In [8]:
def ROI(X, ad_shown, id_actions, item_bouht, quantity, price, price_per_ad):
    """Compute Return on Investment: (gained - spent) / spent * 100.

    Amount gained is the sum over rows of
    ``id_actions * ad_shown * item_bouht * quantity * price``; amount spent is
    ``price_per_ad`` times the number of rows with ``ad_shown == 1``.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        four columns.
    ad_shown, id_actions, item_bouht : column labels
        0/1 flag columns (ad shown, ad clicked, item bought).
    quantity, price : column labels
        Numeric columns with the purchased quantity and the price per item.
    price_per_ad : int or float
        Cost of a single ad impression.

    Returns
    -------
    number or str
        The ROI rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 4:
        return "Data should have 4 colums!"

    columns_numeric = all(
        is_numeric_dtype(X[label])
        for label in (ad_shown, id_actions, item_bouht, quantity, price)
    )
    if not (columns_numeric and type(price_per_ad) in (int, float)):
        return "Data should numeric!"

    # The three flag columns may only contain 0 and 1.
    if any(set(X[label].unique()) - {0, 1} for label in (id_actions, ad_shown, item_bouht)):
        return("The values expected to be 0 or 1!")

    # 1 only for rows where the ad was shown, clicked and led to a purchase.
    converted = X[id_actions].astype(int) * X[ad_shown].astype(int) * X[item_bouht].astype(int)
    # Quantity and price keep their own numeric dtype: casting them to int
    # would truncate fractional prices (4.99 -> 4) and understate the revenue.
    amount_gained = (converted * X[quantity] * X[price]).sum()
    amount_spent = price_per_ad * X[ad_shown].astype(int).sum()

    # Without any spend the ratio is undefined.
    if amount_spent <= 0:
        return "Something is wrong with the data!"

    return round((amount_gained - amount_spent) / amount_spent * 100, 2)
    

Lets check the function on expected data and edge cases:

In [9]:
# Column labels shared by every ROI call below.
roi_columns = ("ad_shown", "ad_clicked", "item_bought", "quantity", "price")

print("Expected output:")
print(ROI(df, *roi_columns, 0.5))
print("===========================================")
print("Edge cases:")
print(ROI(df, *roi_columns, "0.5"))             # ad price passed as a string
print(ROI(df, "ids", *roi_columns[1:], 0.5))    # impressions column is not 0/1
print(ROI(df.iloc[:0, :], *roi_columns, 0.5))   # no rows
print(ROI(df.iloc[:, :3], *roi_columns, 0.5))   # too few columns
print(ROI(df_zero, *roi_columns, 0.5))          # nothing spent

Expected output:
757.14
Edge cases:
Data should numeric!
The values expected to be 0 or 1!
2D array expected!
Data should have 4 colums!
Something is wrong with the data!



## 2.3 Average Page Time


<p><center><b> Average Page Time </b> </center></p>
<b> Average Page Time </b> is calculated as: $$ APT = \frac {\sum\,\text{Time Spent on a Page by a User}}{\text{Number of Users} }  $$  
<p>
<li><b>Time Spent on a Page by a User </b> - is time measured for each user who visits a webpage.   
<li><b>Number of Users </b> - is the number of users who visit a webpage.   
</p>
<p>
Usually users who spend less than 5 seconds on a webpage are not included in the calculations. </p>


In our case Time Spent on a Page by a User is: $$\sum \text{ad_clicked}\times\text{page_time}\times \text{I(page_time>5)}$$  
Number of Users is: $$ \text{len(set(ids where ad_clicked } \times \text{ I(page_time>5)} = 1 \text{))} $$ 

<p> Where $ \text{I(page_time > 5)} $ is: </p>  

$$ \text{I(page_time > 5)}= \begin{cases}
1, & \text{if page_time $> 5$;} \\
0, & \text{if page_time $\leqslant 5$.}
\end{cases} $$


Output: any real positive number.
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for item_bought, id_actions, ad_shown contains digits except 0 and 1
<li> Number of users is zero 

Let`s implement AVP function:


In [10]:
def AVP(X, ids, id_actions, page_time):
    """Compute the average page time per unique user who clicked the ad.

    Only rows with ``id_actions == 1`` and ``page_time > 5`` are counted. The
    summed page time of those rows is divided by the number of distinct
    ``ids`` among them.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        three columns.
    ids : column label
        Numeric column with the user identifiers.
    id_actions : column label
        0/1 column marking sessions in which the ad was clicked.
    page_time : column label
        Numeric column with the time spent on the page.

    Returns
    -------
    number or str
        The average rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Checking for edge cases:
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 3:
        return "Data should have 3 colums!"

    # The negation must cover all three dtype tests. Written as
    # `not a and b and c` it only negated the first one, which let a
    # non-numeric page_time column through to the comparison below.
    if not (is_numeric_dtype(X[ids]) and is_numeric_dtype(X[id_actions])
            and is_numeric_dtype(X[page_time])):
        return "Data should numeric!"

    if set(X[id_actions].unique()) - {0, 1}:
        return(str(id_actions)+" column values expected to be 0 or 1!")

    # Rows of users who clicked the ad and stayed longer than 5 time units.
    engaged = (X[id_actions] == 1) & (X[page_time] > 5)

    total_time = sum(X[page_time][engaged].astype(int))
    unique_users = len(X[ids][engaged].unique())

    if unique_users <= 0:
        return "Something is wrong with the data!"

    return round(total_time / unique_users, 2)

    

Lets check the function on expected data and edge cases:

In [11]:
# Column labels used by the regular AVP calls below.
avp_columns = ("ids", "ad_clicked", "page_time")

print("Expected output:")
print(AVP(df, *avp_columns))
print("===========================================")
print("Edge cases:")
for avp_frame, avp_labels in (
    (df, ("ids", "ids", "page_time")),  # click column is not 0/1
    (df.iloc[:0, :], avp_columns),      # no rows
    (df.iloc[:, :2], avp_columns),      # too few columns
    (df_zero, avp_columns),             # nobody clicked
):
    print(AVP(avp_frame, *avp_labels))

Expected output:
68.41
Edge cases:
ids column values expected to be 0 or 1!
2D array expected!
Data should have 3 colums!
Something is wrong with the data!



## 2.4 Customer Lifetime Value (CLV)


<p><center><b> Customer Lifetime Value (CLV) </b> </center></p>  

I assume there is a typo in this metric task: it seems illogical to subtract Average Purchase Frequency from Average Purchase Value, so I took a different formula for CLV.  
  
  
<b> Customer Lifetime Value (CLV) </b> is calculated as: $$ CLV = \text{Average Purchase Value}\times\text{Average Purchase Frequency} \times \text{Average Customer Lifespan}  $$  
<p>
<li><b>Average Purchase Value </b> - is Total Revenue / # of Purchases.   
<li><b>Average Purchase Frequency </b> - is the # of Purchases / Unique Customers. 
<li><b>Average Customer Lifespan </b> - is the time of how long customers stay with your business. 
</p>



In our case Average Purchase Value is: $$ \frac {\sum\, \text{item_bought}\times\text{quantity}\times \text{price}}{\sum{\text{item_bought}}}$$  
Average Purchase Frequency is: $$ \frac{ \sum\, \text{item_bought} } { \text{len(set(ids))} }$$




Output: any real positive number.
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for item_bought contains digits except 0 and 1
<li> Number of users is zero 

Let`s implement CLV function:


In [12]:
def CLV(X, ids, item_bought, quantity, price, average_lifespan):
    """Compute Customer Lifetime Value.

    ``CLV = average purchase value * average purchase frequency * lifespan``
    where the average purchase value is total revenue divided by the number
    of purchases, and the average purchase frequency is the number of
    purchases divided by the number of distinct ``ids`` in ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        four columns.
    ids : column label
        Numeric column with the user identifiers.
    item_bought : column label
        0/1 column marking sessions that ended in a purchase.
    quantity, price : column labels
        Numeric columns with the purchased quantity and the price per item.
    average_lifespan : int or float
        Average customer lifespan used as the final multiplier.

    Returns
    -------
    number or str
        The CLV rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Checking for edge cases:
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 4:
        return "Data should have 4 colums!"

    columns_numeric = all(
        is_numeric_dtype(X[label]) for label in (ids, item_bought, quantity, price)
    )
    if not (columns_numeric and type(average_lifespan) in (int, float)):
        return "Data should numeric!"

    if set(X[item_bought].unique()) - {0, 1}:
        # The message names the offending column. This branch previously
        # referenced an undefined name and raised NameError instead.
        return(str(item_bought)+" column values expected to be 0 or 1!")

    bought = X[item_bought].astype(int)
    # Quantity and price keep their own numeric dtype so that fractional
    # prices are not truncated before the revenue is summed.
    total_revenue = (bought * X[quantity] * X[price]).sum()
    num_purchases = bought.sum()
    # All distinct ids in X are counted, whether or not they bought anything.
    num_customers = len(X[ids].unique())

    if num_purchases <= 0 or num_customers <= 0:
        return "Something is wrong with the data!"

    average_purchase_value = total_revenue / num_purchases
    average_purchase_frequency = num_purchases / num_customers
    return round(average_purchase_value * average_purchase_frequency * average_lifespan, 2)

    

Lets check the function on expected data and edge cases:

In [13]:
# Column labels shared by every CLV call below.
clv_columns = ("ids", "item_bought", "quantity", "price")

print("Expected output:")
print(CLV(df, *clv_columns, 2))
print("===========================================")
print("Edge cases:")
for clv_frame, clv_lifespan in (
    (df, "2"),            # lifespan passed as a string
    (df.iloc[:0, :], 2),  # no rows
    (df.iloc[:, :2], 2),  # too few columns
    (df_zero, 2),         # no purchases
):
    print(CLV(clv_frame, *clv_columns, clv_lifespan))

Expected output:
11.05
Edge cases:
Data should numeric!
2D array expected!
Data should have 4 colums!
Something is wrong with the data!



## 2.5 Conversion Rate (CR).


<p><center><b> Conversion Rate (CR) </b> </center></p>  
   
  
<b> Conversion Rate (CR) </b> is calculated as: $$ CR = \frac {\text{Total Attributed Conversion}}{\text{Total Measured Clicks}} \times \text{100%}  $$  
<p>
<li><b>Total Attributed Conversion </b> - is the total amount of conversion recorded which have been caused clicks.   
<li><b>Total Clicks </b> - is the number of times an ad was clicked on. 

 In our case Total Attributed Conversion is: $ \sum \text{item_bought}$  
    
Total Clicks is: $ \ \sum \text{ad_clicked} $
    
    
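Output: a real number in the range from 0 to 1 (the function below returns the ratio as a fraction; multiply it by 100 to obtain the percentage from the formula above).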
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for item_bought contains digits except 0 and 1
<li> Number of users is zero 

Let`s implement CR function:


In [14]:
def CR(X, ad_clicked, item_bought):
    """Compute the Conversion Rate: total conversions / total clicks.

    The result is returned as a fraction; it is not multiplied by 100.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        two columns.
    ad_clicked : column label
        0/1 column marking sessions in which the ad was clicked.
    item_bought : column label
        0/1 column marking sessions that ended in a purchase.

    Returns
    -------
    number or str
        The ratio rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Checking for edge cases:
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 2:
        return "Data should have 2 colums!"

    if not (is_numeric_dtype(X[ad_clicked]) and is_numeric_dtype(X[item_bought])):
        return "Data should numeric!"

    # Validate the raw values. Casting to int before the check truncates
    # e.g. 0.5 to 0, so non-binary data would pass unnoticed.
    if any(set(X[label].unique()) - {0, 1} for label in (ad_clicked, item_bought)):
        return("Column values expected to be 0 or 1!")

    total_conversion = sum(X[item_bought].astype(int))
    total_clicks = sum(X[ad_clicked].astype(int))

    # No clicks, or more conversions than clicks, cannot yield a valid rate.
    if total_clicks <= 0 or total_clicks < total_conversion:
        return "Something is wrong with the data!"

    return round(total_conversion / total_clicks, 2)

Lets check the function on expected data and edge cases:

In [15]:
# Column labels shared by every CR call below.
cr_columns = ("ad_clicked", "item_bought")

print("Expected output:")
print(CR(df, *cr_columns))
print("===========================================")
print("Edge cases:")
# No rows, too few columns, and no clicks at all.
for cr_frame in (df.iloc[:0, :], df.iloc[:, :1], df_zero):
    print(CR(cr_frame, *cr_columns))

Expected output:
0.58
Edge cases:
2D array expected!
Data should have 2 colums!
Something is wrong with the data!



# 3. Additional Metrics.



## 3.1. Bounce rate.


Essentially, bounce rate is the percentage of website visitors who look at one page then leave right after. Having a high bounce rate indicates that your content, copy, or offer aren’t keeping people on the site, which also translates to sales pipeline breakdowns. 

We will calculate it by measuring how many users didn't spend much time on the page after clicking the ad, compared to all users who clicked on the ad.  

the formula is: $$ \text{BR} = \frac { \sum \text{Users Left Right After}}{\sum \text{All Users Clicked On Ad}} $$  
  
  
Input: DataFrame, ad_clicked: {0,1}, time_spend_on_page: int , limit: integer  
limit: is time limit which we consider to be short  

Output: any real number in the range from 0 to 1 (the ratio is returned as a fraction, not as a percentage).  

Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for ad_clicked contains digits except 0 and 1
<li> Number of users is zero 

Let`s implement BR function:

In [16]:
def BR(X, ad_clicked, time_spend, limit):
    """Compute the bounce rate among users who clicked the ad.

    The rate is the number of clicks whose ``time_spend`` is below ``limit``
    divided by the total number of clicks, returned as a fraction.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        two columns.
    ad_clicked : column label
        0/1 column marking sessions in which the ad was clicked.
    time_spend : column label
        Numeric column with the time spent on the page.
    limit : int or float
        Page times strictly below this value count as a bounce.

    Returns
    -------
    number or str
        The ratio rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Checking for edge cases:
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 2:
        return "Data should have 2 colums!"

    if not (is_numeric_dtype(X[ad_clicked]) and is_numeric_dtype(X[time_spend])
            and type(limit) in (int, float)):
        return "Data should numeric!"

    # Validate the raw values. Casting to int before the check truncates
    # e.g. 0.5 to 0, so non-binary data would pass unnoticed.
    if set(X[ad_clicked].unique()) - {0, 1}:
        return("Column values expected to be 0 or 1!")

    # Clicks that were followed by less than `limit` time on the page.
    users_left = sum(X[ad_clicked][X[time_spend] < limit].astype(int))
    total_clicks = sum(X[ad_clicked].astype(int))

    if total_clicks <= 0 or total_clicks < users_left:
        return "Something is wrong with the data!"

    return round(users_left / total_clicks, 2)

Lets check the function on expected data and edge cases:

In [17]:
# Column labels shared by every BR call below.
br_columns = ("ad_clicked", "page_time")

print("Expected output:")
print(BR(df, *br_columns, 50))
print("===========================================")
print("Edge cases:")
for br_frame, br_limit in (
    (df, "50"),             # limit passed as a string
    (df.iloc[:0, :], 101),  # no rows
    (df.iloc[:, :1], 101),  # too few columns
    (df_zero, 101),         # no clicks at all
):
    print(BR(br_frame, *br_columns, br_limit))

Expected output:
0.42
Edge cases:
Data should numeric!
2D array expected!
Data should have 2 colums!
Something is wrong with the data!



## 3.2 Cost Per Conversion.


<b>Cost per conversion</b> is a metric used to identify how much it actually costs a Web advertiser to acquire each real customer - one that actually makes a purchase. The cost includes all the traffic for the duration of a campaign, during which conversions are also tracked. To make CPC calculation easier, advertising companies usually provide "traffic packages," where the one paying for advertisements gets a specific number of views or a specific time span for a fixed amount.
  
$$ CPC = \frac {\sum \text{ad_shown} \times \text{Price Per Ad}} {\sum \text{item_bought}} $$
The formula for cost for conversion is simple: It is the total cost for generating the traffic divided by the number of conversions.  
For example, suppose that an ad campaign costs \\$ 100 for 100 views and at the end of the campaign, it yielded five conversions. In that case, the formula is 
$$ CPC = \frac {\$\text{100}} {\text{5}} = \$ 20 \text{ per conversion.}$$  

Input: DataFrame, ad_shown: {0,1}, ad_clicked: {0,1}, item_bought: {0,1}, price_per_ad: integer or float  
 
Output: any real positive number.
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Data for ad_clicked contains digits except 0 and 1
<li> Number of users is zero 

Let`s implement CPC function:

In [18]:
def CPC(X, ad_shown, ad_clicked, item_bought, price_per_ad):
    """Compute the cost per conversion.

    The total ad cost (``price_per_ad`` times the number of rows with
    ``ad_shown == 1``) is divided by the number of rows in which the ad was
    clicked and an item was bought.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data; must be two-dimensional, non-empty and have at least
        two columns.
    ad_shown, ad_clicked, item_bought : column labels
        0/1 flag columns (ad shown, ad clicked, item bought).
    price_per_ad : int or float
        Cost of a single ad impression.

    Returns
    -------
    number or str
        The cost rounded to two decimals, or a message string naming the
        validation step that failed.
    """
    # Checking for edge cases:
    if X.ndim < 2 or len(X) == 0:
        return "2D array expected!"
    if X.shape[1] < 2:
        return "Data should have 2 colums!"

    if not (is_numeric_dtype(X[ad_shown]) and is_numeric_dtype(X[item_bought])
            and is_numeric_dtype(X[ad_clicked]) and type(price_per_ad) in (int, float)):
        return "Data should numeric!"

    # Any single non-binary flag column is an error. The tests used to be
    # joined with `and` (so both had to be invalid), skipped ad_clicked, and
    # ran after a truncating int cast.
    if any(set(X[label].unique()) - {0, 1} for label in (ad_shown, ad_clicked, item_bought)):
        return("Column values expected to be 0 or 1!")

    total_ad_price = sum(X[ad_shown].astype(int)) * price_per_ad
    # Only purchases that followed a click count as conversions.
    conversions = sum(X[ad_clicked].astype(int) * X[item_bought].astype(int))

    if conversions <= 0:
        return "Something is wrong with the data!"

    return round(total_ad_price / conversions, 2)

Lets check the function on expected data and edge cases:

In [19]:
# Column labels shared by every CPC call below.
cpc_columns = ("ad_shown", "ad_clicked", "item_bought")

print(CPC(df, *cpc_columns, 0.5))
print("===========================================")
print("Edge cases:")
for cpc_frame, cpc_price in (
    (df, "0.5"),            # ad price passed as a string
    (df.iloc[:0, :], 0.5),  # no rows
    (df.iloc[:, :1], 0.5),  # too few columns
    (df_zero, 0.5),         # no conversions
):
    print(CPC(cpc_frame, *cpc_columns, cpc_price))

1.63
Edge cases:
Data should numeric!
2D array expected!
Data should have 2 colums!
Something is wrong with the data!



## 3.3 New Vs. Returning Visitors.


<b> New Vs. Returning Visitors. </b>
Analyze the \% of new users (unique visitors that did not visit the site for at least three months) a website acquires vs. \% of returning users (unique visitors who visited the website within three months period) on a monthly granularity to track audience behavior and measure engagement levels.


Calculation:  
<li><b>New Users </b> -are unique visitors that were not spotted visiting the site for at least N months. 
<li><b>Returning Users </b> - are the opposite of new users, i.e., every user that visited a site within the N months lifetime.

Formula:  
    $$ \text{New Vs Returning Visitors} = \frac { \sum \text{New Users} }{ \sum \text{Returning Users}}  $$  
    
For a single site: The user can see the ratio (number and percentage) of New Users vs. Returning users over time.


Usage:
Understand the types of users, their behaviors, and the impact of new vs. returning users have on a website’s overall performance. Use these insights to measure the success of your audience acquisition and retention strategies.
    
Input: DataFrame, ids: int, first_period_index: int, second_period_index: integer    

We will get all users from first period and then calculate the fraction of New Users to Returning Users for the second period. 
    
Output: any real positive number.  
    
Edge cases:  
<li> Data has less dimensions
<li> Length of Data is zero 
<li> Data is not numeric 
<li> Number of users is zero 

Let`s implement function:

In [20]:
def NVRV(X, ids, first_period_index, second_period_index):
    """Compute the ratio of new to returning visitors between two periods.

    Rows ``[0, first_period_index)`` form the first period and rows
    ``[first_period_index, second_period_index)`` the second. A user of the
    second period is "returning" if the same id occurs in the first period
    and "new" otherwise.

    Parameters
    ----------
    X : pandas.DataFrame
        Session data ordered so that row position reflects time.
    ids : column label
        Numeric column with the user identifiers.
    first_period_index, second_period_index : int
        Row positions delimiting the two periods. The type check also admits
        float, but the values are used as slice bounds.

    Returns
    -------
    number or str
        ``len(new) / len(returning)`` rounded to two decimals, or a message
        string naming the validation step that failed.
    """
    # Checking for edge cases:
    if len(X[ids]) == 0:
        return "DF has no entries!"

    bounds_numeric = all(
        type(bound) in (int, float)
        for bound in (first_period_index, second_period_index)
    )
    if not (is_numeric_dtype(X[ids]) and bounds_numeric):
        return "Data should numeric!"

    if second_period_index < 1 or first_period_index < 1:
        return("indexes should be greater than 1!")
    if (second_period_index < first_period_index) or (first_period_index > len(X)) \
            or (second_period_index > len(X)):
        return "Something is wrong with the data!"

    first_period_ids = set(X[ids][:first_period_index].unique())
    second_period_ids = set(X[ids][first_period_index:second_period_index].unique())

    new_users = second_period_ids - first_period_ids
    returning_users = first_period_ids & second_period_ids

    # The ratio is undefined without returning users. Guard the actual
    # denominator rather than the first-period set, which cannot be empty
    # after the bound checks above, so this case no longer divides by zero.
    if not returning_users:
        return "Something is wrong with the data!"

    return round(len(new_users) / len(returning_users), 2)

Lets check the function on expected data and edge cases:

In [21]:
# Happy path: rows [0, 50) form the first period, rows [50, 99) the second.
# The former leading call `NVRV(df, "ids", 50, 98)` was dropped: its result
# was neither printed nor the cell's last expression, so it showed nothing.
print(NVRV(df, "ids", 50, 99))
print("===========================================")
print("Edge cases:")
print(NVRV(df, "ids", 50, 199))            # second bound beyond the data
print(NVRV(df.iloc[:0,:], "ids", 50, 99))  # no rows
print(NVRV(df_zero, "ids", 150, 99))       # bounds out of order / out of range

0.42
Edge cases:
Something is wrong with the data!
DF has no entries!
Something is wrong with the data!
