In [1]:
"""OVERVIEW
Create a set of functions that will calculate metrics for a news site 
(CTR, CR, average page time, share of readings)

CTR (Click-through rate) - ad conversion,
i.e. the ratio of the number of clicks to the number of ad displays.

CR (Conversion rate) - conversion to a particular section of the site,
i.e. the ratio of the number of users on the previous page to the number of users on the current page.

Average page time - the ratio of the total time to the number of users. 
In this case you shouldn’t count time less than 5 seconds on page.

Share of readings - how many times users share this page,
i.e. the ratio of the number of shares to the number of users.

News site has main page and categories: politics, sport, science, technologies.
Every category has about 10 new posts every day. You can use this information for solving task.

Complete the task in Jupyter Notebook. 
Insert the programming code in code cells and text explanations in markdown cells
then apply formatting and headings. When you finish, send your work to staff."""

'OVERVIEW\nCreate a set of functions that will calculate metrics for a news site \n(CTR, CR, average page time, share of readings)\n\nCTR (Click-through rate) - ad conversion,\ni.e. the ratio of the number of clicks to the number of ad displays.\n\nCR (Conversion rate) - conversion to a particular section of the site,\nI.e. the ration of the number of users on previous page and number of users on current page.\n\nAverage page time - the ratio all time to the number of users. \nIn this case you shouldn’t count time less than 5 seconds on page.\n\nShare of readings - how many times users share this page,\ni.e. the ratio of the number of share and the number of users.\n\nNews site has main page and categories: politics, sport, science, technologies.\nEvery category has about 10 new posts every day. You can use this information for solving task.\n\nComplete the task in Jupyter Notebook. \nInsert the programming code in code cells and text explanations in markdown cells\nthen apply formatti

# CALCULATE CTR 

In [2]:
def cal_CTR(df):
    """
    Compute the click-through rate (CTR) of the ads described by ``df``.

    CTR is the ratio of the total number of ad clicks to the total number
    of ad displays, rounded to two decimal places. The value is printed
    and returned.

    input: dataframe with (at least) the columns
        ad_num: number of ads displayed.
        click_times: number of clicks on those ads.
      Any other column (e.g. DATE, Category) is ignored.
    output: float. 0.0 is returned when there are no ad displays at all
      (empty table, or every ``ad_num`` is zero) because the ratio is
      undefined in that case.

    Missing values in either column are skipped by the summation.
    A missing column raises KeyError.

    I assume, there are multiple ads in the platform at the same time.
    users click the ads that may interest them.
    """
    # Series.sum() is vectorised; float() converts the numpy scalar so the
    # function keeps returning a built-in float.
    total_ad_num = float(df['ad_num'].sum())
    total_click = float(df['click_times'].sum())
    if total_ad_num == 0:
        # Nothing was displayed: avoid dividing by zero.
        ctr = 0.0
    else:
        ctr = round(total_click / total_ad_num, 2)
    print('The Click-through rate is {}'.format(ctr))
    return ctr

In [3]:
import pandas as pd
# Sample ad statistics: one row per category, all for the same day.
data1 = dict(
    DATE=['2020-03-01'] * 4,
    Category=['politics', 'sport', 'science', 'technologies'],
    ad_num=[3, 2, 5, 1],
    click_times=[2, 0, 0, 1],
)
# Build the table used to demonstrate the CTR calculation.
table1 = pd.DataFrame(data1)
# Show the table as the cell output.
table1

Unnamed: 0,DATE,Category,ad_num,click_times
0,2020-03-01,politics,3,2
1,2020-03-01,sport,2,0
2,2020-03-01,science,5,0
3,2020-03-01,technologies,1,1


In [17]:
# Compute the CTR of the sample table; cal_CTR prints the value and returns it.
ctr = cal_CTR(table1)

The Click-through rate is 0.27


# CALCULATE CR (Conversion rate)

In [5]:
def cal_CR(df):
    """
    CR (Conversion rate) - conversion to a particular section of the site.

    A row counts as a conversion when the user moved to the next page
    (``Current_Page == Prev_Page + 1``) and stayed in the same category
    (``prev_category == cur_category``). The rate is the number of such
    rows divided by the total number of rows, rounded to two decimal
    places. The value is printed and returned.

    input: dataframe with (at least) the columns
        prev_category: prev page category.
        cur_category: current page category.
        Prev_Page: the previous page index.
        Current_Page: the current page index.
      Any other column (e.g. UserName) is ignored.
    output: float. 0.0 is returned for an empty table, where the ratio is
      undefined.

    A missing column raises KeyError.
    """
    next_page = df['Prev_Page'] + 1 == df['Current_Page']
    same_category = df['prev_category'] == df['cur_category']
    # Summing the boolean mask counts the matching rows without copying
    # the filtered frame.
    p_to_c_page = int((next_page & same_category).sum())
    total_page = df.shape[0]
    if total_page == 0:
        # No page views at all: avoid dividing by zero.
        CR = 0.0
    else:
        CR = round(p_to_c_page / total_page, 2)
    print('The CR (Conversion rate) is {}'.format(CR) )
    return CR

In [6]:
import pandas as pd
# Sample navigation log: one row per user with the previous and current page.
data2 = dict(
    UserName=['Tom', 'nick', 'krish', 'jack'],
    prev_category=['politics', 'science', 'technologies', 'technologies'],
    cur_category=['politics', 'sport', 'science', 'technologies'],
    Prev_Page=[1, 2, 3, 1],
    Current_Page=[2, 9, 3, 2],
)
# Build the table used to demonstrate the conversion-rate calculation.
table2 = pd.DataFrame(data2)
# Show the table as the cell output.
table2

Unnamed: 0,UserName,prev_category,cur_category,Prev_Page,Current_Page
0,Tom,politics,politics,1,2
1,nick,science,sport,2,9
2,krish,technologies,science,3,3
3,jack,technologies,technologies,1,2


In [16]:
# Compute the conversion rate of the sample table; cal_CR prints the value and returns it.
cr = cal_CR(table2)

The CR (Conversion rate) is 0.5


# CALCULATE Average page time

In [8]:
def cal_AVG_TIME(df, min_seconds=5):
    """
    Average page time - the ratio of all counted time to the number of users.

    Visits shorter than ``min_seconds`` are not counted towards the total
    time; a visit of exactly ``min_seconds`` is counted. The total is
    divided by the number of distinct users in the whole table (including
    users whose visits were all too short) and truncated to an integer.
    The value is printed and returned.

    input:
        df: dataframe with (at least) the columns
            UserName: can be user id or similar items.
            Time: time spent on the current page (seconds).
          Any other column (e.g. Category, Current_Page) is ignored.
        min_seconds: shortest visit, in seconds, that is still counted.
          Defaults to 5.
    output: int. 0 is returned for a table with no users, where the ratio
      is undefined.

    A missing column raises KeyError.
    """
    # ">=" keeps visits of exactly ``min_seconds``: only time *less than*
    # the threshold must be ignored.
    counted_time = df.loc[df['Time'] >= min_seconds, 'Time']
    total_time = counted_time.sum()
    total_user = len(df['UserName'].unique())
    if total_user == 0:
        # No users at all: avoid dividing by zero.
        AVG_TIME = 0
    else:
        AVG_TIME = int(total_time / total_user)
    print('Average page time is {}'.format(AVG_TIME) )
    return AVG_TIME

In [9]:
import pandas as pd
# Sample visit log: time (in seconds) each user spent on a page.
data3 = dict(
    UserName=['Tom', 'nick', 'jack', 'jack'],
    Category=['politics', 'sport', 'science', 'technologies'],
    Time=[90, 180, 10, 3],
    Current_Page=[2, 9, 3, 2],
)
# Build the table used to demonstrate the average-page-time calculation.
table3 = pd.DataFrame(data3)
# Show the table as the cell output.
table3

Unnamed: 0,UserName,Category,Time,Current_Page
0,Tom,politics,90,2
1,nick,sport,180,9
2,jack,science,10,3
3,jack,technologies,3,2


In [15]:
# Compute the average page time of the sample table; cal_AVG_TIME prints the value and returns it.
apt = cal_AVG_TIME(table3)

Average page time is 93


# CALCULATE Share of readings

In [11]:
def cal_sharing_times(df):
    """
    Share of readings - how many times users share this page,
    i.e. the ratio of the number of shares to the number of users.

    The total of ``Sharing_Time`` is divided by the number of distinct
    users and rounded to two decimal places. The value is printed and
    returned.

    input: dataframe with (at least) the columns
        UserName: can be user id or similar items.
        Sharing_Time: how many times the user shared the page.
      Any other column (e.g. Category) is ignored.
    output: float. 0.0 is returned for a table with no users, where the
      ratio is undefined.

    Missing values in ``Sharing_Time`` are skipped by the summation.
    A missing column raises KeyError.
    """
    # float() converts the numpy scalar so the function keeps returning a
    # built-in float.
    total_sharing = float(df['Sharing_Time'].sum())
    total_user = len(df['UserName'].unique())
    if total_user == 0:
        # No users at all: avoid dividing by zero.
        AVG_sharing_times = 0.0
    else:
        AVG_sharing_times = round(total_sharing / total_user, 2)
    print('Share of readings is {}'.format(AVG_sharing_times) )
    return AVG_sharing_times

In [12]:
# Sample sharing log: how many times each user shared a page.
data4 = dict(
    UserName=['Tom', 'nick', 'jack', 'jack'],
    Category=['politics', 'sport', 'science', 'technologies'],
    Sharing_Time=[2, 9, 3, 2],
)
# Build the table used to demonstrate the share-of-readings calculation.
table4 = pd.DataFrame(data4)
# Show the table as the cell output.
table4

Unnamed: 0,UserName,Category,Sharing_Time
0,Tom,politics,2
1,nick,sport,9
2,jack,science,3
3,jack,technologies,2


In [14]:
# Compute the share of readings of the sample table; cal_sharing_times prints the value and returns it.
st = cal_sharing_times(table4)

Share of readings is 5.33
