In [6]:
import pandas as pd
from sql import get_data
import seaborn as sns
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [7]:
# load the keyword test data from the SQL source via the project helper
keywords_query = 'SELECT * FROM keywords'
df = get_data(keywords_query)
df.head()

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid
0,1.27 mm pitch ribbon cable,10.0,,google,
1,10 conductor ribbon cable,0.0,,google,
2,10 pin flat cable,10.0,,google,
3,10 pin flat ribbon cable,10.0,,google,
4,12v dc dc converter,140.0,Hoch,google,0.73


In [8]:
# show only the keywords that come with a suggested bid
df.loc[df['suggested_bid'].notnull()]

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid
4,12v dc dc converter,140.0,Hoch,google,0.73
16,24v 5v dc dc converter,20.0,Hoch,google,1.05
26,4w620,10.0,Hoch,google,0.40
27,5 eck gartenhaus,2400.0,Hoch,google,0.93
31,5eck gartenhaus,110.0,Hoch,google,1.03
...,...,...,...,...,...
1867,vkb stellenangebote,10.0,Mittel,bing,0.14
1871,würth led,20.0,Mittel,bing,0.19
1872,würth leiterplatten,30.0,Gering,bing,1.18
1874,würth waldenburg,20.0,Gering,bing,0.22


In [9]:
# assumed share of the monthly searches that turn into ad clicks (0.05 = 5 %)
estimated_percentage_clicks = 0.05

# expected clicks per month, and their cost at the suggested bid per click
expected_clicks = df['avg_monthly_searches'].mul(estimated_percentage_clicks)
df['clicks_per_mo'] = expected_clicks
df['costs_per_mo'] = expected_clicks.mul(df['suggested_bid'])
df.head()

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
0,1.27 mm pitch ribbon cable,10.0,,google,,0.5,
1,10 conductor ribbon cable,0.0,,google,,0.0,
2,10 pin flat cable,10.0,,google,,0.5,
3,10 pin flat ribbon cable,10.0,,google,,0.5,
4,12v dc dc converter,140.0,Hoch,google,0.73,7.0,5.11


In [10]:
# pattern for the keyword filter: "ki" directly followed or preceded by a space
keyword = 'ki | ki'

# keep rows whose keyword matches the pattern (case-insensitive) and that have a bid,
# ordered by expected clicks ascending
matches_keyword = df['keyword'].str.contains(keyword, case=False)
has_bid = df['suggested_bid'].notnull()
df_filtered = df[matches_keyword & has_bid].sort_values(['clicks_per_mo'])

df_filtered.head()

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
1370,entwicklung ki,10.0,Gering,bing,0.73,0.5,0.365
1382,ki modell,10.0,Gering,bing,0.63,0.5,0.315
1388,ki technologien,10.0,Mittel,bing,1.32,0.5,0.66
500,ki rpa,10.0,Hoch,google,7.26,0.5,3.63
1379,ki im unternehmen,10.0,Mittel,bing,0.87,0.5,0.435


In [11]:
# only show keywords that have less than 5% of the clicks of the most-clicked keyword
low_click_cutoff = df_filtered['clicks_per_mo'].max() * .05
df_filtered[df_filtered['clicks_per_mo'] < low_click_cutoff]

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
1370,entwicklung ki,10.0,Gering,bing,0.73,0.5,0.365
1382,ki modell,10.0,Gering,bing,0.63,0.5,0.315
1388,ki technologien,10.0,Mittel,bing,1.32,0.5,0.66
500,ki rpa,10.0,Hoch,google,7.26,0.5,3.63
1379,ki im unternehmen,10.0,Mittel,bing,0.87,0.5,0.435


In [12]:
# keep only keywords that reach at least 5% of the clicks of the most-clicked keyword
min_click_cutoff = df_filtered['clicks_per_mo'].max() * .05
df_filtered = df_filtered[df_filtered['clicks_per_mo'] >= min_click_cutoff]
df_filtered

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
1387,ki technologie,20.0,Mittel,bing,1.53,1.0,1.53
505,ki und big data,20.0,Gering,google,1.4,1.0,1.4
489,ki datenanalyse,20.0,Mittel,google,3.06,1.0,3.06
488,ki daten,20.0,Gering,google,2.46,1.0,2.46
497,ki nutzen,20.0,Gering,google,1.46,1.0,1.46
498,ki oder ai,20.0,Gering,google,1.05,1.0,1.05
490,ki einsatz,20.0,Mittel,google,1.45,1.0,1.45
483,ki analyse,20.0,Mittel,google,1.25,1.0,1.25
409,gibt es ki,20.0,Gering,google,1.04,1.0,1.04
501,ki seminar,20.0,Hoch,google,1.92,1.0,1.92


In [13]:
# monthly search volume against the suggested bid, coloured by platform
fig = px.scatter(
    df_filtered,
    x='suggested_bid',
    y='avg_monthly_searches',
    color='platform',
    hover_data=['keyword', 'competition'],
)
fig.show()

In [14]:
# monthly search volume against the estimated monthly costs, coloured by platform
fig = px.scatter(
    df_filtered,
    x='costs_per_mo',
    y='avg_monthly_searches',
    color='platform',
    hover_data=['keyword', 'competition'],
)
fig.show()

In [15]:
# total expected clicks and costs per month over all filtered keywords
df_filtered.loc[:, ['clicks_per_mo', 'costs_per_mo']].sum()

clicks_per_mo     98.000
costs_per_mo     205.265
dtype: float64

In [16]:
# monthly spending cap; compared against costs_per_mo below and reported in Euro
budget = 50

In [17]:
# keywords that individually fit the budget, ranked by most clicks first
# (cheaper keyword first when clicks are tied), with a fresh 0..n-1 index
within_budget = df_filtered[df_filtered['costs_per_mo'] <= budget]
ranked = within_budget.sort_values(['clicks_per_mo', 'costs_per_mo'], ascending=[False, True])
df_max_clicks = ranked.reset_index(drop=True)
df_max_clicks.head(22)

Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
0,ki unternehmen,260.0,Mittel,google,2.62,13.0,34.06
1,ki künstliche intelligenz,210.0,Mittel,google,1.56,10.5,16.38
2,ki technologie,170.0,Mittel,google,1.64,8.5,13.94
3,ki modell,140.0,Gering,google,2.3,7.0,16.1
4,ki methoden,110.0,Gering,google,1.44,5.5,7.92
5,künstliche intelligenz ki,70.0,Gering,google,1.56,3.5,5.46
6,ki technologien,70.0,Mittel,google,1.69,3.5,5.915
7,ki intelligenz,70.0,Mittel,google,2.58,3.5,9.03
8,ki künstliche intelligenz,50.0,Gering,bing,0.58,2.5,1.45
9,ki und ai,50.0,Mittel,google,1.36,2.5,3.4


In [18]:
# Greedy selection: walk the ranked keywords top-down and take every keyword
# whose costs still fit into the remaining budget; keywords that do not fit
# are skipped and the walk continues with the next one.
pos = []      # positions (index labels of df_max_clicks) of the selected keywords
costs = 0.0   # running sum of costs_per_mo of the selected keywords
for idx, row in df_max_clicks.iterrows():
    # `<=` (not `<`) so that a combination hitting the budget exactly is accepted,
    # in line with the `costs_per_mo <= budget` filter used to build df_max_clicks.
    # The first row needs no special case: it is checked like every other row.
    if costs + row['costs_per_mo'] <= budget:
        pos.append(idx)
        costs += row['costs_per_mo']

print(f"Costs per month: {costs} Euro")
print('Clicks per month: ' + str(df_max_clicks.iloc[pos]['clicks_per_mo'].sum()))
df_max_clicks.iloc[pos]



Costs per month: 49.45 Euro
Clicks per month: 24.0


Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
0,ki unternehmen,260.0,Mittel,google,2.62,13.0,34.06
2,ki technologie,170.0,Mittel,google,1.64,8.5,13.94
8,ki künstliche intelligenz,50.0,Gering,bing,0.58,2.5,1.45


In [19]:
def clicks_max(df, keyword, budget):
    """
    Greedily pick similar keywords so that many clicks fit into the budget.

    Rows of ``df`` are kept when their ``keyword`` matches the pattern
    ``keyword`` (case-insensitive), they have a ``suggested_bid`` and their
    ``costs_per_mo`` does not exceed ``budget``. The remaining rows are ranked
    by ``clicks_per_mo`` descending (``costs_per_mo`` ascending on ties) and
    taken in that order whenever the running costs stay within ``budget``;
    rows that do not fit are skipped.

    Prints the total costs and clicks of the selection and returns the
    selected rows as a DataFrame with a fresh 0..n-1 index.
    """
    # candidates: similar keywords that have a bid ...
    is_similar = df['keyword'].str.contains(keyword, case=False)
    has_bid = df.suggested_bid.notnull()
    candidates = df[is_similar & has_bid]

    # ... and that fit into the budget on their own
    candidates = candidates[candidates['costs_per_mo'] <= budget]

    # rank: most clicks first, cheaper keyword first on equal clicks
    candidates = candidates.sort_values(
        ['clicks_per_mo', 'costs_per_mo'], ascending=[False, True]
    ).reset_index(drop=True)

    chosen = []   # positions of the selected keywords
    spent = 0.0   # running sum of their costs

    for position, entry in candidates.iterrows():
        price = entry['costs_per_mo']
        # the top-ranked row is always taken; later rows only while the budget holds
        if position == 0 or spent + price <= budget:
            chosen.append(position)
            spent += price

    selection = candidates.iloc[chosen]

    # report totals of the selection
    print(f"Costs per month: {spent} Euro")
    print('Clicks per month: ' + str(selection['clicks_per_mo'].sum()))
    return selection.reset_index(drop=True)


In [20]:
# best keyword combination for the "ki" pattern with a monthly budget of 50
clicks_max(df, keyword='ki | ki', budget=50)

Costs per month: 49.765 Euro
Clicks per month: 24.5


Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
0,ki unternehmen,260.0,Mittel,google,2.62,13.0,34.06
1,ki technologie,170.0,Mittel,google,1.64,8.5,13.94
2,ki künstliche intelligenz,50.0,Gering,bing,0.58,2.5,1.45
3,ki modell,10.0,Gering,bing,0.63,0.5,0.315


In [21]:
def costs_min(df, keyword, clicks_goal):
    """
    Greedily pick similar keywords whose summed clicks stay within ``clicks_goal``.

    Parameters
    ----------
    df : DataFrame with the columns ``keyword``, ``suggested_bid``,
        ``clicks_per_mo`` and ``costs_per_mo``.
    keyword : pattern passed to ``Series.str.contains`` (case-insensitive).
    clicks_goal : upper bound for the summed ``clicks_per_mo`` of the selection.

    Returns
    -------
    DataFrame with the selected rows and a fresh 0..n-1 index (empty when no
    keyword qualifies). The total clicks and costs are also printed.

    Notes
    -----
    Candidates are ranked by clicks descending (costs ascending on ties) and
    taken in that order while the running click sum stays within the goal.
    This is a greedy heuristic; it is not guaranteed to find the cheapest
    combination.
    """
    # filter dataframe for similar keywords that have a suggested bid
    is_similar = df['keyword'].str.contains(keyword, case=False)
    df = df[is_similar & df['suggested_bid'].notnull()]

    # drop keywords that alone already exceed the click goal
    # (previously compared costs_per_mo with the click goal, which mixed up the two units)
    df = df[df['clicks_per_mo'] <= clicks_goal]

    # sort by clicks descending and costs ascending
    df = df.sort_values(['clicks_per_mo', 'costs_per_mo'], ascending=[False, True]).reset_index(drop=True)

    # positions of the selected keywords and running sum of their clicks
    key_position = []
    clicks = 0.0

    # loop through the ranked candidates
    for idx, row in df.iterrows():
        # take the keyword while the running click sum stays within the goal;
        # the first row always fits because of the filter above
        if clicks + row['clicks_per_mo'] <= clicks_goal:
            key_position.append(idx)
            clicks += row['clicks_per_mo']

    selection = df.iloc[key_position]
    total_costs = selection['costs_per_mo'].sum()

    # report total clicks and costs of the selection
    print('Clicks per month: ', clicks)
    # builtin round() works for numpy and plain Python scalars alike
    print('Costs per month: ' + str(round(total_costs, 2)), 'EURO')
    return selection.reset_index(drop=True)

In [22]:
# keyword combination for the "ki" pattern with a goal of 50 clicks per month
costs_min(df, keyword='ki | ki', clicks_goal=50)

Clicks per month:  50.0
Costs per month: 96.42 EURO


Unnamed: 0,keyword,avg_monthly_searches,competition,platform,suggested_bid,clicks_per_mo,costs_per_mo
0,ki unternehmen,260.0,Mittel,google,2.62,13.0,34.06
1,ki künstliche intelligenz,210.0,Mittel,google,1.56,10.5,16.38
2,ki technologie,170.0,Mittel,google,1.64,8.5,13.94
3,ki modell,140.0,Gering,google,2.3,7.0,16.1
4,ki methoden,110.0,Gering,google,1.44,5.5,7.92
5,künstliche intelligenz ki,70.0,Gering,google,1.56,3.5,5.46
6,ki bereiche,40.0,Gering,google,1.28,2.0,2.56
