# In this notebook we test the performance of the search bar on GitHub Marketplace.

In [3]:
import csv
import pprint
import re
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd
import requests
from tqdm import tqdm

# Folder that holds the dataset files (a relative path, resolved against the
# working directory the notebook is run from).
DATA_DIR = Path('../../data')

# Load test data

In [5]:
# Read the test set and keep the 100 rows with the largest 'names_number'.
df_action_name_test = (
    pd.read_csv(DATA_DIR / 'test.csv.gz', index_col=[0])
    .sort_values('names_number', ascending=False)
    .iloc[:100]
)

df_action_name_test

Unnamed: 0,action,name_official,description_official,names_users,names_number
0,actions/upload-artifact,Upload a Build Artifact,Upload a build artifact that can be used by su...,"upload dh-make-golang test run as artifact,upl...",736
1,actions/cache,Cache,Cache artifacts like dependencies and build ou...,"cache conan data,handle yarn cache,restore nod...",363
2,actions/checkout,Checkout,Checkout a Git repository at a particular version,"checkout ref commit,checkout the source code,c...",359
3,actions/download-artifact,Download a Build Artifact,Download a build artifact that was previously ...,"download external libs,download ${{ matrix.nam...",228
4,actions/upload-release-asset,Upload a Release Asset,Upload a release asset to an existing release ...,"upload node modules package,uploading release ...",218
...,...,...,...,...,...
92,pypa/cibuildwheel,cibuildwheel,Installs and runs cibuildwheel on the current ...,"build wheels for linux,build macos wheels,buil...",4
90,shogo82148/actions-goveralls,actions-goveralls,Coveralls GitHub Action with Go integration po...,"report coverage,send goveralls coverage,upload...",4
89,r0adkll/sign-android-release,Sign Android release,An action to sign an Android release APK or AAB,"sign proprietary app bundle,sign helloxr openg...",4
88,azure/cli,Azure CLI Action,Automate your GitHub workflows using Azure CLI...,"get windows helper ips,create windows helper v...",4


In [6]:
# Read each column directly instead of running one iterrows() pass per list.
list_actions = df_action_name_test['action'].tolist()
list_names_official = [name.lower() for name in df_action_name_test['name_official']]
# 'names_users' holds comma-separated names; keep at most the first 10 per action.
list_names_users = [names.split(',')[:10] for names in df_action_name_test['names_users']]

# Evaluate the Marketplace search engine by searching for actions with the user-assigned names in the test set.

In [7]:
def search_actions(name):
    '''
    Search the GitHub Marketplace for ``name`` and return the slugs of the listed actions.

    Parameters
    ----------
    name : str
        Free-text query, e.g. a name that a user gave to an action.

    Returns
    -------
    list of str
        Last path segment of every ``/marketplace/actions/...`` link found in
        the returned HTML, in order of appearance (duplicates are kept).

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or the server answers with an HTTP
        error status.
    '''
    # quote_plus encodes spaces as '+' (same as the previous hand-built query)
    # and additionally percent-encodes characters such as '&', '#' or '+' that
    # would otherwise truncate or alter the query string.
    # The trailing '+' (an encoded trailing space) is kept from the original format.
    url_name = 'https://github.com/marketplace?query=' + quote_plus(name) + '+'

    # A timeout keeps a single stalled request from hanging the whole evaluation.
    response = requests.get(url_name, timeout=30)
    # Fail loudly on error responses (e.g. rate limiting): parsing them would
    # silently yield "no results" and bias the measured accuracy downwards.
    response.raise_for_status()

    pattern = re.compile(r'<a href="/marketplace/actions/[^"]*')
    return [link.split('/')[-1] for link in pattern.findall(response.text)]

In [8]:
# Query the Marketplace once per user-assigned name, grouped by action:
# all_action_list[i][j] holds the results for the j-th name of the i-th action.

all_action_list = []

for names_users in tqdm(list_names_users):
    all_action_list.append([search_actions(name) for name in names_users])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [02:54<00:00,  1.75s/it]


In [9]:
def match_action(name_official, found_actions, top_n):
    '''
    Tell whether the expected action shows up among the first search results.

    Only the first ``top_n`` entries of ``found_actions`` are inspected; an
    entry counts as a hit when ``name_official`` is contained in it.
    Returns 1 on a hit, 0 otherwise.
    '''
    candidates = found_actions[:top_n]
    return int(any(name_official in candidate for candidate in candidates))

def acc(all_action_match):
    '''
    Compute the accuracy of every action and print the average over all actions.

    Parameters
    ----------
    all_action_match : list of list of int
        One inner list per action with a numeric flag per search query
        (1 when the action was found, 0 otherwise).

    Returns
    -------
    list of float
        Fraction of successful queries for each action, in input order.
        An action without any query is scored 0.0 instead of raising
        ZeroDivisionError.

    Notes
    -----
    With an empty ``all_action_match`` the printed average is ``nan`` and an
    empty list is returned.
    '''
    per_action_acc = [
        sum(action_match) / len(action_match) if action_match else 0.0
        for action_match in all_action_match
    ]

    # Guard the overall average as well: there is nothing to average on empty input.
    average = sum(per_action_acc) / len(per_action_acc) if per_action_acc else float('nan')

    print(f'Average Acc:{round(average,4)}')
    return per_action_acc

In [10]:
# Score the user-name searches: for every action, one 0/1 flag per query telling
# whether the action appears within the first `top_n` results.

top_n = 2
all_action_match = []

for official_name, searches in zip(list_names_official, all_action_list):
    # The results are compared against the official name with spaces replaced
    # by '-' (presumably how Marketplace builds its URL slugs -- TODO confirm).
    slug = official_name.replace(' ', '-')
    all_action_match.append([match_action(slug, hits, top_n) for hits in searches])

In [11]:
# Per-action accuracy for the user-assigned names; acc() also prints the average.
list_acc = acc(all_action_match)

Average Acc:0.0279


# What about with the official names?

In [12]:
# Query the Marketplace once per official action name.

actions_by_name_official = [search_actions(name) for name in tqdm(list_names_official)]
    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:31<00:00,  3.20it/s]


In [13]:
# Score the official-name searches; with top_n = 1 only the first result is considered.
top_n = 1
action_match = [
    match_action(real_action.replace(' ', '-'), found_action, top_n)
    for real_action, found_action in zip(list_names_official, actions_by_name_official)
]
        

In [16]:
# Fraction of entries in action_match that are hits.
print(sum(action_match)/len(action_match))

0.77
