# Prototype code for calling the GitHub API to check repository traffic

In [6]:
import pandas
import numpy
import requests
import json
import datetime
import time
import os, sys
import yaml

def jprint(obj):
    ''' Pretty-print a JSON-serialisable Python object to stdout (sorted keys, 4-space indent) '''
    print(json.dumps(obj, indent=4, sort_keys=True))
    
def get_datetime_string():
    ''' Return the current local timestamp and its date part, both as strings.

    Returns a tuple (time_now, date_now): time_now is str() of
    datetime.datetime.now(), date_now is the text before its first space.
    '''
    timestamp = str(datetime.datetime.now())
    # str(datetime) separates date and time with a single space
    date_part, _, _ = timestamp.partition(' ')
    return timestamp, date_part

def logfile_exists(log_filename, log_folder='logs/'):
    ''' Check if a traffic log file exists for the particular repo.

    log_filename: name of the log file (without any folder part)
    log_folder: folder to look in; defaults to 'logs/' as before

    Returns True if an entry named log_filename is listed in log_folder,
    and False otherwise, including when log_folder itself does not exist.
    '''
    try:
        return log_filename in os.listdir(log_folder)
    except FileNotFoundError:
        # No folder means no log has been written yet
        return False

def date_checked(log_dataframe, date_now):
    ''' Tell whether the newest 'date_when_checked' entry in the log equals date_now.

    An empty log always yields False.
    '''
    # Nothing logged yet, so today cannot have been fetched
    if not len(log_dataframe):
        return False

    # Largest value of the date column = the most recent check
    dates_newest_first = log_dataframe['date_when_checked'].sort_values(ascending=False)
    latest_date = dates_newest_first.iloc[0]

    return bool(date_now == latest_date)

def check_github_traffic_api(endpoints, collect_data=None):
    ''' Query GitHub's repo traffic API once per endpoint and collect the JSON replies.

    endpoints: iterable of traffic endpoint names (e.g. 'views', 'clones'); each is
        appended to https://api.github.com/repos/{owner_name}/{repo_name}/traffic/
    collect_data: optional dict to add the replies to (updated in place);
        a new dict is created when omitted

    Returns the dict, with one entry per endpoint holding the decoded JSON body.
    Relies on the module-level names owner_name, repo_name and headers.
    '''
    # A `{}` default would be shared between calls and leak earlier results into later ones
    if collect_data is None:
        collect_data = {}
    # Loop over the argument that was passed in, not the global endpoints_to_check
    for endpoint in endpoints:
        response = requests.get(
            f"https://api.github.com/repos/{owner_name}/{repo_name}/traffic/{endpoint}",
            headers=headers)
        print(f'Checked {endpoint}: status code {response.status_code}')
        # The body is stored whatever the status code is
        collect_data[endpoint] = response.json()
        # Wait one second before the next request
        time.sleep(1)
    return collect_data


In [7]:
# Parameters
# Default settings for the monitor. NOTE: the config-loading cell that follows
# assigns these same five names again from configs_test.yaml.
# File whose contents are sent as the API token in the Authorization header
path_to_token = "../_admin/private/token.txt"
# Owner and name of the repository; both are inserted into the API URL
owner_name = 'beingkk'
repo_name = 'github_monitor'
# Prefix of the CSV log path; it is joined to the filename by plain string
# concatenation, hence the trailing slash
log_folder = 'logs/'
# Traffic endpoints to query; each is appended to .../traffic/ in the API URL
endpoints_to_check = ['popular/paths', 'popular/referrers', 'views', 'clones']

In [8]:
# Load parameters from a config file
# The five assignments below replace the values set in the "Parameters" cell.
# Assuming the file holds a mapping, every key listed here must be present,
# otherwise the lookup raises KeyError.
# NOTE(review): the YAML file is presumably a trusted local file; if it could come
# from elsewhere, yaml.safe_load would be the more restrictive loader — confirm.
with open('configs_test.yaml', 'r') as f:    
    config_params = yaml.load(f, Loader=yaml.FullLoader)

path_to_token = config_params['path_to_token']
owner_name = config_params['owner_name']
repo_name = config_params['repo_name']
log_folder = config_params['log_folder']
endpoints_to_check = config_params['endpoints_to_check']

In [9]:
# Request header, needed for authorisation.
# The token is read inside a `with` block so the file handle is closed, and
# surrounding whitespace is stripped: a trailing newline in the token file
# would otherwise become part of the header value.
with open(path_to_token, "r") as token_file:
    token = token_file.read().strip()
headers = {'Authorization': f'token {token}'}

# Get today's date and time
time_now, date_now = get_datetime_string()

# Filename of the repo logs
log_filename = f'traffic_log_{repo_name}.csv'

In [11]:
# Check if a traffic log file of the repo exists.
# The check uses the same path that is read and written below, so a log_folder
# other than 'logs/' cannot cause an existing log to be missed and overwritten.
log_path = f'{log_folder}{log_filename}'

if os.path.isfile(log_path):
    # Read in the log file
    log_dataframe = pandas.read_csv(log_path)
    print(f"Read {log_filename}.")
    # Save a backup, just in case
    log_dataframe.to_csv(f'{log_folder}{log_filename[:-4]}_BACKUP.csv', index=False)
else:
    # Create a new, empty table: two bookkeeping columns plus one column per endpoint
    log_dataframe = pandas.DataFrame(
        columns=['date_when_checked', 'time_when_checked', *endpoints_to_check])
    print("Log file didn't exist; created a new table.")

# Check if today's data has been already registered
if date_checked(log_dataframe, date_now):
    print("Abort! We already have traffic data from today")
else:
    collect_data = {
        'date_when_checked': date_now,
        'time_when_checked': time_now,
    }
    collect_data = check_github_traffic_api(endpoints_to_check, collect_data)
    # Turn the responses to strings so each one is stored in a single CSV cell
    collect_data_str = {key: str(value) for key, value in collect_data.items()}
    # Add the responses as a new row. DataFrame.append was removed in pandas 2.0,
    # so the row is wrapped in a one-row frame and concatenated instead.
    new_row = pandas.DataFrame([collect_data_str])
    log_dataframe = pandas.concat([log_dataframe, new_row], ignore_index=True)
    # Make sure the target folder exists before writing (skipped for an empty prefix)
    if log_folder:
        os.makedirs(log_folder, exist_ok=True)
    # Overwrite the log file
    log_dataframe.to_csv(log_path, index=False)

    print(f"Collected and appended traffic data for {date_now} in {log_filename}")
          

Log file didn't exist; created a new table.
Checked popular/paths: status code 200
Checked popular/referrers: status code 200
Checked views: status code 200
Checked clones: status code 200
Collected and appended traffic data for 2020-12-11 in traffic_log_github_monitor.csv
